You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

106 lines
3.8 KiB

#!/usr/bin/env python3
import json
import math
import os
import sys
import time
# --- settings ---
# Switches selecting which data sets are collected during the scan.
scanForTrackingDomains = True
scanForExampleSites = False
# Names longer than this many characters are rejected by isValidDomain.
domainCharLimit = 35

# Absolute directory of this script; input and output paths hang off it.
thisDirPath = os.path.dirname(os.path.abspath(__file__))
# Entries of the input directory, one per domain file to scan.
domainFiles = os.listdir(thisDirPath + '/LOCAL/tracker-radar-main/domains/US')

# Accumulators filled by the scan: lists of [name, occurrenceCount] pairs.
uniqueTrackingDomains = []
uniqueExampleSites = []
def isValidDomain(domain, charLimit=None):
    """Return True when *domain* passes the script's sanity checks.

    A domain is rejected when it is longer than the character limit, when
    any of its dot-separated labels is empty (leading, trailing or doubled
    dots, or an empty string), or when it contains a percent sign.

    Args:
        domain: The domain name to check.
        charLimit: Maximum allowed length of ``domain``. When omitted, the
            module-level ``domainCharLimit`` setting is used.

    Returns:
        bool: True if every check passes, otherwise False.
    """
    if charLimit is None:
        charLimit = domainCharLimit
    # Check the argument itself; the previous body read the global ``dom``
    # and therefore ignored whatever the caller passed in.
    if len(domain) > charLimit:
        return False
    for label in domain.split('.'):
        if not label:  # has empty element
            return False
        if '%' in label:  # has percent sign
            return False
    return True
# Scan every domain file and tally how often each tracking domain and/or
# example site occurs. Results accumulate in uniqueTrackingDomains and
# uniqueExampleSites as [name, count] pairs.
#
# The index dicts map a name to its [name, count] entry so a repeat is found
# with one lookup instead of re-walking the whole list for every file.
trackingIndex = {entry[0]: entry for entry in uniqueTrackingDomains}
exampleIndex = {entry[0]: entry for entry in uniqueExampleSites}
totalFiles = len(domainFiles)
for i, fileName in enumerate(domainFiles):
    print('Scanning files: ',i,' of ',totalFiles,' ',math.floor(100 * i / totalFiles),'%')
    # The context manager closes each file; the handle used to be leaked.
    filePath = thisDirPath+'/LOCAL/tracker-radar-main/domains/US/'+fileName
    with open(filePath, 'r', encoding='utf-8') as jsonFile:
        dictionary = json.load(jsonFile)

    if scanForTrackingDomains:
        dom = dictionary['domain']
        entry = trackingIndex.get(dom)
        if entry is not None:
            entry[1] += 1
        elif isValidDomain(dom):
            entry = [dom, 1]
            uniqueTrackingDomains.append(entry)
            trackingIndex[dom] = entry

    if scanForExampleSites:
        # Look in resources for example sites.
        for resource in dictionary['resources']:
            for site in resource['exampleSites']:
                # Add sites if unique; duplicates increment their score by one.
                entry = exampleIndex.get(site)
                if entry is not None:
                    entry[1] += 1
                # Validate the site being added. The old code validated the
                # tracker domain ``dom`` here, which is also undefined when
                # scanForTrackingDomains is off.
                elif isValidDomain(site):
                    entry = [site, 1]
                    uniqueExampleSites.append(entry)
                    exampleIndex[site] = entry

# Short pause after the scan, as before, but without spawning a shell.
time.sleep(1)
# Sort the collected tracking domains and write them to trackingDomains.txt
# next to this script, formatted as [['name',count],['name',count],...]
# with no trailing newline.
if scanForTrackingDomains:
    time.sleep(1)
    print('Sorting tracking domains. Length:',len(uniqueTrackingDomains))
    uniqueTrackingDomains = sorted(uniqueTrackingDomains)
    print('After sorting. Length:',len(uniqueTrackingDomains))
    print('writing tracking domains to output file...')
    # Write from Python in one pass. The previous version ran a shell
    # `printf ... >>` per entry, which let the shell and printf interpret
    # characters in the data, broke on paths containing spaces, and spawned
    # one process per entry.
    trackingEntries = [
        "['" + entry[0] + "'," + str(entry[1]) + "]"
        for entry in uniqueTrackingDomains
    ]
    # Mode 'w' truncates any previous output, replacing the old "reset" step.
    with open(thisDirPath + '/trackingDomains.txt', 'w', encoding='utf-8') as outFile:
        outFile.write('[' + ','.join(trackingEntries) + ']')
# Sort the collected example sites and write them to exampleSites.txt next
# to this script, formatted as [['name',count],['name',count],...] with no
# trailing newline.
if scanForExampleSites:
    time.sleep(1)
    print('Sorting example sites. Length:',len(uniqueExampleSites))
    uniqueExampleSites = sorted(uniqueExampleSites)
    print('After sorting. Length:',len(uniqueExampleSites))
    time.sleep(1)
    print('writing example sites to output file...')
    # Write from Python in one pass. The previous version ran a shell
    # `printf ... >>` per entry, which let the shell and printf interpret
    # characters in the data, broke on paths containing spaces, and spawned
    # one process per entry.
    exampleEntries = [
        "['" + entry[0] + "'," + str(entry[1]) + "]"
        for entry in uniqueExampleSites
    ]
    # Mode 'w' truncates any previous output, replacing the old "reset" step.
    with open(thisDirPath + '/exampleSites.txt', 'w', encoding='utf-8') as outFile:
        outFile.write('[' + ','.join(exampleEntries) + ']')
print('Done!')