You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
106 lines
3.8 KiB
106 lines
3.8 KiB
#!/usr/bin/env python3 |
|
import json
import math
import os
import sys
import time
|
|
|
# --- settings ---
# Toggles for which lists get built, plus the longest name accepted into them.
scanForTrackingDomains = True
scanForExampleSites = False
domainCharLimit = 35

# Accumulators of [name, occurrenceCount] pairs; both start out empty.
uniqueTrackingDomains = []
uniqueExampleSites = []

# Absolute directory of this script; input and output paths are built from it.
thisDirPath = os.path.dirname(os.path.abspath(__file__))

# Names of the files to scan (order is whatever os.listdir returns).
domainFiles = os.listdir(thisDirPath + '/LOCAL/tracker-radar-main/domains/US')
|
|
|
def isValidDomain(domain, charLimit=None):
    """Return True when *domain* is acceptable for the output lists.

    A domain is rejected when it is longer than the character limit, when
    any dot-separated label is empty (leading, trailing or doubled dots, or
    an empty string), or when any label contains a percent sign.

    Args:
        domain: The domain name to check.
        charLimit: Maximum allowed length of *domain*. When omitted, the
            module-level ``domainCharLimit`` setting is used.

    Returns:
        bool: True if the domain passes every check, False otherwise.
    """
    # Resolve the default at call time so the module setting stays in charge.
    if charLimit is None:
        charLimit = domainCharLimit

    # Check the argument itself; reading a global here would ignore the
    # caller's value and fail when that global was never assigned.
    if len(domain) > charLimit:
        return False

    # Every label between dots must be non-empty and free of '%'.
    return all(label != '' and '%' not in label for label in domain.split('.'))
|
|
|
# Scan every domain file and tally how often each tracker domain and each
# example site occurs. Dicts give O(1) duplicate checks (the list scans they
# replace were quadratic) and preserve first-seen order.
domainDirPath = thisDirPath + '/LOCAL/tracker-radar-main/domains/US/'
totalFiles = len(domainFiles)

# Seed from the result lists so entries already present keep their counts.
trackingCounts = {entry[0]: entry[1] for entry in uniqueTrackingDomains}
exampleSiteCounts = {entry[0]: entry[1] for entry in uniqueExampleSites}

for i, fileName in enumerate(domainFiles):
    print('Scanning files: ', i, ' of ', totalFiles, ' ', math.floor(100 * i / totalFiles), '%')

    # The context manager closes each handle promptly; JSON text is UTF-8,
    # so do not depend on the platform default encoding.
    with open(domainDirPath + fileName, 'r', encoding='utf-8') as jsonFile:
        dictionary = json.load(jsonFile)

    if scanForTrackingDomains:
        dom = dictionary['domain']
        if dom in trackingCounts:
            trackingCounts[dom] += 1
        elif isValidDomain(dom):
            # Invalid names are never stored, so they are never counted.
            trackingCounts[dom] = 1

    if scanForExampleSites:
        # Look in every resource for example sites; add unseen ones and
        # increment the score of duplicates by one.
        for resource in dictionary['resources']:
            for site in resource['exampleSites']:
                if site in exampleSiteCounts:
                    exampleSiteCounts[site] += 1
                elif isValidDomain(site):
                    # Validate the example site itself, not the file's
                    # tracker domain.
                    exampleSiteCounts[site] = 1

# Publish the tallies in the [name, count] list-of-lists shape that the rest
# of the script sorts and writes out.
uniqueTrackingDomains = [[name, count] for name, count in trackingCounts.items()]
uniqueExampleSites = [[name, count] for name, count in exampleSiteCounts.items()]
|
|
|
|
|
def _writeCountList(path, entries):
    """Write [name, count] pairs to *path* as ``[['name',count],...]``.

    The text is written in one go with no trailing newline, matching the
    layout the script has always produced. Writing directly from Python
    avoids spawning a shell per entry and avoids interpolating names read
    from data files into a shell command line.
    """
    body = ','.join("['" + name + "'," + str(count) + "]" for name, count in entries)
    # Mode 'w' truncates any previous output before writing.
    with open(path, 'w', encoding='utf-8') as outFile:
        outFile.write('[' + body + ']')


# Short pauses keep the progress messages readable on a terminal.
time.sleep(1)

if scanForTrackingDomains:
    time.sleep(1)
    print('Sorting tracking domains. Length:', len(uniqueTrackingDomains))
    # Pairs sort by name first, then by count.
    uniqueTrackingDomains = sorted(uniqueTrackingDomains)
    print('After sorting. Length:', len(uniqueTrackingDomains))

    print('writing tracking domains to output file...')
    _writeCountList(thisDirPath + '/trackingDomains.txt', uniqueTrackingDomains)

if scanForExampleSites:
    time.sleep(1)
    print('Sorting example sites. Length:', len(uniqueExampleSites))
    uniqueExampleSites = sorted(uniqueExampleSites)
    print('After sorting. Length:', len(uniqueExampleSites))

    time.sleep(1)
    print('writing example sites to output file...')
    _writeCountList(thisDirPath + '/exampleSites.txt', uniqueExampleSites)

print('Done!')
|
|
|