#!/usr/bin/python
# update dnsmasqs addn-hosts file
#
# Downloads (or re-uses cached copies of) every list in config['blacklists'],
# merges them into one hosts-format file in which each listed name resolves to
# config['defaultresponse'], and creates the dnsmasq "addn-hosts" config
# snippet for that file if it does not exist yet.
#
# removal: delete files specified in cachedir/cacheprefix, dnsmasqblackholeconfig, hostsfile

import yaml
import time
import re
import os
import urllib3
import sys
import subprocess

# read config
configfile = '/etc/py-hole-config.yaml'
config = {
    # base config overridden by configfile
    'cachedir': '/var/local/py-hole',
    'cacheprefix': 'cache-',
    'cacheexpire': 14400,  # 4 hours
    'hostsfile': '/etc/local-hosts-blackhole',
    'dnsmasqblackholeconfig': '/etc/dnsmasq.d/local-hosts-blackhole',
    'defaultresponse': '0.0.0.0',
    'exclusions': {
        'localhost': True,  # we need this always else we get it blocked for 127.0.0.1 keys
        'www.googleadservices.com': True,  # needed for google shopping
        'pagead.l.doubleclick.net': True,  # CNAME for www.googleadservices.com needed for google shopping
    },
    'blacklists': {
        # see https://github.com/pi-hole/pi-hole/blob/master/adlists.default
        'StevenBlack': { 'url':'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts', 'format':'hosts', 'hostskey':'0.0.0.0' },
        'malwaredomains': { 'url':'https://mirror1.malwaredomains.com/files/justdomains', 'format':'raw' },
        'cameleon': { 'url':'http://sysctl.org/cameleon/hosts', 'format':'hosts', 'hostskey':'127.0.0.1' },
        'abuse.ch': { 'url':'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist', 'format':'raw' },
        'disconnect.me_tracking': { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt', 'format':'raw' },
        'disconnect.me_ad': { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt', 'format':'raw' },
        # 'hosts-file.net': { 'url':'https://hosts-file.net/ad_servers.txt', 'format':'hosts0000' },
        # 'Windows 10 telemetry': {
        'securemecca.com': { 'url':'http://securemecca.com/Downloads/hosts.txt', 'format':'hosts', 'hostskey':'127.0.0.1' },
    }
}

# load yaml file or error
if not os.path.isfile(configfile):
    sys.exit("Configuration file %s not found\n" % configfile)
with open(configfile, 'r') as f:
    # safe_load: the config only needs plain mappings/lists/scalars, and
    # yaml.load() without an explicit Loader is refused by current PyYAML
    loadedconfig = yaml.safe_load(f)
# an empty config file parses to None - keep the built-in defaults then
if loadedconfig:
    config.update(loadedconfig)
# always exclude localhost else we get it blocked for 127.0.0.1 keys
config['exclusions']['localhost'] = True

# our hostsfile, collected as pieces and joined once just before writing
outputparts = ["# created %d\n" % int(time.time())]
seenbefore = {}
commentstart = '#'  # for hosts / dnsmasq
source = None  # name of the list currently being processed; read by addhost()


def _totext(raw):
    """Return raw as the interpreter's native str, decoding bytes as UTF-8 if needed."""
    if isinstance(raw, str):
        return raw
    return raw.decode('utf-8', 'replace')


def _tobytes(text):
    """Return text as bytes (UTF-8 encoded if needed) for a binary-mode file."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def addcomment(comment):
    """Append one comment line (commentstart + comment) to the output."""
    outputparts.append("%s%s\n" % (commentstart, comment))


def addhost(host):
    """Append one "<defaultresponse> <host>" line to the output.

    The host is lower-cased and stripped. If it was already emitted, or is
    listed in config['exclusions'], a marker starting with commentstart is
    put in front so the whole line becomes a comment but stays visible in
    the generated file. The host is then recorded in seenbefore under the
    current value of the module-level `source`.
    """
    host = host.lower().strip()
    prefix = ''
    if host in seenbefore:
        prefix += "%s seenbefore in %s %s" % (commentstart, seenbefore[host], commentstart)
    if host in config['exclusions']:
        prefix += "%s excluded %s" % (commentstart, commentstart)
    outputparts.append("%s%s %s\n" % (prefix, config['defaultresponse'], host))
    seenbefore[host] = source


# grab from web or cache
cacheupto = time.time() - config['cacheexpire']
if not os.path.isdir(config['cachedir']):
    os.makedirs(config['cachedir'])
http = urllib3.PoolManager()
httpheaders = { 'User-Agent': 'py-hole hosts blackhole manager' }
for source in config['blacklists']:
    listconfig = config['blacklists'][source]
    url = listconfig['url']
    cachefile = os.path.join(config['cachedir'], config['cacheprefix'] + source)
    # check cache, download if needed
    # (cache files are handled as bytes and decoded once, so the same code
    # behaves identically whether the interpreter's str is bytes or text)
    if os.path.isfile(cachefile) and os.path.getmtime(cachefile) >= cacheupto:
        print("fresh cache %s" % url)
        with open(cachefile, 'rb') as f:
            data = _totext(f.read())
    else:
        print("retrieve %s" % url)
        response = http.request('GET', url, headers=httpheaders)
        if response.status != 200:
            sys.exit("ERROR - got http response %d for %s" % (response.status, url))
        # write cache file under a temporary name, then rename into place
        with open(cachefile + '.TMP', 'wb') as f:
            f.write(response.data)
        os.rename(cachefile + '.TMP', cachefile)
        # all done
        data = _totext(response.data)
    # we are good to go
    outputparts.append("\n%s=============================================================================\n" % commentstart)
    outputparts.append("%s Source: %s :: %s\n" % (commentstart, source, url))
    outputparts.append("%s=============================================================================\n\n" % commentstart)
    # process data
    listformat = listconfig['format']
    if listformat not in ('hosts', 'raw'):
        sys.exit("Unknown format %s for %s" % (listformat, source))
    recordcount = 0
    for line in data.splitlines():
        line = line.strip()
        if line == '':
            continue
        # comments start "#"
        if line[0] == '#':
            addcomment(line[1:])
            continue
        # drop a trailing "# ..." remark so it is not mistaken for host names;
        # the remainder is never empty because the line does not start with "#"
        entry = line.split('#', 1)[0].strip()
        if listformat == 'hosts':
            # we only take lines matching "hostskey"; split() without an
            # argument never yields empty fields, so no blank hosts are added
            fields = entry.split()
            if fields[0] != listconfig['hostskey']:
                # not a matching key
                continue
            hosts = fields[1:]
        else:
            # raw: the whole entry is the host name
            hosts = [entry]
        for host in hosts:
            recordcount += 1
            addhost(host)
    if recordcount == 0:
        sys.exit("Got recordcount of %d for %s" % (recordcount, source))

# if we have a local blacklist, add that also
if 'localblacklist' in config:
    # attribute these hosts to the local list instead of whichever downloaded
    # list happened to be processed last
    source = 'localblacklist'
    outputparts.append("\n%s=============================================================================\n" % commentstart)
    outputparts.append("%s Source: Local blacklist from %s\n" % (commentstart, configfile))
    outputparts.append("%s=============================================================================\n\n" % commentstart)
    # an empty "localblacklist:" key parses to None
    for host in config['localblacklist'] or []:
        addhost(host)

# write the config['hostsfile'] file
outputdata = ''.join(outputparts)
with open(config['hostsfile'] + '.TMP', 'wb') as f:
    f.write(_tobytes(outputdata))
# keep the previous version as .old; on the very first run there is none
if os.path.isfile(config['hostsfile']):
    os.rename(config['hostsfile'], config['hostsfile'] + '.old')
os.rename(config['hostsfile'] + '.TMP', config['hostsfile'])

# ensure we have a dnsmasq config file - we assume if it's there it's sufficient TODO maybe we should check
if not os.path.isfile(config['dnsmasqblackholeconfig']):
    with open(config['dnsmasqblackholeconfig'] + '.TMP', 'wt') as f:
        # point dnsmasq at the hosts file written above
        f.write("addn-hosts=%s\n" % config['hostsfile'])
    os.rename(config['dnsmasqblackholeconfig'] + '.TMP', config['dnsmasqblackholeconfig'])

# TODO reload dnsmasq (SIGHUP re-reads files, but not config)