Initial working versions

This commit is contained in:
Glen Pit-Pladdy
2017-04-11 11:30:08 +01:00
commit bbfec1a493
4 changed files with 378 additions and 0 deletions

138
py-hole-bind9RPZ Executable file
View File

@@ -0,0 +1,138 @@
#!/usr/bin/python
# update bind9 RPZ file
import yaml
import time
import re
import os
import urllib3
import sys
import subprocess
# read config
# Built-in defaults are overlaid with the YAML file named by configfile.  The
# overlay is shallow (dict.update), so a top-level key present in the file
# replaces the whole default value for that key, e.g. all of 'blacklists'.
configfile = '/etc/bind/py-hole-rpzconfig.yaml'
config = {
    # base config overridden by configfile
    'cachedir': '/var/local/bindRPZ',
    'cacheprefix': 'bindRPZcache-',
    'cacheexpire': 14400, # 4 hours
    'defaultresponse': 'CNAME .',
    'exclusions': {},
    'blacklists': {
        'StevenBlack': { 'url':'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts', 'format':'hosts', 'hostskey':'0.0.0.0' },
    },
}
# load yaml file or error
if not os.path.isfile ( configfile ):
    sys.exit ( "Configuration file %s not found\n" % configfile )
with open ( configfile, 'r' ) as f:
    # safe_load only builds plain YAML types, which is all a settings file
    # needs; a bare yaml.load(f) without a Loader is rejected by newer PyYAML
    loadedconfig = yaml.safe_load ( f )
# an empty or comment-only file loads as None, which dict.update() cannot take
if loadedconfig is not None:
    if not isinstance ( loadedconfig, dict ):
        sys.exit ( "Configuration file %s does not contain a mapping of settings\n" % configfile )
    config.update ( loadedconfig )
# a bare "exclusions:" key with no entries loads as None
if config.get ( 'exclusions' ) is None:
    config['exclusions'] = {}
# always exclude localhost else we get it blocked for 127.0.0.1 keys
config['exclusions']['localhost'] = True
# at minimum we need to end up with an rpzfile
if 'rpzfile' not in config:
    sys.exit ( "Setting for 'rpzfile' not found in configuration %s\n" % configfile )
# and a template with a serial number ("or ''" copes with an empty rpztemplate key)
if not re.search ( r'<SERIAL>', config.get ( 'rpztemplate' ) or '' ):
    sys.exit ( "Setting for 'rpztemplate' including a serial number marker '<SERIAL>' not found in configuration %s\n" % configfile )
# and a reloadzonecommand:
if 'reloadzonecommand' not in config:
    sys.exit ( "Setting for 'reloadzonecommand' not found in configuration %s\n" % configfile )
# build our zone
# the serial is the current unix time, zero padded to ten digits
zonedata = re.sub ( r'<SERIAL>', '%010d' % int(time.time()), config['rpztemplate'] )
# maps each hostname already emitted to the blacklist name that supplied it
seenbefore = {}
commentstart = ';' # for bind
def addcomment ( comment ):
    """Append one comment line to the zone being built.

    comment is the text after the source list's own comment character; it is
    written after the output comment marker and terminated with a newline.
    Uses the module-level commentstart (as addhost does) rather than a second
    hard-coded copy of the marker.
    """
    global zonedata
    zonedata += "%s%s\n" % (commentstart,comment)
def addhost ( host ):
    """Append the RPZ record for one hostname to the zone being built.

    host is lower-cased and stripped of surrounding whitespace first.  A name
    already emitted by an earlier list, or one listed in config['exclusions'],
    is still written but prefixed with a comment marker so the whole line is
    inert.  The name is then recorded in seenbefore against the blacklist
    currently being processed (the module-level loop variable source).

    An empty name is ignored: it would otherwise produce a record with no
    owner name.
    """
    global zonedata
    host = host.lower().strip()
    if not host:
        return
    # anything put in prefix starts with the comment marker, disabling the record
    prefix = ''
    if host in seenbefore:
        prefix += "%s seenbefore in %s %s" % (commentstart,seenbefore[host],commentstart)
    if host in config['exclusions']:
        prefix += "%s excluded %s" % (commentstart,commentstart)
    zonedata += "%s%s %s\n" % (prefix,host,config['defaultresponse'])
    seenbefore[host] = source
# grab from web or cache
# For every configured blacklist: use the cached copy if it is newer than
# cacheexpire seconds, otherwise download and cache it; then feed its comments
# and hostnames into the zone via addcomment() / addhost().
cacheupto = time.time() - config['cacheexpire']
if not os.path.isdir ( config['cachedir'] ):
    os.makedirs ( config['cachedir'] )
http = urllib3.PoolManager ()
httpheaders = { 'User-Agent': 'py-hole RPZ blackhole manager' }
for source in config['blacklists']:
    blacklist = config['blacklists'][source]
    cachefile = os.path.join ( config['cachedir'], config['cacheprefix'] + source )
    # check cache, download if needed
    if os.path.isfile ( cachefile ) and os.path.getmtime ( cachefile ) >= cacheupto:
        print ( "fresh cache %s" % blacklist['url'] )
        with open ( cachefile, 'rt' ) as f:
            data = f.read ()
    else:
        print ( "retrieve %s" % blacklist['url'] )
        response = http.request ( 'GET', blacklist['url'], headers=httpheaders )
        if response.status != 200:
            sys.exit ( "ERROR - got http response %d for %s" % (response.status,blacklist['url']) )
        # write cache file under a temporary name, then rename into place, so
        # an interrupted write never leaves a truncated file looking like a cache
        with open ( cachefile+'.TMP', 'wt' ) as f:
            f.write ( response.data )
        os.rename ( cachefile+'.TMP', cachefile )
        # all done
        data = response.data
    # reject an unsupported format before emitting anything for this source
    if blacklist['format'] not in ( 'hosts', 'raw' ):
        sys.exit ( "Unknown format %s for %s" % (blacklist['format'],source) )
    # we are good to go
    zonedata += ";=============================================================================\n"
    zonedata += "; Source: %s :: %s\n" % (source,blacklist['url'])
    zonedata += ";=============================================================================\n\n"
    # process data
    recordcount = 0
    for line in data.splitlines():
        # strip first so indented comments and whitespace-only lines are
        # recognised instead of being mistaken for hostnames
        line = line.strip()
        if line == '':
            continue
        # comments start "#"
        if line[0] == '#':
            addcomment ( line[1:] )
            continue
        # drop a trailing "# ..." remark; split() without arguments never
        # yields empty tokens, unlike re.split on a line with trailing blanks
        fields = line.split ( '#', 1 )[0].split ()
        if blacklist['format'] == 'hosts':
            # we only take lines matching "hostskey"; the rest are the names
            if fields[0] != blacklist['hostskey']:
                # not a matching key
                continue
            hosts = fields[1:]
        else:
            # raw: one hostname per line
            hosts = [ line.split ( '#', 1 )[0].strip () ]
        for host in hosts:
            recordcount += 1
            addhost ( host )
    # an empty list most likely means a broken download or a wrong hostskey
    if recordcount == 0:
        sys.exit ( "Got recordcount of %d for %s" % (recordcount,source) )
# write the config['rpzfile'] file
# The new zone is written beside the target and renamed into place.
with open ( config['rpzfile']+'.TMP', 'wt' ) as f:
    f.write ( zonedata )
# keep the previous zone as .old; on the very first run there is none to keep
# and renaming a missing file would raise OSError
if os.path.isfile ( config['rpzfile'] ):
    os.rename ( config['rpzfile'], config['rpzfile']+'.old' )
os.rename ( config['rpzfile']+'.TMP', config['rpzfile'] )
# reload bind zone file
p = subprocess.Popen ( config['reloadzonecommand'], stdin=None, stdout=None )
# wait for the command so a failed reload is reported rather than lost when
# this script exits
if p.wait () != 0:
    sys.exit ( "ERROR - reload command %s exited with status %d" % (config['reloadzonecommand'],p.returncode) )

33
py-hole-config.yaml Normal file
View File

@@ -0,0 +1,33 @@
---
hostsfile: /etc/local-hosts-blackhole
dnsmasqblackholeconfig: /etc/dnsmasq.d/local-hosts-blackhole
# see https://github.com/pi-hole/pi-hole/blob/master/adlists.default
# Note: the moment we specify blacklists, the base key completely replaces defaults
blacklists:
StevenBlack:
url: https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts
format: hosts
hostskey: 0.0.0.0
malwaredomains: { url: 'https://mirror1.malwaredomains.com/files/justdomains', format: raw }
cameleon: { 'url':'http://sysctl.org/cameleon/hosts', 'format':'hosts', 'hostskey':'127.0.0.1' }
abuse.ch: { 'url':'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist', 'format':'raw' }
disconnect.me_tracking: { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt', 'format':'raw' }
disconnect.me_ad: { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt', 'format':'raw' }
# hosts-file.net: { 'url':'https://hosts-file.net/ad_servers.txt', 'format':'hosts0000' },
# Windows 10 telemetry: {
securemecca.com: { url: 'http://securemecca.com/Downloads/hosts.txt', format: hosts, hostskey: 127.0.0.1 }
# currently we support formats of:
# * raw
# - considers lines starting "#" as comments
# - one hostname per line
# * hosts
# - considers lines starting "#" as comments
# - requires "hostskey" matching the IP at the start of the line (anything else ignored)
# - multiple hosts per line (typical hosts file with aliases)
exclusions:
www.googleadservices.com: True # needed for google shopping
pagead.l.doubleclick.net: True # CNAME for www.googleadservices.com needed for google shopping
# Note that "localhost" is always excluded to prevent conflicts

150
py-hole-dnsmasq Executable file
View File

@@ -0,0 +1,150 @@
#!/usr/bin/python
# update dnsmasqs addn-hosts file
# removal: delete files specified in cachedir/cacheprefix, dnsmasqblackholeconfig, output
import yaml
import time
import re
import os
import urllib3
import sys
import subprocess
# read config
# Built-in defaults are overlaid with the YAML file named by configfile.  The
# overlay is shallow (dict.update), so a top-level key present in the file
# replaces the whole default value for that key, e.g. all of 'blacklists'.
configfile = '/etc/py-hole-config.yaml'
config = {
    # base config overridden by configfile
    'cachedir': '/var/local/py-hole',
    'cacheprefix': 'cache-',
    'cacheexpire': 14400, # 4 hours
    'hostsfile': '/etc/local-hosts-blackhole',
    'dnsmasqblackholeconfig': '/etc/dnsmasq.d/local-hosts-blackhole',
    'defaultresponse': '0.0.0.0',
    'exclusions': {
        'localhost': True, # we need this always else we get it blocked for 127.0.0.1 keys
        'www.googleadservices.com': True, # needed for google shopping
        'pagead.l.doubleclick.net': True, # CNAME for www.googleadservices.com needed for google shopping
    },
    'blacklists': { # see https://github.com/pi-hole/pi-hole/blob/master/adlists.default
        'StevenBlack': { 'url':'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts', 'format':'hosts', 'hostskey':'0.0.0.0' },
        'malwaredomains': { 'url':'https://mirror1.malwaredomains.com/files/justdomains', 'format':'raw' },
        'cameleon': { 'url':'http://sysctl.org/cameleon/hosts', 'format':'hosts', 'hostskey':'127.0.0.1' },
        'abuse.ch': { 'url':'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist', 'format':'raw' },
        'disconnect.me_tracking': { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt', 'format':'raw' },
        'disconnect.me_ad': { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt', 'format':'raw' },
        # 'hosts-file.net': { 'url':'https://hosts-file.net/ad_servers.txt', 'format':'hosts0000' },
        # 'Windows 10 telemetry': {
        'securemecca.com': { 'url':'http://securemecca.com/Downloads/hosts.txt', 'format':'hosts', 'hostskey':'127.0.0.1' },
    }
}
# load yaml file or error
if not os.path.isfile ( configfile ):
    sys.exit ( "Configuration file %s not found\n" % configfile )
with open ( configfile, 'r' ) as f:
    # safe_load only builds plain YAML types, which is all a settings file
    # needs; a bare yaml.load(f) without a Loader is rejected by newer PyYAML
    loadedconfig = yaml.safe_load ( f )
# an empty or comment-only file loads as None, which dict.update() cannot take
if loadedconfig is not None:
    if not isinstance ( loadedconfig, dict ):
        sys.exit ( "Configuration file %s does not contain a mapping of settings\n" % configfile )
    config.update ( loadedconfig )
# a bare "exclusions:" key with no entries loads as None
if config.get ( 'exclusions' ) is None:
    config['exclusions'] = {}
# always exclude localhost else we get it blocked for 127.0.0.1 keys
config['exclusions']['localhost'] = True
# our hostsfile
hostsdata = "# created %d\n" % int(time.time())
# maps each hostname already emitted to the blacklist name that supplied it
seenbefore = {}
commentstart = '#' # for hosts / dnsmasq
def addcomment ( comment ):
    """Append one comment line to the hosts data being built.

    comment is the text after the source list's own comment character; it is
    written after the output comment marker and terminated with a newline.
    The marker comes from the module-level commentstart ('#' for a hosts
    file); the previous hard-coded ';' is the BIND zone marker and does not
    start a comment in a hosts file.
    """
    global hostsdata
    hostsdata += "%s%s\n" % (commentstart,comment)
def addhost ( host ):
    """Append the hosts-file entry for one hostname to the data being built.

    host is lower-cased and stripped of surrounding whitespace first.  The
    entry is written in hosts-file order, address then name, using
    config['defaultresponse'] as the address.  A name already emitted by an
    earlier list, or one listed in config['exclusions'], is still written but
    prefixed with a comment marker so the whole line is inert.  The name is
    then recorded in seenbefore against the blacklist currently being
    processed (the module-level loop variable source).

    An empty name is ignored: it would otherwise produce an address with no
    hostname.
    """
    global hostsdata
    host = host.lower().strip()
    if not host:
        return
    # anything put in prefix starts with the comment marker, disabling the entry
    prefix = ''
    if host in seenbefore:
        prefix += "%s seenbefore in %s %s" % (commentstart,seenbefore[host],commentstart)
    if host in config['exclusions']:
        prefix += "%s excluded %s" % (commentstart,commentstart)
    # hosts format is "address name", not "name address"
    hostsdata += "%s%s %s\n" % (prefix,config['defaultresponse'],host)
    seenbefore[host] = source
# grab from web or cache
# For every configured blacklist: use the cached copy if it is newer than
# cacheexpire seconds, otherwise download and cache it; then feed its comments
# and hostnames into the hosts data via addcomment() / addhost().
cacheupto = time.time() - config['cacheexpire']
if not os.path.isdir ( config['cachedir'] ):
    os.makedirs ( config['cachedir'] )
http = urllib3.PoolManager ()
httpheaders = { 'User-Agent': 'py-hole hosts blackhole manager' }
for source in config['blacklists']:
    blacklist = config['blacklists'][source]
    cachefile = os.path.join ( config['cachedir'], config['cacheprefix'] + source )
    # check cache, download if needed
    if os.path.isfile ( cachefile ) and os.path.getmtime ( cachefile ) >= cacheupto:
        print ( "fresh cache %s" % blacklist['url'] )
        with open ( cachefile, 'rt' ) as f:
            data = f.read ()
    else:
        print ( "retrieve %s" % blacklist['url'] )
        response = http.request ( 'GET', blacklist['url'], headers=httpheaders )
        if response.status != 200:
            sys.exit ( "ERROR - got http response %d for %s" % (response.status,blacklist['url']) )
        # write cache file under a temporary name, then rename into place, so
        # an interrupted write never leaves a truncated file looking like a cache
        with open ( cachefile+'.TMP', 'wt' ) as f:
            f.write ( response.data )
        os.rename ( cachefile+'.TMP', cachefile )
        # all done
        data = response.data
    # reject an unsupported format before emitting anything for this source
    if blacklist['format'] not in ( 'hosts', 'raw' ):
        sys.exit ( "Unknown format %s for %s" % (blacklist['format'],source) )
    # we are good to go
    hostsdata += "#=============================================================================\n"
    hostsdata += "# Source: %s :: %s\n" % (source,blacklist['url'])
    hostsdata += "#=============================================================================\n\n"
    # process data
    recordcount = 0
    for line in data.splitlines():
        # strip first so indented comments and whitespace-only lines are
        # recognised instead of being mistaken for hostnames
        line = line.strip()
        if line == '':
            continue
        # comments start "#"
        if line[0] == '#':
            addcomment ( line[1:] )
            continue
        # drop a trailing "# ..." remark; split() without arguments never
        # yields empty tokens, unlike re.split on a line with trailing blanks
        fields = line.split ( '#', 1 )[0].split ()
        if blacklist['format'] == 'hosts':
            # we only take lines matching "hostskey"; the rest are the names
            if fields[0] != blacklist['hostskey']:
                # not a matching key
                continue
            hosts = fields[1:]
        else:
            # raw: one hostname per line
            hosts = [ line.split ( '#', 1 )[0].strip () ]
        for host in hosts:
            recordcount += 1
            addhost ( host )
    # an empty list most likely means a broken download or a wrong hostskey
    if recordcount == 0:
        sys.exit ( "Got recordcount of %d for %s" % (recordcount,source) )
# write the config['hostsfile'] file
# The new hosts data is written beside the target and renamed into place.
with open ( config['hostsfile']+'.TMP', 'wt' ) as f:
    f.write ( hostsdata )
# keep the previous file as .old; on the very first run there is none to keep
# and renaming a missing file would raise OSError
if os.path.isfile ( config['hostsfile'] ):
    os.rename ( config['hostsfile'], config['hostsfile']+'.old' )
os.rename ( config['hostsfile']+'.TMP', config['hostsfile'] )
# ensure we have a dnsmasq config file - we assume if it's there it's sufficient TODO maybe we should check
if not os.path.isfile ( config['dnsmasqblackholeconfig'] ):
    with open ( config['dnsmasqblackholeconfig']+'.TMP', 'wt' ) as f:
        # point dnsmasq at the hosts file written above (this previously
        # referenced an undefined name and raised NameError)
        f.write ( "addn-hosts=%s\n" % config['hostsfile'] )
    os.rename ( config['dnsmasqblackholeconfig']+'.TMP', config['dnsmasqblackholeconfig'] )
# TODO reload dnsmasq (SIGHUP re-reads files, but not config)

57
py-hole-rpzconfig.yaml Normal file
View File

@@ -0,0 +1,57 @@
---
rpzfile: /etc/bind/db.rpz.example.com
rpztemplate: |
; see http://www.zytrax.com/books/dns/ch9/rpz.html
; zone file rpz.example.com
$TTL 2h ; default TTL
$ORIGIN rpz.example.com.
; email address is never used
@ SOA nonexistent.nodomain.none. dummy.nodomain.none. <SERIAL> 12h 15m 3w 2h
; name server is never accessed but out-of-zone
; NS nonexistant.nodomain.none
NS boni.example.com.
;example.net CNAME .
;*.example.net CNAME .
; Automatic rules start
;
# end of template
cachedir: /var/local/bindRPZ
cacheprefix: bindRPZcache-
cacheexpire: 14400 # 4 hours
reloadzonecommand: [ 'rndc', 'reload', 'rpz.example.com' ]
defaultresponse: CNAME .
# see https://github.com/pi-hole/pi-hole/blob/master/adlists.default
# Note: the moment we specify blacklists, the base key completely replaces defaults
blacklists:
StevenBlack:
url: https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts
format: hosts
hostskey: 0.0.0.0
malwaredomains: { url: 'https://mirror1.malwaredomains.com/files/justdomains', format: raw }
cameleon: { 'url':'http://sysctl.org/cameleon/hosts', 'format':'hosts', 'hostskey':'127.0.0.1' }
abuse.ch: { 'url':'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist', 'format':'raw' }
disconnect.me_tracking: { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt', 'format':'raw' }
disconnect.me_ad: { 'url':'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt', 'format':'raw' }
# hosts-file.net: { 'url':'https://hosts-file.net/ad_servers.txt', 'format':'hosts0000' }
# Windows 10 telemetry: {
securemecca.com: { 'url':'http://securemecca.com/Downloads/hosts.txt', 'format':'hosts', 'hostskey':'127.0.0.1' }
# currently we support formats of:
# * raw
# - considers lines starting "#" as comments
# - one hostname per line
# * hosts
# - considers lines starting "#" as comments
# - requires "hostskey" matching the IP at the start of the line (anything else ignored)
# - multiple hosts per line (typical hosts file with aliases)
exclusions:
www.googleadservices.com: True # needed for google shopping
pagead.l.doubleclick.net: True # CNAME for www.googleadservices.com needed for google shopping
# Note that "localhost" is always excluded to prevent conflicts