Compare commits

...

3 Commits

Author SHA1 Message Date
22de39ff0b Fix logging 2025-04-19 18:01:16 +01:00
7844a7eb9c Add quiet mode 2025-04-19 17:53:16 +01:00
269c345f96 Parallel scraping using worker processes 2025-04-19 17:52:07 +01:00

View File

@@ -8,14 +8,15 @@ the GlobalTalk network, and create some nice visualisations of the network.
Created for MARCHintosh 2025
"""
import logging
import subprocess
import re
import json
import sys
import os
import argparse
import concurrent.futures
import json
import logging
import os
import re
import shutil
import subprocess
import sys
NPBLKUP_RESULTS = re.compile(r"^(.*):(.*)\s(\d*\.\d*:\d*)$")
@@ -83,10 +84,19 @@ def main():
help="Filename to write the resulting JSON to",
)
parser.add_argument("--debug", action="store_true", help="Enable debug logging")
parser.add_argument("--quiet", action="store_true")
parser.add_argument(
"--workers",
type=int,
default=10,
help="The number of concurrent zone scans to run",
)
args = parser.parse_args()
if args.debug:
level = logging.DEBUG
elif args.quiet:
level = logging.ERROR
else:
level = logging.INFO
logging.basicConfig(level=level, stream=sys.stderr)
@@ -108,18 +118,28 @@ def main():
"nodes": [],
}
# Iterate the zones and scan them
for zone in zone_results["zones"]:
# Subfunction to run
def lookup_zone(zone) -> list[(str, str)]:
if args.zone and zone != args.zone:
continue
return None
logging.info("Scanning %s", zone)
node_data = nbplkup(zone)
zone_results["nodes"].extend(node_data)
return nbplkup(zone)
# Use ThreadPoolExecutor for concurrent execution
with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
futures = {
executor.submit(lookup_zone, zone): zone for zone in zone_results["zones"]
}
for future in concurrent.futures.as_completed(futures):
result = future.result()
if result:
zone_results["nodes"].extend(result)
nodes = len(zone_results["nodes"])
zones = len(zone_results["zones"])
print("{0} zones, {1} nodes".format(zones, nodes), file=sys.stderr)
logging.info("{0} zones, {1} nodes".format(zones, nodes), file=sys.stderr)
# Dump out the resulting JSON to stdout
args.output.write(json.dumps(zone_results))