#!/usr/bin/env python3

# Compare the SHA256 sum of podcast episode files (fetched live and/or found
# in a local directory) against the GUID each feed lists for that episode.
# https://sysadministrivia.com/news/every-new-beginning
#
# Reconstructed from patch d2d405a36a9ca369c575db026f6c6041e60f12b1
# (brent s, Tue, 4 Apr 2017 07:54:39 -0400, "pretty happy with this now."),
# which created verifyfeed.py.

import argparse
import glob
import hashlib
import os
import sys
from urllib.request import urlopen

try:
    from lxml import etree
except ImportError:
    import xml.etree.ElementTree as etree

baseurl = 'https://sysadministrivia.com'

# Feed name (as accepted by -f/--feed) -> path of that feed below baseurl.
feeds = {'itunes': '/feed/itunes.xml',
         'google': '/feed/google.xml',
         'mp3': '/feed/podcast.xml',
         'ogg': '/feed/oggcast.xml'}

# Number of bytes read per iteration while hashing a stream.
_CHUNK_SIZE = 4096


def _sha256Stream(stream):
    """Return the hex SHA256 digest of everything left to read in *stream*.

    *stream* must be a binary file-like object; it is consumed in
    _CHUNK_SIZE pieces so large episodes are never held in memory at once.
    """
    digest = hashlib.sha256()
    for chunk in iter(lambda: stream.read(_CHUNK_SIZE), b''):
        digest.update(chunk)
    return digest.hexdigest()


def getXML(baseurl, feeds, args):
    """Download and parse every feed named in args.feedlist.

    Returns a dict mapping feed name -> parsed root element of that feed.
    """
    xml = {}
    print('Fetching feed(s) XML, please wait...')
    for feed in args.feedlist:
        with urlopen(baseurl + feeds[feed]) as url:
            xml[feed] = etree.fromstring(url.read())
    return xml


def _checkLive(feed, epID, entry):
    """Hash the enclosure at entry['uri'] and compare it with entry['guid'].

    The computed digest is stored in entry['livesha']; a warning is printed
    when it differs from the GUID.
    """
    print('{0}({1}): Fetching/verifying live sum...'.format(epID, feed))
    with urlopen(entry['uri']) as url:
        entry['livesha'] = _sha256Stream(url)
    if entry['livesha'] != entry['guid']:
        # The original message used {1} twice, so the GUID was never shown.
        print('\t\tWARNING: GUID {0} does not match live sum {1}!'.format(
            entry['guid'], entry['livesha']))


def _checkLocal(episodes, locdir):
    """Hash local copies of *episodes* found anywhere below *locdir*.

    Exits (SystemExit) with an error message if *locdir* is not a directory.
    Prints a warning for every local file whose SHA256 differs from the GUID.
    """
    localdir = os.path.abspath(os.path.expanduser(locdir))
    if not os.path.isdir(localdir):
        sys.exit('ERROR: Directory {0} does not exist!'.format(locdir))
    print('Checking local files...')
    for info in episodes.values():
        # Escape both parts so glob metacharacters in the directory or the
        # episode filename are matched literally; only '**' is a wildcard.
        pattern = os.path.join(glob.escape(localdir), '**',
                               glob.escape(info['file']))
        for localfile in glob.iglob(pattern, recursive=True):
            print('Checking {0}...'.format(localfile))
            with open(localfile, 'rb') as f:
                localsha = _sha256Stream(f)
            if localsha != info['guid']:
                print('WARNING: GUID {0} does not match local hash {1}!'.format(
                    info['guid'], localsha))
    print('Finished checking local files.')


def _printSums(sums, feedlist):
    """Print, per episode of the first feed, the GUID from each feed."""
    for episode in sums[feedlist[0]]:
        print(episode + ':')
        for feed in feedlist:
            # An episode may be absent from another feed; report that
            # instead of raising KeyError.
            guid = sums[feed].get(episode, {}).get('guid',
                                                   '(not in this feed)')
            print('\t{0:6}: {1}'.format(feed, guid))


def getSums(xml, args):
    """Collect per-episode data from the parsed feeds and run the checks.

    xml  -- dict of feed name -> parsed feed root (as returned by getXML).
    args -- namespace with 'feedlist' (list of feed names), 'livesums'
            (bool) and 'locdir' (directory path, or a false value).

    For every 'channel/item' the episode ID is the title text before the
    first ':'. Returns {feed: {epID: {'uri', 'guid', 'file'[, 'livesha']}}}.

    With args.livesums each enclosure is fetched and hashed. With
    args.locdir the episodes of the first listed feed are compared against
    matching files below that directory. With neither, the GUIDs are printed.
    """
    sums = {}
    for feed in args.feedlist:
        sums[feed] = {}
        for episode in xml[feed].findall('channel/item'):
            epID = episode.find('title').text.split(':')[0]
            uri = episode.find('enclosure').attrib['url']
            entry = {'uri': uri,
                     'guid': episode.find('guid').text,
                     'file': os.path.basename(uri)}
            sums[feed][epID] = entry
            if args.livesums:
                _checkLive(feed, epID, entry)
    if not args.feedlist:
        # Nothing was requested; avoid indexing feedlist[0] below.
        return sums
    if args.locdir:
        _checkLocal(sums[args.feedlist[0]], args.locdir)
    if not args.locdir and not args.livesums:
        _printSums(sums, args.feedlist)
    return sums


def parseArgs():
    """Build and return the command-line ArgumentParser (not yet parsed)."""
    args = argparse.ArgumentParser(description='Sysadministrivia Verifier',
                                   epilog='https://git.square-r00t.net/Podloader')
    args.add_argument('-l',
                      '--live',
                      dest='livesums',
                      action='store_true',
                      help='If specified, calculate the sums live from the site and compare against the GUIDs served. This can take a long time.')
    args.add_argument('-f',
                      '--feed',
                      choices=['itunes', 'google', 'mp3', 'ogg'],
                      dest='feedlist',
                      # '+' rather than '*': a bare "-f" would otherwise
                      # yield an empty list and crash later on feedlist[0].
                      nargs='+',
                      default=['itunes', 'google', 'mp3', 'ogg'],
                      help='Which feed(s) to check. The default is all. Multiple can be specified via "-f itunes google" etc.')
    args.add_argument('-d',
                      '--directory',
                      dest='locdir',
                      metavar='path',
                      default=False,
                      help='If specified, a directory where local copies of the episodes exist. (e.g. ~/gPodder/Downloads/Sysadministrivia)')
    return args


def main():
    """Parse the command line, fetch the feeds and run the checks."""
    args = parseArgs().parse_args()
    xml = getXML(baseurl, feeds, args)
    getSums(xml, args)


if __name__ == '__main__':
    main()