From d2d405a36a9ca369c575db026f6c6041e60f12b1 Mon Sep 17 00:00:00 2001
From: brent s <bts@square-r00t.net>
Date: Tue, 4 Apr 2017 07:54:39 -0400
Subject: [PATCH] pretty happy with this now.

---
 verifyfeed.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100755 verifyfeed.py

diff --git a/verifyfeed.py b/verifyfeed.py
new file mode 100755
index 0000000..9e4ad41
--- /dev/null
+++ b/verifyfeed.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+# https://sysadministrivia.com/news/every-new-beginning
+
+import hashlib
+import argparse
+import os
+import glob
+from urllib.request import urlopen
+try:
+    from lxml import etree
+except ImportError:
+    import xml.etree.ElementTree as etree
+
+baseurl = 'https://sysadministrivia.com'
+
+feeds = {'itunes':'/feed/itunes.xml',
+         'google':'/feed/google.xml',
+         'mp3':'/feed/podcast.xml',
+         'ogg':'/feed/oggcast.xml'}
+
+def getXML(baseurl, feeds, args):
+    xml = {}
+    print('Fetching feed(s) XML, please wait...')
+    for feed in args.feedlist:
+        with urlopen(baseurl + feeds[feed]) as url:
+            xml[feed] = etree.fromstring(url.read())
+    return(xml)
+
+def getSums(xml, args):
+    sums = {}
+    for feed in args.feedlist:
+        sums[feed] = {}
+        for episode in xml[feed].findall('channel/item'):
+            epID = episode.find('title').text.split(':')[0]
+            sums[feed][epID] = {}
+            sums[feed][epID]['uri'] = episode.find('enclosure').attrib['url']
+            sums[feed][epID]['guid'] = episode.find('guid').text
+            sums[feed][epID]['file'] = os.path.basename(sums[feed][epID]['uri'])
+            if args.livesums:
+                livesha = hashlib.sha256()
+                print('{0}({1}): Fetching/verifying live sum...'.format(epID, feed))
+                with urlopen(sums[feed][epID]['uri']) as url:
+                    for chunk in iter(lambda: url.read(4096), b''):
+                        livesha.update(chunk)
+                sums[feed][epID]['livesha'] = livesha.hexdigest()
+                if sums[feed][epID]['livesha'] != sums[feed][epID]['guid']:
+                    print('\t\tWARNING: GUID {1} does not match live sum {1}!'.format(sums[feed][epID]['guid'],
+                                                                                      sums[feed][epID]['livesha']))
+    if args.locdir:
+        localdir = os.path.abspath(os.path.expanduser(args.locdir))
+        if not os.path.isdir(localdir):
+            exit('ERROR: Directory {0} does not exist!'.format(args.locdir))
+        episodes = sums[args.feedlist[0]]
+        print('Checking local files...')
+        for episode in episodes.keys():
+            filename = episodes[episode]['file']
+            guid = episodes[episode]['guid']
+            for localfile in glob.iglob('{0}/**/{1}'.format(localdir, filename), recursive = True):
+                localsha = hashlib.sha256()
+                print('Checking {0}...'.format(localfile))
+                with open(localfile, 'rb') as f:
+                    for chunk in iter(lambda: f.read(4096), b''):
+                        localsha.update(chunk)
+                if localsha.hexdigest() != guid:
+                    print('WARNING: GUID {0} does not match local hash {1}!'.format(guid, localsha.hexdigest()))
+        print('Finished checking local files.')
+    if not args.locdir and not args.livesums:
+        for episode in sums[args.feedlist[0]].keys():
+            print(episode + ':')
+            for feed in args.feedlist:
+                print('\t{0:6}: {1}'.format(feed,
+                                        sums[feed][episode]['guid']))
+    return(sums)
+
+def parseArgs():
+    args = argparse.ArgumentParser(description = 'Sysadministrivia Verifier',
+                                   epilog = 'https://git.square-r00t.net/Podloader')
+    args.add_argument('-l',
+                      '--live',
+                      dest = 'livesums',
+                      action = 'store_true',
+                      help = 'If specified, calculate the sums live from the site and compare against the GUIDs served. This can take a long time.')
+    args.add_argument('-f',
+                      '--feed',
+                      choices = ['itunes', 'google', 'mp3', 'ogg'],
+                      dest = 'feedlist',
+                      nargs = '*',
+                      default = ['itunes', 'google', 'mp3', 'ogg'],
+                      help = 'Which feed(s) to check. The default is all. Multiple can be specified via "-f itunes google" etc.')
+    args.add_argument('-d',
+                      '--directory',
+                      dest = 'locdir',
+                      metavar = 'path',
+                      default = False,
+                      help = 'If specified, a directory where local copies of the episodes exist. (e.g. ~/gPodder/Downloads/Sysadministrivia)')
+    return(args)
+
+def main():
+    args = parseArgs().parse_args()
+    xml = getXML(baseurl, feeds, args)
+    sums = getSums(xml, args)
+
+if __name__ == '__main__':
+    main()