pretty happy with this now.
This commit is contained in:
parent
9931d64d48
commit
d2d405a36a
105
verifyfeed.py
Executable file
105
verifyfeed.py
Executable file
@ -0,0 +1,105 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# https://sysadministrivia.com/news/every-new-beginning
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
from urllib.request import urlopen
|
||||||
|
try:
|
||||||
|
from lxml import etree
|
||||||
|
except ImportError:
|
||||||
|
import xml.etree.ElementTree as etree
|
||||||
|
|
||||||
|
baseurl = 'https://sysadministrivia.com'
|
||||||
|
|
||||||
|
feeds = {'itunes':'/feed/itunes.xml',
|
||||||
|
'google':'/feed/google.xml',
|
||||||
|
'mp3':'/feed/podcast.xml',
|
||||||
|
'ogg':'/feed/oggcast.xml'}
|
||||||
|
|
||||||
|
def getXML(baseurl, feeds, args):
|
||||||
|
xml = {}
|
||||||
|
print('Fetching feed(s) XML, please wait...')
|
||||||
|
for feed in args.feedlist:
|
||||||
|
with urlopen(baseurl + feeds[feed]) as url:
|
||||||
|
xml[feed] = etree.fromstring(url.read())
|
||||||
|
return(xml)
|
||||||
|
|
||||||
|
def getSums(xml, args):
|
||||||
|
sums = {}
|
||||||
|
for feed in args.feedlist:
|
||||||
|
sums[feed] = {}
|
||||||
|
for episode in xml[feed].findall('channel/item'):
|
||||||
|
epID = episode.find('title').text.split(':')[0]
|
||||||
|
sums[feed][epID] = {}
|
||||||
|
sums[feed][epID]['uri'] = episode.find('enclosure').attrib['url']
|
||||||
|
sums[feed][epID]['guid'] = episode.find('guid').text
|
||||||
|
sums[feed][epID]['file'] = os.path.basename(sums[feed][epID]['uri'])
|
||||||
|
if args.livesums:
|
||||||
|
livesha = hashlib.sha256()
|
||||||
|
print('{0}({1}): Fetching/verifying live sum...'.format(epID, feed))
|
||||||
|
with urlopen(sums[feed][epID]['uri']) as url:
|
||||||
|
for chunk in iter(lambda: url.read(4096), b''):
|
||||||
|
livesha.update(chunk)
|
||||||
|
sums[feed][epID]['livesha'] = livesha.hexdigest()
|
||||||
|
if sums[feed][epID]['livesha'] != sums[feed][epID]['guid']:
|
||||||
|
print('\t\tWARNING: GUID {1} does not match live sum {1}!'.format(sums[feed][epID]['guid'],
|
||||||
|
sums[feed][epID]['livesha']))
|
||||||
|
if args.locdir:
|
||||||
|
localdir = os.path.abspath(os.path.expanduser(args.locdir))
|
||||||
|
if not os.path.isdir(localdir):
|
||||||
|
exit('ERROR: Directory {0} does not exist!'.format(args.locdir))
|
||||||
|
episodes = sums[args.feedlist[0]]
|
||||||
|
print('Checking local files...')
|
||||||
|
for episode in episodes.keys():
|
||||||
|
filename = episodes[episode]['file']
|
||||||
|
guid = episodes[episode]['guid']
|
||||||
|
for localfile in glob.iglob('{0}/**/{1}'.format(localdir, filename), recursive = True):
|
||||||
|
localsha = hashlib.sha256()
|
||||||
|
print('Checking {0}...'.format(localfile))
|
||||||
|
with open(localfile, 'rb') as f:
|
||||||
|
for chunk in iter(lambda: f.read(4096), b''):
|
||||||
|
localsha.update(chunk)
|
||||||
|
if localsha.hexdigest() != guid:
|
||||||
|
print('WARNING: GUID {0} does not match local hash {1}!'.format(guid, localsha.hexdigest()))
|
||||||
|
print('Finished checking local files.')
|
||||||
|
if not args.locdir and not args.livesums:
|
||||||
|
for episode in sums[args.feedlist[0]].keys():
|
||||||
|
print(episode + ':')
|
||||||
|
for feed in args.feedlist:
|
||||||
|
print('\t{0:6}: {1}'.format(feed,
|
||||||
|
sums[feed][episode]['guid']))
|
||||||
|
return(sums)
|
||||||
|
|
||||||
|
def parseArgs():
|
||||||
|
args = argparse.ArgumentParser(description = 'Sysadministrivia Verifier',
|
||||||
|
epilog = 'https://git.square-r00t.net/Podloader')
|
||||||
|
args.add_argument('-l',
|
||||||
|
'--live',
|
||||||
|
dest = 'livesums',
|
||||||
|
action = 'store_true',
|
||||||
|
help = 'If specified, calculate the sums live from the site and compare against the GUIDs served. This can take a long time.')
|
||||||
|
args.add_argument('-f',
|
||||||
|
'--feed',
|
||||||
|
choices = ['itunes', 'google', 'mp3', 'ogg'],
|
||||||
|
dest = 'feedlist',
|
||||||
|
nargs = '*',
|
||||||
|
default = ['itunes', 'google', 'mp3', 'ogg'],
|
||||||
|
help = 'Which feed(s) to check. The default is all. Multiple can be specified via "-f itunes google" etc.')
|
||||||
|
args.add_argument('-d',
|
||||||
|
'--directory',
|
||||||
|
dest = 'locdir',
|
||||||
|
metavar = 'path',
|
||||||
|
default = False,
|
||||||
|
help = 'If specified, a directory where local copies of the episodes exist. (e.g. ~/gPodder/Downloads/Sysadministrivia)')
|
||||||
|
return(args)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
args = parseArgs().parse_args()
|
||||||
|
xml = getXML(baseurl, feeds, args)
|
||||||
|
sums = getSums(xml, args)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Reference in New Issue
Block a user