pretty happy with this now.
This commit is contained in:
parent
9931d64d48
commit
d2d405a36a
105
verifyfeed.py
Executable file
105
verifyfeed.py
Executable file
@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# https://sysadministrivia.com/news/every-new-beginning
|
||||
|
||||
import hashlib
|
||||
import argparse
|
||||
import os
|
||||
import glob
|
||||
from urllib.request import urlopen
|
||||
try:
|
||||
from lxml import etree
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
# Root of the site that serves every feed; feed paths below are appended to it.
baseurl = 'https://sysadministrivia.com'

# Feed name (the values accepted by -f/--feed) -> path of that feed's XML.
feeds = dict(
    itunes='/feed/itunes.xml',
    google='/feed/google.xml',
    mp3='/feed/podcast.xml',
    ogg='/feed/oggcast.xml',
)
|
||||
|
||||
def getXML(baseurl, feeds, args):
    """Download and parse the XML document of every requested feed.

    baseurl -- string prepended to each feed path to form the URL.
    feeds   -- mapping of feed name to the feed's path.
    args    -- object whose ``feedlist`` attribute lists the feed names to fetch.

    Returns a dict mapping each requested feed name to the element returned
    by ``etree.fromstring()`` for that feed's document.
    """
    parsed = {}
    print('Fetching feed(s) XML, please wait...')
    for name in args.feedlist:
        location = baseurl + feeds[name]
        with urlopen(location) as response:
            payload = response.read()
        parsed[name] = etree.fromstring(payload)
    return parsed
|
||||
|
||||
def _sha256_hexdigest(stream, chunksize=4096):
    """Return the SHA-256 hex digest of a binary file-like object, read in chunks."""
    digest = hashlib.sha256()
    for chunk in iter(lambda: stream.read(chunksize), b''):
        digest.update(chunk)
    return digest.hexdigest()


def getSums(xml, args):
    """Collect per-episode data from the parsed feeds and run the requested checks.

    xml  -- dict mapping feed name to that feed's parsed root element; each
            ``channel/item`` must provide ``title``, ``enclosure`` (with a
            ``url`` attribute) and ``guid``.
    args -- object with the attributes ``feedlist`` (feed names to process),
            ``livesums`` (truthy: download every enclosure and compare its
            SHA-256 with the GUID) and ``locdir`` (truthy: directory that is
            searched recursively for local copies to verify).

    Returns ``{feed: {episode_id: {'uri', 'guid', 'file'[, 'livesha']}}}``,
    where the episode id is the part of the title before the first ``:``.
    When neither check is requested, the GUIDs are printed per episode instead.

    Raises SystemExit if ``locdir`` is given but is not an existing directory.
    """
    sums = {}
    # An empty feed list (e.g. "-f" given without names) leaves nothing to do;
    # without this guard args.feedlist[0] below raises IndexError.
    if not args.feedlist:
        return sums

    # Validate the directory up front so a typo is reported before the
    # (potentially very long) live download instead of after it.
    localdir = None
    if args.locdir:
        localdir = os.path.abspath(os.path.expanduser(args.locdir))
        if not os.path.isdir(localdir):
            raise SystemExit('ERROR: Directory {0} does not exist!'.format(args.locdir))

    for feed in args.feedlist:
        sums[feed] = {}
        for episode in xml[feed].findall('channel/item'):
            epID = episode.find('title').text.split(':')[0]
            uri = episode.find('enclosure').attrib['url']
            entry = {'uri': uri,
                     'guid': episode.find('guid').text,
                     'file': os.path.basename(uri)}
            sums[feed][epID] = entry
            if args.livesums:
                print('{0}({1}): Fetching/verifying live sum...'.format(epID, feed))
                with urlopen(uri) as url:
                    entry['livesha'] = _sha256_hexdigest(url)
                if entry['livesha'] != entry['guid']:
                    # Placeholders are {0}/{1}: the GUID first, then the live sum.
                    print('\t\tWARNING: GUID {0} does not match live sum {1}!'.format(entry['guid'],
                                                                                      entry['livesha']))

    if localdir is not None:
        # Only the first requested feed's file names are looked up locally.
        episodes = sums[args.feedlist[0]]
        print('Checking local files...')
        for entry in episodes.values():
            guid = entry['guid']
            # Escape both parts so characters such as "[" in a path or file
            # name are matched literally rather than treated as wildcards.
            pattern = '{0}/**/{1}'.format(glob.escape(localdir), glob.escape(entry['file']))
            for localfile in glob.iglob(pattern, recursive=True):
                print('Checking {0}...'.format(localfile))
                with open(localfile, 'rb') as f:
                    localsha = _sha256_hexdigest(f)
                if localsha != guid:
                    print('WARNING: GUID {0} does not match local hash {1}!'.format(guid, localsha))
        print('Finished checking local files.')

    if not args.locdir and not args.livesums:
        # No check requested: list every feed's GUID for each episode of the
        # first requested feed.
        for episode in sums[args.feedlist[0]]:
            print(episode + ':')
            for feed in args.feedlist:
                print('\t{0:6}: {1}'.format(feed,
                                            sums[feed][episode]['guid']))
    return sums
|
||||
|
||||
def parseArgs():
    """Build the command-line parser for the verifier.

    Returns the configured ``argparse.ArgumentParser`` itself (callers invoke
    ``parse_args()`` on it). Options defined:

      -l / --live        stored as ``livesums``; boolean flag.
      -f / --feed        stored as ``feedlist``; zero or more of the known
                         feed names, all four when the option is omitted.
      -d / --directory   stored as ``locdir``; a path, ``False`` when omitted.
    """
    feednames = ['itunes', 'google', 'mp3', 'ogg']

    parser = argparse.ArgumentParser(
        description='Sysadministrivia Verifier',
        epilog='https://git.square-r00t.net/Podloader',
    )
    parser.add_argument(
        '-l', '--live',
        action='store_true',
        dest='livesums',
        help='If specified, calculate the sums live from the site and compare against the GUIDs served. This can take a long time.',
    )
    # choices and default get separate list objects, so mutating the parsed
    # feedlist can never alter the accepted choices.
    parser.add_argument(
        '-f', '--feed',
        nargs='*',
        dest='feedlist',
        choices=list(feednames),
        default=list(feednames),
        help='Which feed(s) to check. The default is all. Multiple can be specified via "-f itunes google" etc.',
    )
    parser.add_argument(
        '-d', '--directory',
        metavar='path',
        dest='locdir',
        default=False,
        help='If specified, a directory where local copies of the episodes exist. (e.g. ~/gPodder/Downloads/Sysadministrivia)',
    )
    return parser
|
||||
|
||||
def main():
    """Parse the command line, fetch the requested feeds and run the checks."""
    options = parseArgs().parse_args()
    documents = getXML(baseurl, feeds, options)
    # getSums prints its findings itself; its return value is not used here.
    getSums(documents, options)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
Reference in New Issue
Block a user