optools/ref/rfc.py

103 lines
4.4 KiB
Python
Raw Normal View History

2017-11-12 13:44:06 -05:00
#!/usr/bin/env python3
import os
import argparse
import textwrap
import tarfile
import io
import re
import pydoc
from urllib.request import urlopen
# TODO: non-txt format support? (e.g. PDF, HTML)
def downloadRFC(destdir, rfcnum, *, rfcuri = None):
    """Download one RFC, or every published RFC, as plain text.

    Files are written to ``<destdir>/txt/<number>`` (deliberately without a
    ``.txt`` suffix); the directory is created if it does not exist.

    Args:
        destdir: Base directory to save into.
        rfcnum: The RFC number (int or str), or the special value ``'all'``
            (case-insensitive) to fetch the gzipped tarball of all RFCs and
            unpack every ``rfc<N>.txt`` member from it.
        rfcuri: Optional URI to fetch from instead of the built-in location,
            e.g. a mirror or a local ``file://`` copy. ``None`` (the default)
            keeps the built-in rfc-editor.org / tools.ietf.org URIs.

    Raises:
        OSError: If the download fails (urllib's ``URLError`` is an
            ``OSError``) or the destination cannot be created or written.
        tarfile.TarError: If the "all" archive cannot be read.
    """
    # argparse hands us a str, but callers may pass an int or upper-case text.
    rfcnum = str(rfcnum).lower()
    # Content subtype -> subdirectory name under destdir.
    #   'plain'  = raw text
    #   'html'   = html text   (reserved for future multi-format support)
    #   'pdf'    = pdf text    (reserved for future multi-format support)
    #   'x-gzip' = the gzipped tarball behind the "all" keyword; its members
    #              are plain text, so they share the 'txt' directory.
    filext = {'plain': 'txt', 'html': 'html', 'pdf': 'pdf', 'x-gzip': 'txt'}
    if rfcnum == 'all':
        defaulturi = 'https://www.rfc-editor.org/rfc/tar/RFC-all.tar.gz'
        filetype = 'x-gzip'
    else:
        defaulturi = 'https://tools.ietf.org/rfc/rfc{0}.txt'.format(rfcnum)
        filetype = 'plain'
    if rfcuri is None:
        rfcuri = defaulturi
    rfcdir = os.path.join(destdir, filext[filetype])
    os.makedirs(rfcdir, exist_ok = True)
    # TODO: once multi-format support exists, the response's content subtype
    # could be inspected here instead of relying on the 'all' keyword.
    with urlopen(rfcuri) as rfc:
        payload = rfc.read()
    if filetype != 'x-gzip':
        # Single RFC: store the body verbatim. No .txt extension for plain
        # text, by design.
        with open(os.path.join(rfcdir, rfcnum), 'wb') as f:
            f.write(payload)
        return
    # Tarball: only members named rfc<digits>.txt are extracted, each saved
    # under its bare number. The output path is built from the captured digits
    # rather than the member name, so archive paths never reach the filesystem.
    memberpattern = re.compile(r'^rfc([0-9]+)\.txt$')
    with tarfile.open(fileobj = io.BytesIO(payload)) as tarball:
        for member in tarball.getmembers():
            matched = memberpattern.match(member.name)
            # Skip other files, and anything that is not a regular file
            # (extractfile() would return None for those).
            if matched is None or not member.isfile():
                continue
            with tarball.extractfile(member) as src:
                with open(os.path.join(rfcdir, matched.group(1)), 'wb') as f:
                    f.write(src.read())
def pageRFC(rfcnum):
    """Fetch the plain-text RFC and display it through pydoc's pager.

    Args:
        rfcnum: The RFC number (int or str); it is stringified and
            lower-cased before being placed in the URI.
    """
    number = str(rfcnum).lower()
    uri = 'https://tools.ietf.org/rfc/rfc{0}.txt'.format(number)
    with urlopen(uri) as response:
        # The body is decoded as UTF-8 before it is handed to the pager.
        text = response.read().decode('utf-8')
        pydoc.pager(text)
def parseArgs():
    """Build the command-line parser for the RFC downloader/viewer.

    Two subcommands are defined; the chosen one is stored in the
    ``operation`` attribute of the parsed namespace (``None`` if omitted):

    * ``d`` -- download. Takes ``-d/--destination`` (stored as ``destdir``)
      and an optional ``RFC`` positional (stored as ``rfcnum``) that defaults
      to ``'all'``.
    * ``p`` -- page. Takes a required ``RFC`` positional (``rfcnum``).

    Returns:
        argparse.ArgumentParser: The parser itself (not parsed arguments), so
        the caller can also call ``print_help()`` on it.
    """
    args = argparse.ArgumentParser(
        description = 'RFC Downloader/Viewer',
        # The default formatter re-wraps the epilog and would swallow the
        # newline separating the tip from the URL; the raw formatter keeps it.
        formatter_class = argparse.RawDescriptionHelpFormatter,
        epilog = ('TIP: this program has context-specific help. e.g. try "%(prog)s d -h"\n'
                  'https://square-r00t.net/'))
    subparsers = args.add_subparsers(help = 'Operation to perform', dest = 'operation')
    downloadargs = subparsers.add_parser('d', help = 'Download an RFC/RFCs.')
    # TODO: add -b/--browser? redirect to lynx for html paging?
    pagerargs = subparsers.add_parser('p', help = 'Print the RFC to the terminal.')
    downloadargs.add_argument('-d',
                              '--destination',
                              dest = 'destdir',
                              metavar = 'DESTINATION',
                              default = '/usr/local/share/doc/rfc',
                              help = ("The destination directory to save the RFC to. Will be created if it "
                                      "doesn't exist (assuming we have permissions). The default is "
                                      "/usr/local/share/doc/rfc/."))
    # nargs = '?' is what makes the default reachable: without it argparse
    # treats the positional as required and never applies 'all'.
    downloadargs.add_argument(dest = 'rfcnum',
                              metavar = 'RFC',
                              nargs = '?',
                              default = 'all',
                              help = ('The RFC number. If the special value "all" is used, then ALL of the '
                                      'published RFCs will be fetched.'))
    pagerargs.add_argument(dest = 'rfcnum',
                           metavar = 'RFC',
                           help = 'The RFC number.')
    return args
def main():
    """Parse the command line and run the selected operation.

    ``d`` downloads, ``p`` pages; with no subcommand the help text is
    printed instead.
    """
    parser = parseArgs()
    opts = parser.parse_args()
    operation = opts.operation
    if operation == 'd':
        downloadRFC(opts.destdir, opts.rfcnum)
        return
    if operation == 'p':
        pageRFC(opts.rfcnum)
        return
    # No subcommand was given; show usage.
    parser.print_help()
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()