#!/usr/bin/env python3
import os
import argparse
import textwrap
import tarfile
import io
import re
import pydoc
from urllib . request import urlopen
# TODO: support non-txt formats (e.g. PDF, HTML).
# TODO: add a search function (keyword or regex) that displays the RFC number and title.
def _extractRFCs(fileobj, rfcdir):
    """Unpack every ``rfcNNNN.txt`` member of a (compressed) tarball into *rfcdir*.

    The archive is read as a forward-only stream, so it is never held in
    memory as a whole. Each matching member is written to ``<rfcdir>/<NNNN>``
    (no ".txt" extension); every other member is ignored.

    :param fileobj: Binary file-like object positioned at the start of the tarball.
    :param rfcdir: Existing directory that receives the extracted files.
    """
    member_re = re.compile(r'^rfc([0-9]+)\.txt$')
    # 'r|*' = streaming read with transparent compression detection.
    with tarfile.open(fileobj=fileobj, mode='r|*') as tarball:
        for member in tarball:
            matched = member_re.match(member.name)
            # extractfile() returns None for anything that is not a regular
            # file (directories, links), so skip those up front.
            if matched is None or not member.isfile():
                continue
            filedest = os.path.join(rfcdir, matched.group(1))
            with tarball.extractfile(member) as src, open(filedest, 'wb') as dst:
                dst.write(src.read())


def downloadRFC(destdir, rfcnum):
    """Download one RFC, or all published RFCs, as plain text below *destdir*.

    Files are stored as ``<destdir>/txt/<number>`` (no ".txt" extension).

    :param destdir: Base directory; ``<destdir>/txt`` is created if missing.
    :param rfcnum: RFC number (int or str), or the special value "all"
                   (case-insensitive) to fetch and unpack the full tarball.
    :raises urllib.error.URLError: if the download fails (includes HTTP errors).
    :raises OSError: if the destination cannot be created or written.
    """
    # In case argparse interprets it as an int or it's entered in uppercase.
    rfcnum = str(rfcnum).lower()
    # Only plain text is supported for now, and the tarball holds plain-text
    # RFCs as well, so everything lands in the "txt" subdirectory.
    # TODO: pick the subdirectory/extension per format once html/pdf are supported.
    rfcdir = os.path.join(destdir, 'txt')
    os.makedirs(rfcdir, exist_ok=True)
    if rfcnum == 'all':
        with urlopen('https://www.rfc-editor.org/rfc/tar/RFC-all.tar.gz') as rfc:
            _extractRFCs(rfc, rfcdir)
        return
    rfcuri = 'https://tools.ietf.org/rfc/rfc{0}.txt'.format(rfcnum)
    # We don't need the .txt extension for plain text. Remove if someone complains.
    rfcpath = os.path.join(rfcdir, rfcnum)
    with urlopen(rfcuri) as rfc:
        content = rfc.read()
        with open(rfcpath, 'wb') as f:
            f.write(content)
def pageRFC(rfcnum):
    """Fetch a single RFC as plain text and display it in the terminal pager.

    :param rfcnum: RFC number (int or str); it is stringified and lowercased
                   before being placed in the URL.
    :raises urllib.error.URLError: if the download fails (includes HTTP errors).
    """
    # In case argparse interprets it as an int or it's entered in uppercase.
    rfcnum = str(rfcnum).lower()
    with urlopen('https://tools.ietf.org/rfc/rfc{0}.txt'.format(rfcnum)) as rfc:
        raw = rfc.read()
    # Decode leniently: an undecodable byte is shown as U+FFFD instead of
    # aborting with UnicodeDecodeError. The connection is already closed here,
    # so it is not held open while the pager waits on the user.
    pydoc.pager(raw.decode('utf-8', errors='replace'))
def parseArgs():
    """Build the command-line parser.

    Two subcommands are defined, with the chosen one stored in ``operation``:

    * ``d`` - download; takes ``-d/--destination`` (``destdir``) and an
      optional positional ``RFC`` (``rfcnum``) that defaults to "all".
    * ``p`` - page; takes a required positional ``RFC`` (``rfcnum``).

    :returns: The configured :class:`argparse.ArgumentParser` (not yet parsed).
    """
    parser = argparse.ArgumentParser(
        description='RFC Downloader/Viewer',
        epilog='TIP: this program has context-specific help. e.g. try "%(prog)s d -h"\nhttps://square-r00t.net/')
    subparsers = parser.add_subparsers(help='Operation to perform', dest='operation')
    downloadargs = subparsers.add_parser('d', help='Download an RFC/RFCs.')
    # TODO: add -b/--browser? redirect to lynx for html paging?
    pagerargs = subparsers.add_parser('p', help='Print the RFC to the terminal.')
    downloadargs.add_argument(
        '-d',
        '--destination',
        dest='destdir',
        metavar='DESTINATION',
        default='/usr/local/share/doc/rfc',
        help='The destination directory to save the RFC to. Will be created if it doesn\'t exist (assuming we have permissions). The default is /usr/local/share/doc/rfc/.')
    # nargs='?' makes the positional optional; without it argparse always
    # requires a value and the 'all' default could never take effect.
    downloadargs.add_argument(
        'rfcnum',
        metavar='RFC',
        nargs='?',
        default='all',
        help='The RFC number. If the special value "all" is used, then ALL of the published RFCs will be fetched.')
    pagerargs.add_argument(
        'rfcnum',
        metavar='RFC',
        help='The RFC number.')
    return parser
def main(argv=None):
    """Parse the command line and run the requested operation.

    :param argv: Optional list of argument strings to parse. ``None`` (the
                 default) makes argparse read ``sys.argv[1:]``, so existing
                 ``main()`` callers behave as before.
    """
    parser = parseArgs()
    args = parser.parse_args(argv)
    if args.operation == 'd':
        downloadRFC(args.destdir, args.rfcnum)
    elif args.operation == 'p':
        pageRFC(args.rfcnum)
    else:
        # No subcommand was given; show the usage text instead of doing nothing.
        parser.print_help()


if __name__ == '__main__':
    main()