#!/usr/bin/env python

# Supports CentOS 6.9 and up, untested on lower versions.
# Lets you dump a list of installed packages for backup purposes
# Reference: https://blog.fpmurphy.com/2011/08/programmatically-retrieve-rpm-package-details.html

import argparse
import copy
import datetime
import io
import re
import sys
try:
    import yum
except ImportError:
    exit('This script only runs on RHEL/CentOS/other yum-based distros.')
# Detect the CentOS version from the release banner, e.g.
# "CentOS Linux release 7.9.2009 (Core)" -> [7, 9, 2009].
# The pattern is a raw string so that "\." reaches the regex engine untouched
# instead of being an invalid string escape on newer Python versions.
ver_re = re.compile(r'^(centos( linux)? release) ([0-9\.]+) .*$', re.IGNORECASE)
# distro module isn't stdlib, and platform.linux_distribution() (AND platform.dist()) are both deprecated
# (and removed in Python 3.8). So we get hacky.
with open('/etc/redhat-release', 'r') as f:
    _ver_match = ver_re.match(f.read().strip())
# A banner that does not match the CentOS pattern (for instance another
# yum-based distro) leaves the version unknown as an empty list, rather than
# aborting the whole script on int() of the unparsed banner text.
ver = [int(i) for i in _ver_match.group(3).split('.')] if _ver_match else []
import pprint

# Matches a single leading "@"; PkgIndexer._build_pkginfo() uses it to strip
# that prefix from each package's ui_from_repo value.
repo_re = re.compile('^@')

class PkgIndexer(object):
    """Collect the installed packages through the yum API and render a report.

    Keyword arguments (any that are omitted fall back to the command-line
    defaults listed in _DEFAULTS):
        reason   -- 'all' to keep every installed package, otherwise only
                    packages whose yumdb "reason" equals this value
                    (the command line offers 'user' and 'dep').
        report   -- 'csv', 'json' or 'xml'.
        plain    -- if true, report package names only.
        header   -- if true, emit the CSV column header / XML declaration.
        sep_char -- field delimiter for CSV reports.

    Attributes available after construction:
        pkgs   -- list of dicts, one per selected package.
        report -- the rendered report, or None if the report type was not
                  one of the three known formats.
    """

    # Mirrors the defaults of the command-line parser so the class can also be
    # instantiated with only some (or none) of the options.
    _DEFAULTS = {'plain': False,
                 'header': True,
                 'sep_char': ',',
                 'report': 'csv',
                 'reason': 'all'}

    # (package dict key, CSV column title), in output order.
    _CSV_COLUMNS = (('name', 'Name'),
                    ('version', 'Version'),
                    ('release', 'Release'),
                    ('arch', 'Architecture'),
                    ('desc', 'Description'),
                    ('built', 'Build Time'),
                    ('installed', 'Install Time'),
                    ('repo', 'Repository'),
                    ('sizerpm', 'Size (RPM)'),
                    ('sizedisk', 'Size (On-Disk)'))

    def __init__(self, **args):
        self.pkgs = []
        # Always defined, so an unrecognised report type leaves None behind
        # instead of a missing attribute.
        self.report = None
        self.args = dict(self._DEFAULTS, **args)
        self.yb = yum.YumBase()
        # Make the Yum API shut the heck up.
        self.yb.preconf.debuglevel = 0
        self.yb.preconf.errorlevel = 0
        self._pkgs = self._pkglst()
        self._build_pkginfo()
        renderers = {'csv': self._gen_csv,
                     'json': self._gen_json,
                     'xml': self._gen_xml}
        render = renderers.get(self.args['report'])
        if render is not None:
            render()

    def _pkglst(self):
        """Return the sorted installed packages, filtered by the 'reason' option."""
        installed = sorted(self.yb.rpmdb.returnPackages())
        wanted = self.args['reason']
        if wanted == 'all':
            return installed
        # Packages without a recorded reason never match a specific filter.
        return [p for p in installed
                if 'reason' in p.yumdb_info
                and getattr(p.yumdb_info, 'reason') == wanted]

    def _build_pkginfo(self):
        """Append one plain dict per selected package to self.pkgs."""
        for p in self._pkgs:
            self.pkgs.append({
                'name': p.name,
                'desc': p.summary,
                'version': p.ver,
                'release': p.release,
                'arch': p.arch,
                'built': datetime.datetime.fromtimestamp(p.buildtime),
                'installed': datetime.datetime.fromtimestamp(p.installtime),
                'repo': repo_re.sub('', p.ui_from_repo),
                'sizerpm': p.packagesize,
                'sizedisk': p.installedsize,
            })

    def _gen_csv(self):
        """Render self.pkgs as CSV text into self.report."""
        import csv
        if self.args['plain']:
            columns = self._CSV_COLUMNS[:1]
        else:
            columns = self._CSV_COLUMNS
        # The csv module writes text on Python 3 and byte strings on Python 2.
        if sys.hexversion >= 0x30000f0:
            _buf = io.StringIO()
        else:
            _buf = io.BytesIO()
        # Ask the writer for "\n" row endings directly; rewriting "\r\n" in
        # the finished output would also alter CRLF inside quoted field values.
        _opts = {'delimiter': self.args['sep_char'], 'lineterminator': '\n'}
        if self.args['header']:
            csv.writer(_buf, **_opts).writerow([title for _, title in columns])
        _rows = csv.DictWriter(_buf,
                               fieldnames=[key for key, _ in columns],
                               extrasaction='ignore',
                               **_opts)
        for p in self.pkgs:
            _rows.writerow(p)
        self.report = _buf.getvalue()

    def _gen_json(self):
        """Render self.pkgs as indented JSON text into self.report."""
        import json
        if self.args['plain']:
            self.report = json.dumps([p['name'] for p in self.pkgs], indent=4)
        else:
            # default=str turns the datetime values into their str() form.
            self.report = json.dumps(self.pkgs, default=str, indent=4)

    def _gen_xml(self):
        """Render self.pkgs as <packages><package .../></packages> into self.report.

        Requires the third-party lxml module.
        """
        from lxml import etree
        _xml = etree.Element('packages')
        for p in self.pkgs:
            if self.args['plain']:
                _attrib = {'name': p['name']}
            else:
                # Shallow copy is enough: only top-level values are replaced.
                _attrib = dict(p)
                # XML attribute values must be strings.
                for i in ('built', 'installed', 'sizerpm', 'sizedisk'):
                    _attrib[i] = str(_attrib[i])
            _xml.append(etree.Element('package', attrib=_attrib))
        if self.args['header']:
            _report = etree.tostring(_xml, pretty_print=True, xml_declaration=True, encoding='UTF-8')
        else:
            _report = etree.tostring(_xml, pretty_print=True)
        # On Python 3 tostring() hands back bytes here; decode so the report
        # prints as text rather than as a b'...' literal. On Python 2 the
        # result is already a str and is left alone.
        if not isinstance(_report, str):
            _report = _report.decode('utf-8')
        self.report = _report


def parseArgs():
    """Build the command-line parser for the package dump script.

    Returns the configured argparse.ArgumentParser itself (not a parsed
    namespace); the caller is expected to invoke parse_args() on it.
    """
    parser = argparse.ArgumentParser(
        description='This script lets you dump the list of installed packages')

    # Switches that shape the output.
    parser.add_argument(
        '-p', '--plain',
        dest='plain',
        action='store_true',
        help="If specified, only create a list of plain package names (i.e. don't include extra information)")
    parser.add_argument(
        '-n', '--no-header',
        dest='header',
        action='store_false',
        help='If specified, do not print column headers/XML headers')
    parser.add_argument(
        '-s', '--separator',
        dest='sep_char',
        default=',',
        help='The separator used to split fields in the output (default: ,) (only used for CSV reports)')

    # Report format: at most one flag may be given, each stores its constant
    # into 'report'.
    report_flags = (
        ('-c', '--csv', 'csv',
         'Generate CSV output (this is the default). See -n/--no-header, -s/--separator'),
        ('-x', '--xml', 'xml',
         'Generate XML output (requires the LXML module: yum install python-lxml)'),
        ('-j', '--json', 'json',
         'Generate JSON output'),
    )
    report_group = parser.add_mutually_exclusive_group()
    for short_flag, long_flag, value, text in report_flags:
        report_group.add_argument(short_flag, long_flag,
                                  dest='report',
                                  default='csv',
                                  action='store_const',
                                  const=value,
                                  help=text)

    # Package selection: at most one flag may be given, each stores its
    # constant into 'reason'.
    reason_flags = (
        ('-a', '--all', 'all',
         'Parse/report all packages that are currently installed. '
         'Conflicts with -u/--user and -d/--dep. '
         'This is the default'),
        ('-u', '--user', 'user',
         'Parse/report only packages which were explicitly installed. '
         'Conflicts with -a/--all and -d/--dep'),
        ('-d', '--dep', 'dep',
         'Parse/report only packages which were installed to satisfy a dependency. '
         'Conflicts with -a/--all and -u/--user'),
    )
    reason_group = parser.add_mutually_exclusive_group()
    for short_flag, long_flag, value, text in reason_flags:
        reason_group.add_argument(short_flag, long_flag,
                                  dest='reason',
                                  default='all',
                                  action='store_const',
                                  const=value,
                                  help=text)
    return parser

def main():
    """Parse the command line, build the requested report and print it."""
    options = vars(parseArgs().parse_args())
    # Constructing the indexer does all the work; the result is in .report.
    indexer = PkgIndexer(**options)
    print(indexer.report)
    return ()


if __name__ == '__main__':
    main()