This commit is contained in:
brent s 2018-09-27 14:30:21 -04:00
parent f169080f59
commit e1eefebf9d

View File

@ -1,110 +1,185 @@
#!/usr/bin/env python #!/usr/bin/env python


# Supports CentOS 6.9 and up, untested on lower versions. # Supports CentOS 6.9 and up, untested on lower versions.
# Lets you get a list of files for a given package name(s) without installing # Lets you dump a list of installed packages for backup purposes
# any extra packages (such as yum-utils for repoquery). # Reference: https://blog.fpmurphy.com/2011/08/programmatically-retrieve-rpm-package-details.html


import argparse import argparse
import json import copy
import datetime
import io
import re import re
# For when CentOS/RHEL switch to python 3 by default (if EVER).
import sys import sys
pyver = sys.version_info
try: try:
import rpm import yum
except ImportError: except ImportError:
exit('This script only runs on RHEL/CentOS/other RPM-based distros.') exit('This script only runs on RHEL/CentOS/other yum-based distros.')
# Detect RH version.
# Raw strings are used for both the pattern and the replacement template so that
# the regex escapes (\. and \g<3>) are not parsed as (invalid) string escapes.
ver_re = re.compile(r'^(centos( linux)? release) ([0-9\.]+) .*$', re.IGNORECASE)
# distro module isn't stdlib, and platform.linux_distribution() (AND platform.dist()) are both deprecated.
# So we get hacky and parse the release file ourselves.
# NOTE(review): ver_re only matches lines that start with "centos"; for any other
# release string sub() returns the line unchanged and int() below would raise
# ValueError - confirm whether non-CentOS RPM distros need to be handled here.
with open('/etc/redhat-release', 'r') as f:
    # Group 3 is the dotted version number; it is split into a list of ints,
    # e.g. "CentOS Linux release 7.5.1804 (Core)" -> [7, 5, 1804].
    ver = [int(i) for i in ver_re.sub(r'\g<3>', f.read().strip()).split('.')]
import pprint


def all_pkgs(): repo_re = re.compile('^@')
# Gets a list of all packages.
class PkgIndexer(object):
def __init__(self, **args):
    """Collect the installed packages and render the requested report.

    All keyword arguments are stored as the ``self.args`` dict. This method
    itself reads ``args['report']`` to pick the output generator; the other
    keys are consumed by the helper methods it calls.

    Work done here, in order: open a yum handle, fetch the package list via
    ``_pkglst()``, flatten it via ``_build_pkginfo()``, then run the
    generator that matches ``args['report']`` ('csv', 'json' or 'xml').
    Any other value runs no generator.
    """
    self.args = args
    self.pkgs = []
    self.yb = yum.YumBase()
    # Zero both yum verbosity knobs so the API stays quiet.
    for knob in ('debuglevel', 'errorlevel'):
        setattr(self.yb.preconf, knob, 0)
    self._pkgs = self._pkglst()
    self._build_pkginfo()
    # Report kind -> generator method; the first match wins.
    generators = (('csv', self._gen_csv),
                  ('json', self._gen_json),
                  ('xml', self._gen_xml))
    for kind, generate in generators:
        if self.args['report'] == kind:
            generate()
            break

def _pkglst(self):
pkgs = [] pkgs = []
trns = rpm.TransactionSet() # Get the list of packages
for p in trns.dbMatch(): if self.args['reason'] != 'all':
pkgs.append(p['name']) for p in sorted(self.yb.rpmdb.returnPackages()):
pkgs = list(sorted(set(pkgs))) if 'reason' not in p.yumdb_info:
continue
reason = getattr(p.yumdb_info, 'reason')
if reason == self.args['reason']:
pkgs.append(p)
else:
pkgs = sorted(self.yb.rpmdb.returnPackages())
return(pkgs) return(pkgs)


class FileGetter(object): def _build_pkginfo(self):
def __init__(self, symlinks = True, verbose = False, *args, **kwargs): for p in self._pkgs:
self.symlinks = symlinks _pkg = {'name': p.name,
self.verbose = verbose 'desc': p.summary,
self.trns = rpm.TransactionSet() 'version': p.ver,
'release': p.release,
'arch': p.arch,
'built': datetime.datetime.fromtimestamp(p.buildtime),
'installed': datetime.datetime.fromtimestamp(p.installtime),
'repo': repo_re.sub('', p.ui_from_repo),
'sizerpm': p.packagesize,
'sizedisk': p.installedsize}
self.pkgs.append(_pkg)


def getfiles(self, pkgnm): def _gen_csv(self):
files = {} if self.args['plain']:
for pkg in self.trns.dbMatch('name', pkgnm): _fields = ['name']
# The canonical package name
_pkgnm = pkg.sprintf('%{NAME}')
# Return just a list of files, or a dict of filepath:hash
# if verbose is enabled.
if self.verbose:
files[_pkgnm] = {}
else: else:
files[_pkgnm] = [] _fields = ['name', 'version', 'release', 'arch', 'desc', 'built',
for f in pkg.fiFromHeader(): 'installed', 'repo', 'sizerpm', 'sizedisk']
_symlink = (True if re.search('^0+$', f[12]) else False) import csv
if self.verbose: if sys.hexversion >= 0x30000f0:
if _symlink: _buf = io.StringIO()
if self.symlinks:
files[_pkgnm][f[0]] = '(symbolic link)'
continue
files[_pkgnm][f[0]] = f[12]
else: else:
# Skip if it is a symlink but they aren't enabled _buf = io.BytesIO()
if _symlink and not self.symlinks: _csv = csv.writer(_buf, delimiter = self.args['sep_char'])
continue if self.args['header']:
if self.args['plain']:
_csv.writerow(['Name'])
else: else:
files[_pkgnm].append(f[0]) _csv.writerow(['Name', 'Version', 'Release', 'Architecture', 'Description', 'Build Time',
files[_pkgnm].sort() 'Install Time', 'Repository', 'Size (RPM)', 'Size (On-Disk)'])
return(files) _csv = csv.DictWriter(_buf, fieldnames = _fields, extrasaction = 'ignore', delimiter = self.args['sep_char'])
for p in self.pkgs:
_csv.writerow(p)
_buf.seek(0, 0)
self.report = _buf.read()
return()

def _gen_json(self):
import json
self.report = json.dumps(self.pkgs, default = str, indent = 4)
return()

def _gen_xml(self):
    """Store ``self.pkgs`` as an XML document in ``self.report``.

    The root element is <packages>; every entry of ``self.pkgs`` becomes one
    <package> child whose attributes are that entry's key/value pairs. When
    ``self.args['header']`` is true, the output also carries an XML
    declaration and is encoded as UTF-8. Returns an empty tuple.
    """
    from lxml import etree
    # These four values are converted with str() before being used as attributes.
    stringify = ('built', 'installed', 'sizerpm', 'sizedisk')
    root = etree.Element('packages')
    for entry in self.pkgs:
        # Work on a copy so the records in self.pkgs are left untouched.
        attrs = copy.deepcopy(entry)
        attrs.update(dict((key, str(attrs[key])) for key in stringify))
        # NOTE: a more complex, nested layout was started here at one point;
        # the flat one-attribute-per-field form is what is emitted.
        root.append(etree.Element('package', attrib = attrs))
    render_opts = {'pretty_print': True}
    if self.args['header']:
        render_opts['xml_declaration'] = True
        render_opts['encoding'] = 'UTF-8'
    self.report = etree.tostring(root, **render_opts)
    return ()



def parseArgs(): def parseArgs():
args = argparse.ArgumentParser(description = ( args = argparse.ArgumentParser(description = ('This script lets you dump the list of installed packages'))
'This script allows you get a list of files for a given ' args.add_argument('-p', '--plain',
'package name(s) without installing any extra packages ' dest = 'plain',
'(such as yum-utils for repoquery).'))
args.add_argument('-l', '--ignore-symlinks',
dest = 'symlinks',
action = 'store_false',
help = ('If specified, don\'t report files that are ' +
'symlinks in the RPM'))
args.add_argument('-v', '--verbose',
dest = 'verbose',
action = 'store_true', action = 'store_true',
help = ('If specified, include the hashes of the files')) help = 'If specified, only create a list of plain package names (i.e. don\'t include extra '
args.add_argument('-p', '--package', 'information)')
dest = 'pkgs', args.add_argument('-n', '--no-header',
#nargs = 1, dest = 'header',
metavar = 'PKGNAME', action = 'store_false',
action = 'append', help = 'If specified, do not print column headers/XML headers')
default = [], args.add_argument('-s', '--separator',
help = ('If specified, restrict the list of ' + dest = 'sep_char',
'packages to check against to only this ' + default = ',',
'package. Can be specified multiple times. ' + help = 'The separator used to split fields in the output (default: ,) (only used for CSV '
'HIGHLY RECOMMENDED')) 'reports)')
rprt = args.add_mutually_exclusive_group()
rprt.add_argument('-c', '--csv',
dest = 'report',
default = 'csv',
action = 'store_const',
const = 'csv',
help = 'Generate CSV output (this is the default). See -n/--no-header, -s/--separator')
rprt.add_argument('-x', '--xml',
dest = 'report',
default = 'csv',
action = 'store_const',
const = 'xml',
help = 'Generate XML output (requires the LXML module: yum install python-lxml)')
rprt.add_argument('-j', '--json',
dest = 'report',
default = 'csv',
action = 'store_const',
const = 'json',
help = 'Generate JSON output')
rsn = args.add_mutually_exclusive_group()
rsn.add_argument('-a', '--all',
dest = 'reason',
default = 'all',
action = 'store_const',
const = 'all',
help = ('Parse/report all packages that are currently installed. '
'Conflicts with -u/--user and -d/--dep. '
'This is the default'))
rsn.add_argument('-u', '--user',
dest = 'reason',
default = 'all',
action = 'store_const',
const = 'user',
help = ('Parse/report only packages which were explicitly installed. '
'Conflicts with -a/--all and -d/--dep'))
rsn.add_argument('-d', '--dep',
dest = 'reason',
default = 'all',
action = 'store_const',
const = 'dep',
help = ('Parse/report only packages which were installed to satisfy a dependency. '
'Conflicts with -a/--all and -u/--user'))
return(args) return(args)


def main(): def main():
args = vars(parseArgs().parse_args()) args = vars(parseArgs().parse_args())
if not args['pkgs']: p = PkgIndexer(**args)
prompt_str = ( print(p.report)
'You have not specified any package names.\nThis means we will '
'get file lists for EVERY SINGLE installed package.\nThis is a '
'LOT of output and can take a few moments.\nIf this was a '
'mistake, you can hit ctrl-c now.\nOtherwise, hit the enter key '
'to continue.\n')
sys.stderr.write(prompt_str)
if pyver.major >= 3:
input()
elif pyver.major == 2:
raw_input()
args['pkgs'] = all_pkgs()
gf = FileGetter(**args)
file_rslts = {}
for p in args['pkgs']:
if p not in file_rslts.keys():
file_rslts[p] = gf.getfiles(p)
print(json.dumps(file_rslts, indent = 4))
return() return()


if __name__ == '__main__': if __name__ == '__main__':