diff --git a/TODO b/TODO index 334588a..8e7d1a1 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,11 @@ - write classes/functions - XML-based config -x XML syntax ---- regex btags - case-insensitive? this can be represented in-pattern: - https://stackoverflow.com/a/9655186/733214 +--- xregex btags - case-insensitive? this can be represented in-pattern: + xhttps://stackoverflow.com/a/9655186/733214 -x configuration generator ---- print end result xml config to stderr for easier redirection? or print prompts to stderr and xml to stdout? --- XSD for validation +--- xprint end result xml config to stderr for easier redirection? or print prompts to stderr and xml to stdout? +-- xXSD for validation -- Flask app for generating config? -- TKinter (or pygame?) GUI? --- https://docs.python.org/3/faq/gui.html @@ -16,12 +16,9 @@ - locking - for docs, 3.x (as of 3.10) was 2.4M. -- Need ability to write/parse mtree specs (or a similar equivalent) for applying ownerships/permissions to overlay files +- xNeed ability to write/parse mtree specs (or a similar equivalent) for applying ownerships/permissions to overlay files +-- parsing is done. writing may? come later. -- need to package: - python-hashid (https://psypanda.github.io/hashID/, - https://github.com/psypanda/hashID, - https://pypi.org/project/hashID/) - package for PyPI: # https://packaging.python.org/tutorials/distributing-packages/ @@ -37,7 +34,6 @@ BUGS.SQUARE-R00T.NET bugs/tasks: #14: Use os.path.join() for more consistency/pythonicness #24: Run as regular user? (pychroot? fakeroot?) #34: Build-time support for only building single phase of build -#36: Allow parsing pkg lists with inline comments #39: Fix UEFI #40: ISO overlay (to add e.g. memtest86+ to final ISO) -#43: Support resuming partial tarball downloads (Accept-Ranges: bytes) \ No newline at end of file +#43: Support resuming partial tarball downloads (Accept-Ranges: bytes) diff --git a/bdisk/BIOS.py b/bdisk/BIOS.py index 012fb51..cd314bb 100644 --- a/bdisk/BIOS.py +++ b/bdisk/BIOS.py @@ -1,3 +1,4 @@ import jinja2 import os import shutil + diff --git a/bdisk/GPG.py b/bdisk/GPG.py index 202b62e..72a7f80 100644 --- a/bdisk/GPG.py +++ b/bdisk/GPG.py @@ -3,6 +3,17 @@ import os import psutil import gpg.errors + +# This helps translate the input name from the conf to a string compatible with the gpg module. +_algmaps = {#'cv': 'cv{keysize}', # DISABLED, can't sign (only encrypt). Currently only 25519 + 'ed': 'ed{keysize}', # Currently only 25519 + #'elg': 'elg{}', # DISABLED, can't sign (only encrypt). 1024, 2048, 4096 + 'nist': 'nistp{keysize}', # 256, 384, 521 + 'brainpool.1': 'brainpoolP{keysize}r1', # 256, 384, 512 + 'sec.k1': 'secp{keysize}k1', # Currently only 256 + 'rsa': 'rsa{keysize}', # Variable (1024 <> 4096), but we only support 1024, 2048, 4096 + 'dsa': 'dsa{keysize}'} # Variable (768 <> 3072), but we only support 768, 2048, 3072 + # http://files.au.adversary.org/crypto/GPGMEpythonHOWTOen.html # https://www.gnupg.org/documentation/manuals/gpgme.pdf # Support ECC? 
https://www.gnupg.org/faq/whats-new-in-2.1.html#ecc @@ -60,7 +71,7 @@ class GPGHandler(object): self._prep_home() else: self._check_home() - self.ctx = self.get_context(home_dir = self.home) + self.ctx = self.GetContext(home_dir = self.home) def _check_home(self, home = None): if not home: @@ -94,11 +105,12 @@ class GPGHandler(object): 'write to') return() - def get_context(self, **kwargs): + def GetContext(self, **kwargs): ctx = gpg.Context(**kwargs) return(ctx) - def kill_stale_agent(self): + def KillStaleAgent(self): + # Is this even necessary since I switched to the native gpg module instead of the gpgme one? _process_list = [] # TODO: optimize; can I search by proc name? for p in psutil.process_iter(): @@ -113,7 +125,13 @@ class GPGHandler(object): # for p in plst: # psutil.Process(p).terminate() - def get_sigs(self, data_in): + def CreateKey(self, params): # TODO: explicit params + # We can't use self.ctx.create_key; it's a little limiting. + # It's a fairly thin wrapper to .op_createkey() (the C GPGME API gpgme_op_createkey) anyways. + + pass + + def GetSigs(self, data_in): key_ids = [] # Currently as of May 13, 2018 there's no way using the GPGME API to do # the equivalent of the CLI's --list-packets. @@ -131,3 +149,7 @@ class GPGHandler(object): l = [i.strip() for i in line.split(':')] key_ids.append(l[0]) return(key_ids) + + def CheckSigs(self, keys, sig_data): + try: + self.ctx.verify(sig_data) diff --git a/bdisk/basedistro/archlinux.py b/bdisk/basedistro/archlinux.py index 93a62d7..c57f006 100644 --- a/bdisk/basedistro/archlinux.py +++ b/bdisk/basedistro/archlinux.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.6 +#!/usr/bin/env python3 # Supported initsys values: # systemd @@ -41,6 +41,7 @@ pkg_mgr_prep = """#!/bin/bash pacman -Syy pacman-key --init pacman-key --populate archlinux +pacman -S --noconfirm --needed base pacman -S --noconfirm --needed base-devel multilib-devel git linux-headers \ mercurial subversion vala xorg-server-devel cd /tmp @@ -62,8 +63,8 @@ rm apacman* # should try to install it. #### AUR SUPPORT #### packager = {'pre_check': False, - 'sys_update': ['/usr/bin/aurman', '-S', '-u'], - 'sync_cmd': ['/usr/bin/aurman', '-S', '-y', '-y'], + 'sys_update': ['/usr/bin/apacman', '-S', '-u'], + 'sync_cmd': ['/usr/bin/apacman', '-S', '-y', '-y'], 'check_cmds': {'versioned': ['/usr/bin/pacman', '-Q', '-s', '{PACKAGE}'], diff --git a/bdisk/bdisk.xsd b/bdisk/bdisk.xsd index 62f8b3f..9485a01 100644 --- a/bdisk/bdisk.xsd +++ b/bdisk/bdisk.xsd @@ -669,8 +669,17 @@ + + + + + + + + + @@ -893,4 +902,4 @@ - \ No newline at end of file + diff --git a/bdisk/confgen.py b/bdisk/confgen.py index c6e8ed0..a321ed4 100755 --- a/bdisk/confgen.py +++ b/bdisk/confgen.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.6 +#!/usr/bin/env python3 # Ironically enough, I think building a GUI for this would be *cleaner*. # Go figure. diff --git a/bdisk/confparse.py b/bdisk/confparse.py index d21df53..6ba6616 100644 --- a/bdisk/confparse.py +++ b/bdisk/confparse.py @@ -40,6 +40,8 @@ class Conf(object): You can provide any combination of these (e.g. "profile={'id': 2, 'name' = 'some_profile'}"). + Non-greedy matching (meaning ALL attributes specified + must match). 
""" if validate_cfg == 'pre': # Validate before attempting any other operations @@ -57,6 +59,7 @@ class Conf(object): if validate_cfg: # Validation post-substitution self.validate(parsed = False) + # TODO: populate checksum{} with hash_algo if explicit def get_pki_obj(self, pki, pki_type): elem = {} @@ -272,6 +275,9 @@ class Conf(object): self.cfg['profile'][a] = transform.xml2py( self.profile.attrib[a], attrib = True) + # Small bug in transform.xml2py that we unfortunately can't fix, so we manually fix. + if 'id' in self.cfg['profile'] and isinstance(self.cfg['profile']['id'], bool): + self.cfg['profile']['id'] = int(self.cfg['profile']['id']) return() def parse_sources(self): @@ -323,11 +329,12 @@ class Conf(object): xml = etree.fromstring(self.xml_suppl.return_full()) self.xsd.assertValid(xml) if parsed: - # TODO: perform further validations that we can't do in XSD. # We wait until after it's parsed to evaluate because otherwise we # can't use utils.valid(). # We only bother with stuff that would hinder building, though - # e.g. we don't check that profile's UUID is a valid UUID4. + # The XSD can catch a lot of stuff, but it's not so hot with things like URI validation, + # email validation, etc. # URLs for url in (self.cfg['uri'], self.cfg['dev']['website']): if not valid.url(url): @@ -335,25 +342,41 @@ class Conf(object): # Emails for k in self.cfg['gpg']['keys']: if not valid.email(k['email']): - raise ValueError( - 'GPG key {0}: {1} is not a valid email ' - 'address'.format(k['name'], k['email'])) + raise ValueError('GPG key {0}: {1} is not a valid email address'.format(k['name'], k['email'])) if not valid.email(self.cfg['dev']['email']): - raise ValueError('{0} is not a valid email address'.format( - self.cfg['dev']['email'])) + raise ValueError('{0} is not a valid email address'.format(self.cfg['dev']['email'])) if self.cfg['pki']: if 'subject' in self.cfg['pki']['ca']: - if not valid.email( - self.cfg['pki']['ca']['subject']['emailAddress']): - raise ValueError('{0} is not a valid email ' - 'address'.format( - self.cfg['pki']['ca']['subject']['emailAddress'])) - - if not self.cfg['pki'][x]['subject']: + if not valid.email(self.cfg['pki']['ca']['subject']['emailAddress']): + raise ValueError('{0} is not a valid email address'.format( + self.cfg['pki']['ca']['subject']['emailAddress'])) + for cert in self.cfg['pki']['clients']: + if not cert['subject']: continue - if not valid.email( - self.cfg['pki'][x]['subject']['emailAddress']): - raise ValueError('{0} is not a valid email ' - 'address'.format( - self.cfg['pki'][x]['subject']['email'])) + if not valid.email(cert['subject']['emailAddress']): + raise ValueError('{0} is not a valid email address'.format(cert['subject']['email'])) + # Salts/hashes + if self.cfg['root']['salt']: + if not valid.salt_hash(self.cfg['root']['salt']): + raise ValueError('{0} is not a valid salt'.format(self.cfg['root']['salt'])) + if self.cfg['root']['hashed']: + if not valid.salt_hash_full(self.cfg['root']['salt_hash'], self.cfg['root']['hash_algo']): + raise ValueError('{0} is not a valid hash of type {1}'.format(self.cfg['root']['salt_hash'], + self.cfg['root']['hash_algo'])) + for u in self.cfg['users']: + if u['salt']: + if not valid.salt_hash(u['salt']): + raise ValueError('{0} is not a valid salt'.format(u['salt'])) + if u['hashed']: + if not valid.salt_hash_full(u['salt_hash'], u['hash_algo']): + raise ValueError('{0} is not a valid hash of type {1}'.format(u['salt_hash'], u['hash_algo'])) + # GPG Key IDs + if self.cfg['gpg']['keyid']: + if not 
valid.gpgkeyID(self.cfg['gpg']['keyid']): + raise ValueError('{0} is not a valid GPG Key ID/fingerprint'.format(self.cfg['gpg']['keyid'])) + for s in self.cfg['sources']: + if 'sig' in s: + for k in s['sig']['keys']: + if not valid.gpgkeyID(k): + raise ValueError('{0} is not a valid GPG Key ID/fingerprint'.format(k)) return() diff --git a/bdisk/env_prep.py b/bdisk/env_prep.py index 5dcac35..002db3e 100644 --- a/bdisk/env_prep.py +++ b/bdisk/env_prep.py @@ -1,3 +1,67 @@ -import copy -import importlib +import hashlib +import importlib # needed for the guest-os-specific stuff... import os +from . import utils +from urllib.parse import urljoin + + +def hashsum_downloader(url, filename = None): + # TODO: support "latest" and "regex" flags? or remove from specs (since the tarball can be specified by these)? + # move that to the utils.Download() class? + d = utils.Download(url, progress = False) + hashes = {os.path.basename(k):v for (v, k) in [line.split() for line in d.fetch().decode('utf-8').splitlines()]} + if filename: + if filename in hashes: + return(hashes[filename]) + else: + raise KeyError('Filename {0} not in the list of hashes'.format(filename)) + return(hashes) + + +class Prepper(object): + def __init__(self, dirs, sources, gpg = None): + # dirs is a ConfParse.cfg['build']['paths'] dict of dirs + self.dirs = dirs + self.sources = sources + self.CreateDirs(dirs) + # TODO: set up GPG env here so we can use it to import sig key and verify sources + for idx, s in enumerate(sources): + self._download(idx) + + def CreateDirs(self, dirs): + for d in dirs.values(): + os.makedirs(d, exist_ok = True) + return() + + + def _download(self, source_idx): + download = True + _source = self.sources[source_idx] + _dest_dir = os.path.join(self.dirs['cache'], str(source_idx)) + _tarball = os.path.join(_dest_dir, _source['tarball']['fname']) + _remote_dir = urljoin(_source['mirror'], _source['rootpath']) + _remote_tarball = urljoin(_remote_dir + '/', _source['tarball']['fname']) + def _hash_verify(): # TODO: move to utils.valid()? + # Get a checksum. + if 'checksum' in _source: + if not _source['checksum']['explicit']: + _source['checksum']['value'] = hashsum_downloader(urljoin(_remote_dir + '/', + _source['checksum']['fname']), + filename = _source['tarball']['fname']) + if not _source['checksum']['hash_algo']: + _source['checksum']['hash_algo'] = utils.detect().any_hash(_source['checksum']['value'], + normalize = True)[0] + _hash = hashlib.new(_source['checksum']['hash_algo']) + with open(_tarball, 'rb') as f: + # It's potentially a large file, so we chunk it 64kb at a time. + _hashbuf = f.read(64000) + while len(_hashbuf) > 0: + _hash.update(_hashbuf) + _hashbuf = f.read(64000) + if _hash.hexdigest().lower() != _source['checksum']['value'].lower(): + return(False) + return(True) + def _sig_verify(gpg_instance = None): # TODO: move to utils.valid()? or just use as part of the bdisk.GPG module?
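+            # (Assumption/sketch: this would fetch the tarball's detached signature and verify it against the source's configured sig keys via the bdisk.GPG handler; for now it just trusts the checksum.)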
+ return(True) # TODO: not implemented yet + if os.path.isfile(_tarball): + # Only re-download if the cached tarball fails verification. + download = not (_hash_verify() and _sig_verify()) + if download: + d = utils.Download(_remote_tarball) diff --git a/bdisk/main.py b/bdisk/main.py index 40af6a6..9a54f67 100644 --- a/bdisk/main.py +++ b/bdisk/main.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.6 +#!/usr/bin/env python3 import argparse import confparse @@ -14,8 +14,10 @@ def parseArgs(): epilog = ('https://git.square-r00t.net')) return(args) -def run(): - pass +def run(cfg): + cfg = confparse.Conf(cfg, validate_cfg = True) + cfg.parse_all() + def run_interactive(): args = vars(parseArgs().parse_args()) diff --git a/bdisk/mtree.py b/bdisk/mtree.py index e63889c..b74b730 100755 --- a/bdisk/mtree.py +++ b/bdisk/mtree.py @@ -3,15 +3,188 @@ import argparse import copy import datetime +import grp +import hashlib import os import pathlib +import platform +import pwd import re +import stat +from collections import OrderedDict +try: + import pycksum + has_cksum = True +except ImportError: + has_cksum = False # Parse BSD mtree spec files. # On arch, BSD mtree is ported in the AUR as nmtree. -# TODO: add a generator class as well? +# TODO: add a generator class as well? (in progress) # TODO: add a checking function as well? +# The format used for headers +_header_strptime_fmt = '%a %b %d %H:%M:%S %Y' + +# Supported hash types (for generation). These are globally available always. +_hashtypes = ['md5', 'sha1', 'sha256', 'sha384', 'sha512'] +# If RIPEMD-160 is supported, we add it (after MD5). +if 'ripemd160' in hashlib.algorithms_available: + _hashtypes.insert(1, 'rmd160') + +# Map used to determine which type an item is. +_stype_map = {'block': stat.S_ISBLK, + 'char': stat.S_ISCHR, + 'dir': stat.S_ISDIR, + 'fifo': stat.S_ISFIFO, + 'file': stat.S_ISREG, + 'link': stat.S_ISLNK, + 'socket': stat.S_ISSOCK} + +# Regex pattern for cleaning up an octal perm mode into a string representation. +_octre = re.compile('^0o') + +class MTreeGen(object): + def __init__(self, path): + self.path = pathlib.PosixPath(os.path.abspath(os.path.expanduser(path))) + # These are used to keep a cached copy of the info. + self._sysinfo = {'uids': {}, 'gids': {}} + self._build_header() + # We use this to keep track of where we are exactly in the tree so we can generate a full absolute path at + # any moment relative to the tree. + self._path_pointer = copy.deepcopy(self.path) + + + def paths_iterator(self): + for root, dirs, files in os.walk(self.path): + for f in files: + _fname = pathlib.PosixPath(root).joinpath(f) # Join against the current walk root, not the top of the tree. + _stats = self._get_stats(_fname) + if not _stats: + print(('WARNING: {0} either disappeared while we were trying to parse it or ' + 'it is a broken symlink.').format(_fname)) + continue + # TODO: get /set line here? + item = ' {0} \\\n'.format(f) + _type = 'file' # TODO: stat this more accurately + _cksum = self._gen_cksum(_fname) + item += ' {0} {1} {2}\\\n'.format(_stats['size'], + _stats['time'], + ('{0} '.format(_cksum) if _cksum else '')) + # TODO: here's where the hashes would get added + # TODO: here's where we parse dirs. maybe do that before files? + # remember: mtree specs use ..'s to traverse upwards when done with a dir + for d in dirs: + _dname = pathlib.PosixPath(root).joinpath(d) + _stats = self._get_stats(_dname) + if not _stats: + print(('WARNING: {0} either disappeared while we were trying to parse it or ' + 'it is a broken symlink.').format(_dname)) + continue + # TODO: get /set line here?
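+                # (For reference, an mtree /set line declares defaults for the entries that follow, e.g. '/set type=file uid=0 gid=0 mode=0644'.)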
+ return() + + + def _gen_cksum(self, fpath): + if not has_cksum: + return(None) + if not os.path.isfile(fpath): + return(None) + # TODO: waiting on https://github.com/sobotklp/pycksum/issues/2 for byte iteration (because large files maybe?) + c = pycksum.Cksum() + with open(fpath, 'rb') as f: + c.add(f) + return(c.get_cksum()) + + + def _get_stats(self, path): + stats = {} + try: + _st = os.stat(path, follow_symlinks = False) + except FileNotFoundError: + # Broken symlink? Shouldn't occur since follow_symlinks is False anyways, BUT... + return(None) + # Ownership + stats['uid'] = _st.st_uid + stats['gid'] = _st.st_gid + if _st.st_uid in self._sysinfo['uids']: + stats['uname'] = self._sysinfo['uids'][_st.st_uid] + else: + _pw = pwd.getpwuid(_st.st_uid).pw_name + stats['uname'] = _pw + self._sysinfo['uids'][_st.st_uid] = _pw + if _st.st_gid in self._sysinfo['gids']: + stats['gname'] = self._sysinfo['gids'][_st.st_gid] + else: + _grp = grp.getgrgid(_st.st_gid).gr_name + stats['gname'] = _grp + self._sysinfo['gids'][_st.st_gid] = _grp + # Type and Mode + for t in _stype_map: + if _stype_map[t](_st.st_mode): + stats['type'] = t + # TODO: need a reliable way of parsing this. + # for instance, for /dev/autofs, _st.st_dev = 6 (os.makedev(6) confirms major is 0, minor is 6) + # but netBSD mtree (ported) says it's "0xaeb" (2795? or, as str, "®b" apparently). + # I'm guessing the kernel determines this, but where is it pulling it from/how? + # We can probably do 'format,major,minor' (or, for above, 'linux,0,6'). + # if t in ('block', 'char'): + # stats['device'] = None + # Handle symlinks. + if t == 'link': + _target = path + while os.path.islink(_target): + _target = os.path.realpath(_target) + stats['link'] = _target + break + stats['mode'] = '{0:0>4}'.format(_octre.sub('', str(oct(stat.S_IMODE(_st.st_mode))))) + stats['size'] = _st.st_size + stats['time'] = str(float(_st.st_mtime)) + stats['nlink'] = _st.st_nlink + # TODO: "flags" keyword? is that meaningful on linux? + stats['flags'] = 'none' + return(stats) + + + + def _gen_hashes(self, fpath): + hashes = OrderedDict({}) + if not os.path.isfile(fpath): + return(hashes) + _hashnums = len(_hashtypes) + for idx, h in enumerate(_hashtypes): + # Stupid naming inconsistencies. + _hashname = (h if h != 'rmd160' else 'ripemd160') + _hasher = hashlib.new(_hashname) + with open(fpath, 'rb') as f: + # Hash 64kb at a time in case it's a huge file. TODO: is this the most ideal chunk size?
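+                # (64000 is arbitrary; any reasonable chunk size, e.g. 65536, works. The loop only exists to bound memory use on large files.)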
+ _hashbuf = f.read(64000) + while len(_hashbuf) > 0: + _hasher.update(_hashbuf) + _hashbuf = f.read(64000) + hashes[h] = _hasher.hexdigest() + return(hashes) + # if idx + 1 < _hashnums: + # hashes += ' {0}={1} \\\n'.format(h, _hasher.hexdigest()) + # else: + # hashes += ' {0}={1}\n'.format(h, _hasher.hexdigest()) + # return(hashes) + + + def _build_header(self): + self.spec = '' + _header = OrderedDict({}) + _header['user'] = pwd.getpwuid(os.geteuid()).pw_name + _header['machine'] = platform.node() + _header['tree'] = str(self.path) + _header['date'] = datetime.datetime.utcnow().strftime(_header_strptime_fmt) + for h in _header: + self.spec += '#\t{0:>7}: {1}\n'.format(h, _header[h]) + self.spec += '\n' + return() + + + class MTreeParse(object): def __init__(self, spec): if not isinstance(spec, (str, bytes)): @@ -21,7 +194,6 @@ class MTreeParse(object): spec = spec.decode('utf-8') except UnicodeDecodeError: raise ValueError('spec must be a utf-8 encoded set of bytes if using byte mode') - self._strptime_fmt = '%a %b %d %H:%M:%S %Y' self.orig_spec = copy.deepcopy(spec) # For referencing in case someone wanted to write it out. # We NOW need to handle the escaped linebreaking it does. self._specdata = re.sub('\\\\\s+', '', spec).splitlines() @@ -82,7 +254,7 @@ class MTreeParse(object): # They are restored by an "/unset". Since they're global and stateful, they're handled as a class attribute. self.settings = copy.deepcopy(self._tplitem) self._parse_items() - del(self.settings, self._tplitem, self._strptime_fmt) + del(self.settings, self._tplitem) def _get_header(self): @@ -96,7 +268,7 @@ class MTreeParse(object): header = l[0] val = (l[1] if l[1] is not '(null)' else None) if header == 'date': - val = datetime.datetime.strptime(val, self._strptime_fmt) + val = datetime.datetime.strptime(val, _header_strptime_fmt) elif header == 'tree': val = pathlib.PosixPath(val) self.header[header] = val @@ -158,6 +330,8 @@ class MTreeParse(object): return(out) def _unset_parse(unsetline): out = {} + if unsetline[1] == 'all': + return(copy.deepcopy(self._tplitem)) for i in unsetline: out[i] = self._tplitem[i] return(out) diff --git a/bdisk/prompt_strings.py b/bdisk/prompt_strings.py index 36d38cb..3a3d10a 100644 --- a/bdisk/prompt_strings.py +++ b/bdisk/prompt_strings.py @@ -11,18 +11,31 @@ class PromptStrings(object): 'attribs': { 'algo': { 'text': 'the subkey\'s encryption type/algorithm', - 'choices': ['rsa', 'dsa'], - 'default': 'rsa' + # The following can ONLY be used for encryption, not signing: elg, cv + #'choices': ['rsa', 'dsa', 'elg', 'ed', 'cv', 'nistp', 'brainpool.1', 'secp.k1'], + 'choices': ['rsa', 'dsa', 'ed', 'nist', 'brainpool.1', 'sec.k1'], + #'default': 'rsa' + 'default': 'ed' }, 'keysize': { 'text': 'the subkey\'s key size (in bits)', 'choices': { 'rsa': ['1024', '2048', '4096'], - 'dsa': ['768', '2048', '3072'] + 'dsa': ['768', '2048', '3072'], + #'elg': ['1024', '2048', '4096'], # Invalid for signing, etc. + 'ed': ['25519'], + #'cv': ['25519'], + 'nist': ['256', '384', '521'], + 'brainpool.1': ['256', '384', '512'], + 'sec.k1': ['256'] }, 'default': { 'rsa': '4096', - 'dsa': '3072' + 'dsa': '3072', + 'ed': '25519', + 'nist': '521', + 'brainpool.1': '512', + 'sec.k1': '256' } } }, @@ -113,4 +126,4 @@ class PromptStrings(object): 'Email: ') } } - } \ No newline at end of file + } diff --git a/bdisk/utils.py b/bdisk/utils.py index 416516e..fd82060 100644 --- a/bdisk/utils.py +++ b/bdisk/utils.py @@ -1,3 +1,5 @@ +# Yes, this is messy. They don't belong anywhere else, leave me alone.
+ import _io import copy import crypt @@ -14,6 +16,7 @@ import string import uuid import validators import zlib +import requests import lxml.etree import lxml.objectify from bs4 import BeautifulSoup @@ -30,7 +33,7 @@ passlib_schemes = ['des_crypt', 'md5_crypt', 'sha256_crypt', 'sha512_crypt'] # Build various hash digest name lists digest_schemes = list(hashlib.algorithms_available) # Provided by zlib -# TODO +# TODO? digest_schemes.append('adler32') digest_schemes.append('crc32') @@ -39,6 +42,54 @@ crypt_map = {'sha512': crypt.METHOD_SHA512, 'md5': crypt.METHOD_MD5, 'des': crypt.METHOD_CRYPT} + +class Download(object): + def __init__(self, url, progress = True, offset = None, chunksize = 1024): + self.cnt_len = None + self.head = requests.head(url, allow_redirects = True).headers + self.req_headers = {} + self.range = False + self.url = url + self.offset = offset + self.chunksize = chunksize + self.progress = progress + if 'accept-ranges' in self.head: + if self.head['accept-ranges'].lower() != 'none': + self.range = True + if 'content-length' in self.head: + try: + self.cnt_len = int(self.head['content-length']) + except (TypeError, ValueError): + pass + if self.cnt_len and self.offset and self.range: + if not self.offset <= self.cnt_len: + raise ValueError(('The offset requested ({0}) is greater than ' + 'the content-length value ({1})').format(self.offset, self.cnt_len)) + self.req_headers['range'] = 'bytes={0}-'.format(self.offset) + + def fetch(self): + if not self.progress: + self.req = requests.get(self.url, allow_redirects = True, headers = self.req_headers) + self.bytes_obj = self.req.content + else: + self.req = requests.get(self.url, allow_redirects = True, stream = True, headers = self.req_headers) + self.bytes_obj = bytes() + _bytelen = 0 + # TODO: better handling for logging instead of print()s? + for chunk in self.req.iter_content(chunk_size = self.chunksize): + self.bytes_obj += chunk + if self.cnt_len: + print('\033[F') + print('{0:.2f}'.format((_bytelen / float(self.head['content-length'])) * 100), + end = '%', + flush = True) + _bytelen += self.chunksize + else: + print('.', end = '') + print() + return(self.bytes_obj) + + class XPathFmt(string.Formatter): def get_field(self, field_name, args, kwargs): vals = self.get_value(field_name, args, kwargs), field_name @@ -50,18 +101,19 @@ class detect(object): def __init__(self): pass - def any_hash(self, hash_str): + def any_hash(self, hash_str, normalize = False): h = hashid.HashID() hashes = [] for i in h.identifyHash(hash_str): if i.extended: continue x = i.name - if x.lower() in ('crc-32', 'ripemd-160', 'sha-1', 'sha-224', - 'sha-256', 'sha-384', 'sha-512'): + if x.lower() in ('crc-32', 'ripemd-160', 'sha-1', 'sha-224', 'sha-256', 'sha-384', 'sha-512'): # Gorram you, c0re.
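+                # hashid reports names like 'SHA-256'/'RIPEMD-160'; strip the dashes so they match hashlib's naming.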
x = re.sub('-', '', x.lower()) - _hashes = [h.lower() for h in digest_schemes] + _hashes = [h.lower() for h in digest_schemes] # TODO: move this outside so we don't define it every invoke + if normalize: + x = re.sub('(-|crypt|\s+)', '', x.lower()) if x.lower() in sorted(list(set(_hashes))): hashes.append(x) return(hashes) @@ -83,8 +135,7 @@ class detect(object): return(salt) def remote_files(self, url_base, ptrn = None, flags = []): - with urlopen(url_base) as u: - soup = BeautifulSoup(u.read(), 'lxml') + soup = BeautifulSoup(Download(url_base, progress = False).fetch(), 'lxml') urls = [] if 'regex' in flags: if not isinstance(ptrn, str): @@ -113,8 +164,7 @@ class detect(object): return(urls) def gpgkeyID_from_url(self, url): - with urlopen(url) as u: - data = u.read() + data = Download(url, progress = False).fetch() g = GPG.GPGHandler() key_ids = g.get_sigs(data) del(g) @@ -166,7 +216,7 @@ class detect(object): # Get any easy ones out of the way first. if name in digest_schemes: return(name) - # Otherwise grab the first one that matches, in order from the . + # Otherwise grab the first one that matches _digest_re = re.compile('^{0}$'.format(name.strip()), re.IGNORECASE) for h in digest_schemes: if _digest_re.search(h): @@ -774,14 +824,19 @@ class valid(object): return(True) def salt_hash(self, salthash): - _idents = ''.join([i.ident for i in crypt_map if i.ident]) + _idents = ''.join([i.ident for i in crypt_map.values() if i.ident]) # noinspection PyStringFormat - _regex = re.compile('^(\$[{0}]\$)?[./0-9A-Za-z]{{0,16}}\$?'.format( - _idents)) + _regex = re.compile('^(\$[{0}]\$)?[./0-9A-Za-z]{{0,16}}\$?'.format(_idents)) if not _regex.search(salthash): return(False) return(True) + def salt_hash_full(self, salthash, hash_type): + h = [re.sub('-', '', i.lower()).split()[0] for i in detect().any_hash(salthash, normalize = True)] + if hash_type.lower() not in h: + return(False) + return(True) + def plugin_name(self, name): if len(name) == 0: return(False) @@ -1068,4 +1123,4 @@ class xml_supplicant(object): for i in selectors.items(): if i[1] and i[0] in self.selector_ids: xpath += '[@{0}="{1}"]'.format(*i) - return(xpath) \ No newline at end of file + return(xpath)
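A minimal usage sketch for the new utils.Download offset/Range support (bug #43 in the TODO, resuming partial tarball downloads). The resume_fetch() helper, its dest argument, and the append-vs-overwrite logic are illustrative assumptions layered on top of the Download API added above, not part of the patch itself:

    import os
    from bdisk import utils

    def resume_fetch(url, dest):
        # Hypothetical helper: resume a partial download if part of the file is already on disk.
        offset = os.path.getsize(dest) if os.path.isfile(dest) else None
        d = utils.Download(url, progress = False, offset = offset)
        data = d.fetch()
        # Append only if a Range request was actually sent (i.e. the server advertised
        # Accept-Ranges and a usable offset/content-length pair was found); otherwise the
        # full body came back and the file should simply be overwritten.
        mode = 'ab' if 'range' in d.req_headers else 'wb'
        with open(dest, mode) as f:
            f.write(data)

The same pattern would let env_prep.Prepper._download() pick an interrupted tarball back up instead of restarting it from byte zero.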