checking in a ton of population

This commit is contained in:
brent s. 2018-09-23 07:55:58 -04:00
parent c48c752f84
commit 0dd54a604d
12 changed files with 426 additions and 66 deletions

18
TODO
View File

@ -1,11 +1,11 @@
- write classes/functions
- XML-based config
-x XML syntax
--- regex btags - case-insensitive? this can be represented in-pattern:
https://stackoverflow.com/a/9655186/733214
--- xregex btags - case-insensitive? this can be represented in-pattern:
xhttps://stackoverflow.com/a/9655186/733214
-x configuration generator
--- print end result xml config to stderr for easier redirection? or print prompts to stderr and xml to stdout?
-- XSD for validation
--- xprint end result xml config to stderr for easier redirection? or print prompts to stderr and xml to stdout?
-- xXSD for validation
-- Flask app for generating config?
-- TKinter (or pygame?) GUI?
--- https://docs.python.org/3/faq/gui.html
@ -16,12 +16,9 @@

- locking
- for docs, 3.x (as of 3.10) was 2.4M.
- Need ability to write/parse mtree specs (or a similar equivalent) for applying ownerships/permissions to overlay files
- xNeed ability to write/parse mtree specs (or a similar equivalent) for applying ownerships/permissions to overlay files
-- parsing is done. writing may? come later.

- need to package:
python-hashid (https://psypanda.github.io/hashID/,
https://github.com/psypanda/hashID,
https://pypi.org/project/hashID/)

- package for PyPI:
# https://packaging.python.org/tutorials/distributing-packages/
@ -37,7 +34,6 @@ BUGS.SQUARE-R00T.NET bugs/tasks:
#14: Use os.path.join() for more consistency/pythonicness
#24: Run as regular user? (pychroot? fakeroot?)
#34: Build-time support for only building single phase of build
#36: Allow parsing pkg lists with inline comments
#39: Fix UEFI
#40: ISO overlay (to add e.g. memtest86+ to final ISO)
#43: Support resuming partial tarball downloads (Accept-Ranges: bytes)
#43: Support resuming partial tarball downloads (Accept-Ranges: bytes)

View File

@ -1,3 +1,4 @@
import jinja2
import os
import shutil


View File

@ -3,6 +3,17 @@ import os
import psutil
import gpg.errors


# This helps translate the input name from the conf to a string compatible with the gpg module.
# Keys are the algorithm names accepted in the configuration; values are templates carrying a "{keysize}"
# placeholder (presumably filled in with str.format(keysize = ...) by the caller -- TODO confirm).
# The trailing comment on each entry lists the key sizes supported for that algorithm.
# Entries that are commented out are encryption-only algorithms, which are useless for signing.
_algmaps = {#'cv': 'cv{keysize}', # DISABLED, can't sign (only encrypt). Currently only 25519
'ed': 'ed{keysize}', # Currently only 25519
#'elg': 'elg{}', # DISABLED, can't sign (only encrypt). 1024, 2048, 4096
'nist': 'nistp{keysize}', # 256, 384, 521
'brainpool.1': 'brainpoolP{keysize}r1', # 256, 384, 512
'sec.k1': 'secp{keysize}k1', # Currently only 256
'rsa': 'rsa{keysize}', # Variable (1024 <> 4096), but we only support 1024, 2048, 4096
'dsa': 'dsa{keysize}'} # Variable (768 <> 3072), but we only support 768, 2048, 3072

# http://files.au.adversary.org/crypto/GPGMEpythonHOWTOen.html
# https://www.gnupg.org/documentation/manuals/gpgme.pdf
# Support ECC? https://www.gnupg.org/faq/whats-new-in-2.1.html#ecc
@ -60,7 +71,7 @@ class GPGHandler(object):
self._prep_home()
else:
self._check_home()
self.ctx = self.get_context(home_dir = self.home)
self.ctx = self.GetContext(home_dir = self.home)

def _check_home(self, home = None):
if not home:
@ -94,11 +105,12 @@ class GPGHandler(object):
'write to')
return()

def get_context(self, **kwargs):
def GetContext(self, **kwargs):
ctx = gpg.Context(**kwargs)
return(ctx)

def kill_stale_agent(self):
def KillStaleAgent(self):
# Is this even necessary since I switched to the native gpg module instead of the gpgme one?
_process_list = []
# TODO: optimize; can I search by proc name?
for p in psutil.process_iter():
@ -113,7 +125,13 @@ class GPGHandler(object):
# for p in plst:
# psutil.Process(p).terminate()

def get_sigs(self, data_in):
def CreateKey(self, params): # TODO: explicit params
# Placeholder for key generation. The body is currently only `pass`, so calling this does nothing with
# params and returns None.
# We can't use self.ctx.create_key; it's a little limiting.
# It's a fairly thin wrapper to .op_createkey() (the C GPGME API gpgme_op_createkey) anyways.

pass

def GetSigs(self, data_in):
key_ids = []
# Currently as of May 13, 2018 there's no way using the GPGME API to do
# the equivalent of the CLI's --list-packets.
@ -131,3 +149,7 @@ class GPGHandler(object):
l = [i.strip() for i in line.split(':')]
key_ids.append(l[0])
return(key_ids)

def CheckSigs(self, keys, sig_data):
try:
self.ctx.verify(sig_data)

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3.6
#!/usr/bin/env python3

# Supported initsys values:
# systemd
@ -41,6 +41,7 @@ pkg_mgr_prep = """#!/bin/bash
pacman -Syy
pacman-key --init
pacman-key --populate archlinux
pacman -S --noconfirm --needed base
pacman -S --noconfirm --needed base-devel multilib-devel git linux-headers \
mercurial subversion vala xorg-server-devel
cd /tmp
@ -62,8 +63,8 @@ rm apacman*
# should try to install it.
#### AUR SUPPORT ####
packager = {'pre_check': False,
'sys_update': ['/usr/bin/aurman', '-S', '-u'],
'sync_cmd': ['/usr/bin/aurman', '-S', '-y', '-y'],
'sys_update': ['/usr/bin/apacman', '-S', '-u'],
'sync_cmd': ['/usr/bin/apacman', '-S', '-y', '-y'],
'check_cmds': {'versioned': ['/usr/bin/pacman',
'-Q', '-s',
'{PACKAGE}'],

View File

@ -669,8 +669,17 @@
<xs:attribute name="algo" use="optional">
<xs:simpleType>
<xs:restriction base="xs:string">
<!-- rsa, dsa, and elgamal are "normal". Newer GnuPG supports ECC (yay!), so we have support for those in the XSD (you can get a list with gpg -with-colons -list-config curve | cut -f3 -d":" | tr ';' '\n'). -->
<!-- We test in-code if the host supports it. -->
<xs:enumeration value="rsa"/>
<xs:enumeration value="dsa"/>
<!-- The following only support encryption. The entire reason we'd be generating a key is to sign files, so we disable them. -->
<!-- <xs:enumeration value="elg"/> -->
<!-- <xs:enumeration value="cv"/> -->
<xs:enumeration value="ed"/>
<xs:enumeration value="nist"/>
<xs:enumeration value="brainpool.1"/>
<xs:enumeration value="sec.k1"/>
</xs:restriction>
</xs:simpleType>
</xs:attribute>
@ -893,4 +902,4 @@
</xs:complexType>
</xs:element>
<!-- END BDISK -->
</xs:schema>
</xs:schema>

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3.6
#!/usr/bin/env python3

# Ironically enough, I think building a GUI for this would be *cleaner*.
# Go figure.

View File

@ -40,6 +40,8 @@ class Conf(object):

You can provide any combination of these
(e.g. "profile={'id': 2, 'name': 'some_profile'}").
Non-greedy matching (meaning ALL attributes specified
must match).
"""
if validate_cfg == 'pre':
# Validate before attempting any other operations
@ -57,6 +59,7 @@ class Conf(object):
if validate_cfg:
# Validation post-substitution
self.validate(parsed = False)
# TODO: populate checksum{} with hash_algo if explicit

def get_pki_obj(self, pki, pki_type):
elem = {}
@ -272,6 +275,9 @@ class Conf(object):
self.cfg['profile'][a] = transform.xml2py(
self.profile.attrib[a],
attrib = True)
# Small bug in transform.xml2py that we unfortunately can't fix, so we manually fix.
if 'id' in self.cfg['profile'] and isinstance(self.cfg['profile']['id'], bool):
self.cfg['profile']['id'] = int(self.cfg['profile']['id'])
return()

def parse_sources(self):
@ -323,11 +329,12 @@ class Conf(object):
xml = etree.fromstring(self.xml_suppl.return_full())
self.xsd.assertValid(xml)
if parsed:
# TODO: perform further validations that we can't do in XSD.
# We wait until after it's parsed to evaluate because otherwise we
# can't use utils.valid().
# We only bother with stuff that would hinder building, though -
# e.g. we don't check that profile's UUID is a valid UUID4.
# The XSD can catch a lot of stuff, but it's not so hot with things like URI validation,
# email validation, etc.
# URLs
for url in (self.cfg['uri'], self.cfg['dev']['website']):
if not valid.url(url):
@ -335,25 +342,41 @@ class Conf(object):
# Emails
for k in self.cfg['gpg']['keys']:
if not valid.email(k['email']):
raise ValueError(
'GPG key {0}: {1} is not a valid email '
'address'.format(k['name'], k['email']))
raise ValueError('GPG key {0}: {1} is not a valid email address'.format(k['name'], k['email']))
if not valid.email(self.cfg['dev']['email']):
raise ValueError('{0} is not a valid email address'.format(
self.cfg['dev']['email']))
raise ValueError('{0} is not a valid email address'.format(self.cfg['dev']['email']))
if self.cfg['pki']:
if 'subject' in self.cfg['pki']['ca']:
if not valid.email(
self.cfg['pki']['ca']['subject']['emailAddress']):
raise ValueError('{0} is not a valid email '
'address'.format(
self.cfg['pki']['ca']['subject']['emailAddress']))

if not self.cfg['pki'][x]['subject']:
if not valid.email(self.cfg['pki']['ca']['subject']['emailAddress']):
raise ValueError('{0} is not a valid email address'.format(
self.cfg['pki']['ca']['subject']['emailAddress']))
for cert in self.cfg['pki']['clients']:
if not cert['subject']:
continue
if not valid.email(
self.cfg['pki'][x]['subject']['emailAddress']):
raise ValueError('{0} is not a valid email '
'address'.format(
self.cfg['pki'][x]['subject']['email']))
if not valid.email(cert['subject']['emailAddress']):
raise ValueError('{0} is not a valid email address'.format(cert['subject']['email']))
# Salts/hashes
if self.cfg['root']['salt']:
if not valid.salt_hash(self.cfg['root']['salt']):
raise ValueError('{0} is not a valid salt'.format(self.cfg['root']['salt']))
if self.cfg['root']['hashed']:
if not valid.salt_hash_full(self.cfg['root']['salt_hash'], self.cfg['root']['hash_algo']):
raise ValueError('{0} is not a valid hash of type {1}'.format(self.cfg['root']['salt_hash'],
self.cfg['root']['hash_algo']))
for u in self.cfg['users']:
if u['salt']:
if not valid.salt_hash(u['salt']):
raise ValueError('{0} is not a valid salt'.format(u['salt']))
if u['hashed']:
if not valid.salt_hash_full(u['salt_hash'], u['hash_algo']):
raise ValueError('{0} is not a valid hash of type {1}'.format(u['salt_hash'], u['hash_algo']))
# GPG Key IDs
if self.cfg['gpg']['keyid']:
if not valid.gpgkeyID(self.cfg['gpg']['keyid']):
raise ValueError('{0} is not a valid GPG Key ID/fingerprint'.format(self.cfg['gpg']['keyid']))
for s in self.cfg['sources']:
if 'sig' in s:
for k in s['sig']['keys']:
if not valid.gpgkeyID(k):
raise ValueError('{0} is not a valid GPG Key ID/fingerprint'.format(k))
return()

View File

@ -1,3 +1,67 @@
import copy
import importlib
import hashlib
import importlib # needed for the guest-os-specific stuff...
import os
from . import utils
from urllib.parse import urljoin


def hashsum_downloader(url, filename = None):
    """
    Fetch a checksum listing from url and return the digests it contains.

    The listing is expected to hold one "<digest> <filename>" entry per line (the layout written by
    md5sum/sha256sum and friends).

    url: the location of the checksum listing.
    filename: if given, only the digest recorded for this (base) filename is returned.

    Returns a dict of {basename: digest}, or a single digest string if filename was specified.
    Raises KeyError if filename was specified but is not present in the listing.
    """
    # TODO: support "latest" and "regex" flags? or remove from specs (since the tarball can be specified by these)?
    #       move that to the utils.Download() class?
    d = utils.Download(url, progress = False)
    hashes = {}
    for line in d.fetch().decode('utf-8').splitlines():
        line = line.strip()
        # Blank lines and comments carry no digest.
        if not line or line.startswith('#'):
            continue
        # Only split on the FIRST run of whitespace so filenames containing spaces stay in one piece.
        fields = line.split(None, 1)
        if len(fields) != 2:
            continue
        digest, fname = fields
        # The coreutils tools prefix the filename with "*" if the file was hashed in binary mode.
        if fname.startswith('*'):
            fname = fname[1:]
        hashes[os.path.basename(fname)] = digest
    if filename:
        if filename not in hashes:
            raise KeyError('Filename {0} not in the list of hashes'.format(filename))
        return(hashes[filename])
    return(hashes)


# Prepares a build: creates the working directories, then walks the configured sources so each can be
# downloaded and verified.
# NOTE(review): this class is visibly a work in progress; the notes below flag the spots that look like they
# cannot run as written. Confirm each against the intended design before relying on it.
class Prepper(object):
def __init__(self, dirs, sources, gpg = None):
# dirs is a ConfParse.cfg['build']['paths'] dict of dirs
# NOTE(review): gpg is accepted but is neither stored nor used yet (see the TODO below).
self.CreateDirs(dirs)
# TODO: set up GPG env here so we can use it to import sig key and verify sources
# Only the index is handed to _download(); the source itself is looked up again from self.cfg there.
for idx, s in enumerate(sources):
self._download(idx)

def CreateDirs(self, dirs):
# Creates every entry yielded by iterating dirs (parents included); existing directories are left alone.
# NOTE(review): if dirs really is a dict, iterating it yields its keys rather than the paths -- confirm.
for d in dirs:
os.makedirs(d, exist_ok = True)
return()


def _download(self, source_idx):
# Works out the local and remote locations of one source's tarball and decides whether to fetch it.
# NOTE(review): self.cfg is never assigned anywhere in this class; __init__ only receives dirs and sources.
# NOTE(review): source_idx is the int produced by enumerate(), which os.path.join() will not accept.
download = True
_source = self.cfg['sources'][source_idx]
_dest_dir = os.path.join(self.cfg['build']['paths']['cache'], source_idx)
_tarball = os.path.join(_dest_dir, _source['tarball']['fname'])
_remote_dir = urljoin(_source['mirror'], _source['rootpath'])
_remote_tarball = urljoin(_remote_dir + '/', _source['tarball']['fname'])
# Compares the local tarball's digest with the expected checksum; False on a mismatch, True on a match.
def _hash_verify(): # TODO: move to utils.valid()?
# Get a checksum.
if 'checksum' in _source:
if not _source['checksum']['explicit']:
# The value was not given in the config, so it is pulled from the remote checksum listing.
# NOTE(review): hashsum_downloader() is called without a filename here, so it returns the whole
# {filename: digest} dict rather than a single digest string.
_source['checksum']['value'] = hashsum_downloader(urljoin(_remote_dir + '/',
_source['checksum']['fname']))
if not _source['checksum']['hash_algo']:
# No algorithm configured; take the first candidate detected from the digest itself.
# NOTE(review): any_hash() is called on the detect class rather than an instance -- confirm this
# binds the arguments as intended.
_source['checksum']['hash_algo'] = utils.detect.any_hash(_source['checksum']['value'],
normalize = True)[0]
_hash = hashlib.new(_source['checksum']['hash_algo'])
with open(_tarball, 'rb') as f:
# It's potentially a large file, so we chunk it 64kb at a time.
_hashbuf = f.read(64000)
while len(_hashbuf) > 0:
_hash.update(_hashbuf)
_hashbuf = f.read(64000)
# Hex digests are compared case-insensitively.
if _hash.hexdigest().lower() != _source['checksum']['value'].lower():
return(False)
return(True)
# Signature verification is not implemented yet; this always returns None.
def _sig_verify(gpg_instance): # TODO: move to utils.valid()? or just use as part of the bdisk.GPG module?
pass
if os.path.isfile(_tarball):
# NOTE(review): a tarball that verifies sets download to True, which looks inverted.
download = _hash_verify()
# NOTE(review): _sig_verify() requires gpg_instance but is called without arguments, and its result
# replaces the outcome of the hash check.
download = _sig_verify()
if download:
# NOTE(review): the Download object is created, but fetch() is not called and nothing is written to
# _tarball yet.
d = utils.Download(_remote_tarball)

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3.6
#!/usr/bin/env python3

import argparse
import confparse
@ -14,8 +14,10 @@ def parseArgs():
epilog = ('https://git.square-r00t.net'))
return(args)

def run():
pass
def run(cfg):
cfg = confparse.Conf(cfg, validate_cfg = True)
cfg.parse_all()


def run_interactive():
args = vars(parseArgs().parse_args())

View File

@ -3,15 +3,188 @@
import argparse
import copy
import datetime
import grp
import hashlib
import os
import pathlib
import platform
import pwd
import re
import stat
from collections import OrderedDict
try:
import pycksum
has_cksum = True
except ImportError:
has_cksum = False

# Parse BSD mtree spec files.
# On arch, BSD mtree is ported in the AUR as nmtree.
# TODO: add a generator class as well?
# TODO: add a generator class as well? (in process)
# TODO: add a checking function as well?

# The format used for headers
_header_strptime_fmt = '%a %b %d %H:%M:%S %Y'

# Supported hash types (for generation). These are globally available always.
_hashtypes = ['md5', 'sha1', 'sha256', 'sha384', 'sha512']
# If RIPEMD-160 is supported, we add it (after MD5).
if 'ripemd160' in hashlib.algorithms_available:
_hashtypes.insert(1, 'rmd160')

# Iterative to determine which type an item is.
_stype_map = {'block': stat.S_ISBLK,
'char': stat.S_ISCHR,
'dir': stat.S_ISDIR,
'fifo': stat.S_ISFIFO,
'file': stat.S_ISREG,
'link': stat.S_ISLNK,
'socket': stat.S_ISSOCK}

# Regex pattern for cleaning up an octal perm mode into a string representation.
_octre = re.compile('^0o')

class MTreeGen(object):
    """
    Generate a BSD mtree-style spec describing a directory tree.

    This is still a work in progress: the header is written to self.spec on instantiation, but the per-entry
    lines assembled in paths_iterator() are not added to the spec yet.

    path: the root of the tree to describe (user-expanded and made absolute).
    """

    def __init__(self, path):
        self.path = pathlib.PosixPath(os.path.abspath(os.path.expanduser(path)))
        # These are used to keep a cached copy of the info.
        self._sysinfo = {'uids': {}, 'gids': {}}
        self._build_header()
        # We use this to keep track of where we are exactly in the tree so we can generate a full absolute path at
        # any moment relative to the tree.
        self._path_pointer = copy.deepcopy(self.path)

    def paths_iterator(self):
        """
        Walk the tree, gathering the stats (and checksum, for files) of every entry.

        Entries that vanish mid-walk are reported on stdout and skipped. Returns an empty tuple.
        """
        for root, dirs, files in os.walk(self.path):
            # os.walk() yields bare names; they must be joined to the directory currently being walked (NOT to
            # the top of the tree) or everything below the first level resolves to the wrong path.
            _root = pathlib.PosixPath(root)
            for f in files:
                _fname = _root.joinpath(f)
                _stats = self._get_stats(_fname)
                if not _stats:
                    print(('WARNING: {0} either disappeared while we were trying to parse it or '
                           'it is a broken symlink.').format(_fname))
                    continue
                # TODO: get /set line here?
                # NOTE: item is assembled but not written to self.spec yet.
                item = '    {0} \\\n'.format(f)
                _type = 'file'  # TODO: stat this more accurately
                _cksum = self._gen_cksum(_fname)
                item += '        {0} {1} {2}\\\n'.format(_stats['size'],
                                                        _stats['time'],
                                                        ('{0} '.format(_cksum) if _cksum else ''))
                # TODO: here's where the hashes would get added
            # TODO: here's where we parse dirs. maybe do that before files?
            #       remember: mtree specs use ..'s to traverse upwards when done with a dir
            for d in dirs:
                _dname = _root.joinpath(d)
                _stats = self._get_stats(_dname)
                if not _stats:
                    print(('WARNING: {0} either disappeared while we were trying to parse it or '
                           'it is a broken symlink.').format(_dname))
                    continue
                # TODO: get /set line here?
        return()

    def _gen_cksum(self, fpath):
        """Return the cksum of fpath, or None if pycksum is unavailable or fpath is not a regular file."""
        if not has_cksum:
            return(None)
        if not os.path.isfile(fpath):
            return(None)
        # TODO: waiting on https://github.com/sobotklp/pycksum/issues/2 for byte iteration (because large files maybe?)
        c = pycksum.Cksum()
        with open(fpath, 'rb') as f:
            c.add(f)
        return(c.get_cksum())

    def _owner_name(self, kind, ident):
        """Resolve a numeric uid/gid to a name, caching the answer in self._sysinfo[kind]."""
        _cache = self._sysinfo[kind]
        if ident not in _cache:
            try:
                if kind == 'uids':
                    _cache[ident] = pwd.getpwuid(ident).pw_name
                else:
                    _cache[ident] = grp.getgrgid(ident).gr_name
            except KeyError:
                # No passwd/group entry for this ID (common for trees populated on another system); fall back
                # to the numeric ID instead of aborting the whole run.
                _cache[ident] = str(ident)
        return(_cache[ident])

    def _get_stats(self, path):
        """
        Collect ownership, type, mode, size, mtime and link count for path WITHOUT following symlinks.

        Returns a dict with the keys uid, gid, uname, gname, type, mode, size, time, nlink and flags (plus
        link for symlinks), or None if path does not exist.
        """
        stats = {}
        try:
            _st = os.stat(path, follow_symlinks = False)
        except FileNotFoundError:
            # Broken symlink? Shouldn't occur since follow_symlinks is False anyways, BUT...
            return(None)
        # Ownership
        stats['uid'] = _st.st_uid
        stats['gid'] = _st.st_gid
        stats['uname'] = self._owner_name('uids', _st.st_uid)
        stats['gname'] = self._owner_name('gids', _st.st_gid)
        # Type and Mode
        for t in _stype_map:
            if _stype_map[t](_st.st_mode):
                stats['type'] = t
                # TODO: need a reliable way of parsing this.
                # for instance, for /dev/autofs, _st.st_dev = 6 (os.makedev(6) confirms major is 0, minor is 6)
                # but netBSD mtree (ported) says it's "0xaeb" (2795? or, as str, "®b" apparently).
                # I'm guessing the kernel determines this, but where is it pulling it from/how?
                # We can probably do 'format,major,minor' (or, for above, 'linux,0,6').
                # if t in ('block', 'char'):
                #     stats['device'] = None
                # Handle symlinks.
                if t == 'link':
                    # realpath() already resolves the entire chain in one call; looping on islink() gains
                    # nothing and would never terminate on a symlink loop.
                    stats['link'] = os.path.realpath(path)
                break
        # Permission bits only, as a zero-padded (at least) four-digit octal string, e.g. "0644".
        stats['mode'] = '{0:04o}'.format(stat.S_IMODE(_st.st_mode))
        stats['size'] = _st.st_size
        stats['time'] = str(float(_st.st_mtime))
        stats['nlink'] = _st.st_nlink
        # TODO: "flags" keyword? is that meaningful on linux?
        stats['flags'] = 'none'
        return(stats)

    def _gen_hashes(self, fpath):
        """
        Return an OrderedDict of {mtree hash keyword: hex digest} for every type in _hashtypes.

        The dict is empty if fpath is not a regular file.
        """
        hashes = OrderedDict()
        if not os.path.isfile(fpath):
            return(hashes)
        _hashers = OrderedDict()
        for h in _hashtypes:
            # Stupid naming inconsistencies: mtree says "rmd160", hashlib says "ripemd160".
            _hashers[h] = hashlib.new('ripemd160' if h == 'rmd160' else h)
        with open(fpath, 'rb') as f:
            # Hash 64kb at a time in case it's a huge file. TODO: is this the most ideal chunk size?
            # The file is read ONCE and every chunk is fed to all of the hashers.
            for _hashbuf in iter(lambda: f.read(64000), b''):
                for _hasher in _hashers.values():
                    _hasher.update(_hashbuf)
        for h, _hasher in _hashers.items():
            hashes[h] = _hasher.hexdigest()
        return(hashes)

    def _build_header(self):
        """(Re)initialize self.spec with the commented header block (user, machine, tree, date in UTC)."""
        self.spec = ''
        _header = OrderedDict()
        _header['user'] = pwd.getpwuid(os.geteuid()).pw_name
        _header['machine'] = platform.node()
        _header['tree'] = str(self.path)
        _header['date'] = datetime.datetime.now(datetime.timezone.utc).strftime(_header_strptime_fmt)
        for h in _header:
            self.spec += '#\t{0:>7}: {1}\n'.format(h, _header[h])
        self.spec += '\n'
        return()



class MTreeParse(object):
def __init__(self, spec):
if not isinstance(spec, (str, bytes)):
@ -21,7 +194,6 @@ class MTreeParse(object):
spec = spec.decode('utf-8')
except UnicodeDecodeError:
raise ValueError('spec must be a utf-8 encoded set of bytes if using byte mode')
self._strptime_fmt = '%a %b %d %H:%M:%S %Y'
self.orig_spec = copy.deepcopy(spec) # For referencing in case someone wanted to write it out.
# We NOW need to handle the escaped linebreaking it does.
self._specdata = re.sub('\\\\\s+', '', spec).splitlines()
@ -82,7 +254,7 @@ class MTreeParse(object):
# They are restored by an "/unset". Since they're global and stateful, they're handled as a class attribute.
self.settings = copy.deepcopy(self._tplitem)
self._parse_items()
del(self.settings, self._tplitem, self._strptime_fmt)
del(self.settings, self._tplitem)


def _get_header(self):
@ -96,7 +268,7 @@ class MTreeParse(object):
header = l[0]
val = (l[1] if l[1] is not '(null)' else None)
if header == 'date':
val = datetime.datetime.strptime(val, self._strptime_fmt)
val = datetime.datetime.strptime(val, _header_strptime_fmt)
elif header == 'tree':
val = pathlib.PosixPath(val)
self.header[header] = val
@ -158,6 +330,8 @@ class MTreeParse(object):
return(out)
def _unset_parse(unsetline):
out = {}
if unsetline[1] == 'all':
return(copy.deepcopy(self._tplitem))
for i in unsetline:
out[i] = self._tplitem[i]
return(out)

View File

@ -11,18 +11,31 @@ class PromptStrings(object):
'attribs': {
'algo': {
'text': 'the subkey\'s encryption type/algorithm',
'choices': ['rsa', 'dsa'],
'default': 'rsa'
# The following can ONLY be used for encryption, not signing: elg, cv
#'choices': ['rsa', 'dsa', 'elg', 'ed', 'cv', 'nistp', 'brainpool.1', 'secp.k1'],
'choices': ['rsa', 'dsa', 'ed', 'nist', 'brainpool.1', 'sec.k1'],
#'default': 'rsa'
'default': 'ed'
},
'keysize': {
'text': 'the subkey\'s key size (in bits)',
'choices': {
'rsa': ['1024', '2048', '4096'],
'dsa': ['768', '2048', '3072']
'dsa': ['768', '2048', '3072'],
#'elg': ['1024', '2048', '4096'], # Invalid for signing, etc.
'ed': ['25519'],
#'cv': ['25519'],
'nistp': ['256', '384', '521'],
'brainpool.1': ['256', '384', '512'],
'sec.k1': ['256']
},
'default': {
'rsa': '4096',
'dsa': '3072'
'dsa': '3072',
'ed': '25519',
'nistp': '521',
'brainpool.1': '512',
'sec.k1': '256'
}
}
},
@ -113,4 +126,4 @@ class PromptStrings(object):
'Email: ')
}
}
}
}

View File

@ -1,3 +1,5 @@
# Yes, this is messy. They don't belong anywhere else, leave me alone.

import _io
import copy
import crypt
@ -14,6 +16,7 @@ import string
import uuid
import validators
import zlib
import requests
import lxml.etree
import lxml.objectify
from bs4 import BeautifulSoup
@ -30,7 +33,7 @@ passlib_schemes = ['des_crypt', 'md5_crypt', 'sha256_crypt', 'sha512_crypt']
# Build various hash digest name lists
digest_schemes = list(hashlib.algorithms_available)
# Provided by zlib
# TODO
# TODO?
digest_schemes.append('adler32')
digest_schemes.append('crc32')

@ -39,6 +42,54 @@ crypt_map = {'sha512': crypt.METHOD_SHA512,
'md5': crypt.METHOD_MD5,
'des': crypt.METHOD_CRYPT}


class Download(object):
    """
    Fetch a remote resource with requests, optionally resuming from a byte offset.

    A HEAD request is made on instantiation to learn the content length and whether the server accepts
    range requests; the body itself is only retrieved by fetch() (or on first access of bytes_obj).

    url: the resource to fetch.
    progress: if True, fetch() streams the body and prints progress to stdout; if False, the body is
              retrieved in one go.
    offset: the byte position to resume from. Only honoured if the server advertises range support and a
            content length.
    chunksize: the number of bytes to request per chunk when streaming.

    Raises ValueError if offset lies beyond the advertised content length.
    """

    def __init__(self, url, progress = True, offset = None, chunksize = 1024):
        self.cnt_len = None
        self.head = requests.head(url, allow_redirects = True).headers
        self.req_headers = {}
        self.range = False
        self.url = url
        self.offset = offset
        self.chunksize = chunksize
        self.progress = progress
        self.req = None
        self._bytes_obj = None
        # A server can explicitly refuse ranges with "Accept-Ranges: none".
        if self.head.get('accept-ranges', 'none').lower() != 'none':
            self.range = True
        if 'content-length' in self.head:
            try:
                self.cnt_len = int(self.head['content-length'])
            except (TypeError, ValueError):
                # A malformed header is treated the same as an absent one.
                pass
        if self.cnt_len and self.offset and self.range:
            if not self.offset <= self.cnt_len:
                raise ValueError(('The offset requested ({0}) is greater than '
                                  'the content-length value ({1})').format(self.offset, self.cnt_len))
            self.req_headers['range'] = 'bytes={0}-'.format(self.offset)

    @property
    def bytes_obj(self):
        """The downloaded body as bytes; fetched on first access if fetch() has not been called yet."""
        if self._bytes_obj is None:
            self.fetch()
        return(self._bytes_obj)

    @bytes_obj.setter
    def bytes_obj(self, value):
        self._bytes_obj = value

    def fetch(self):
        """Download the resource, store it in bytes_obj and return it."""
        if not self.progress:
            self.req = requests.get(self.url, allow_redirects = True, headers = self.req_headers)
            self._bytes_obj = self.req.content
        else:
            self.req = requests.get(self.url, allow_redirects = True, stream = True, headers = self.req_headers)
            # Chunks are collected and joined once at the end; repeatedly extending an immutable bytes object
            # copies everything received so far on every iteration.
            _chunks = []
            _bytelen = 0
            # TODO: better handling for logging instead of print()s?
            for chunk in self.req.iter_content(chunk_size = self.chunksize):
                _chunks.append(chunk)
                # Count what actually arrived; the final chunk is usually shorter than chunksize.
                _bytelen += len(chunk)
                if self.cnt_len:
                    print('\033[F')
                    print('{0:.2f}'.format((_bytelen / float(self.cnt_len)) * 100),
                          end = '%',
                          flush = True)
                else:
                    print('.', end = '')
            print()
            self._bytes_obj = b''.join(_chunks)
        return(self._bytes_obj)


class XPathFmt(string.Formatter):
def get_field(self, field_name, args, kwargs):
vals = self.get_value(field_name, args, kwargs), field_name
@ -50,18 +101,19 @@ class detect(object):
def __init__(self):
pass

def any_hash(self, hash_str):
def any_hash(self, hash_str, normalize = False):
h = hashid.HashID()
hashes = []
for i in h.identifyHash(hash_str):
if i.extended:
continue
x = i.name
if x.lower() in ('crc-32', 'ripemd-160', 'sha-1', 'sha-224',
'sha-256', 'sha-384', 'sha-512'):
if x.lower() in ('crc-32', 'ripemd-160', 'sha-1', 'sha-224', 'sha-256', 'sha-384', 'sha-512'):
# Gorram you, c0re.
x = re.sub('-', '', x.lower())
_hashes = [h.lower() for h in digest_schemes]
_hashes = [h.lower() for h in digest_schemes] # TODO: move this outside so we don't define it every invoke
if normalize:
x = re.sub('(-|crypt|\s+)', '', x.lower())
if x.lower() in sorted(list(set(_hashes))):
hashes.append(x)
return(hashes)
@ -83,8 +135,7 @@ class detect(object):
return(salt)

def remote_files(self, url_base, ptrn = None, flags = []):
with urlopen(url_base) as u:
soup = BeautifulSoup(u.read(), 'lxml')
soup = BeautifulSoup(Download(url_base, progress = False).bytes_obj, 'lxml')
urls = []
if 'regex' in flags:
if not isinstance(ptrn, str):
@ -113,8 +164,7 @@ class detect(object):
return(urls)

def gpgkeyID_from_url(self, url):
with urlopen(url) as u:
data = u.read()
data = Download(url, progress = False).bytes_obj
g = GPG.GPGHandler()
key_ids = g.get_sigs(data)
del(g)
@ -166,7 +216,7 @@ class detect(object):
# Get any easy ones out of the way first.
if name in digest_schemes:
return(name)
# Otherwise grab the first one that matches, in order from the .
# Otherwise grab the first one that matches
_digest_re = re.compile('^{0}$'.format(name.strip()), re.IGNORECASE)
for h in digest_schemes:
if _digest_re.search(h):
@ -774,14 +824,19 @@ class valid(object):
return(True)

def salt_hash(self, salthash):
    """
    Check whether salthash looks like a crypt(3)-style salt.

    A valid salt is an optional "$<ident>$" prefix (using one of the idents of the methods in crypt_map),
    up to 16 characters from the [./0-9A-Za-z] alphabet, and an optional trailing "$".

    Returns True if the ENTIRE string fits that shape, otherwise False.
    """
    # crypt_map maps names to crypt methods; the ident lives on the method objects (the values), not on the
    # keys. Methods with an empty ident (traditional DES) contribute nothing to the character class.
    _idents = ''.join([i.ident for i in crypt_map.values() if i.ident])
    # noinspection PyStringFormat
    _regex = re.compile(r'(\$[{0}]\$)?[./0-9A-Za-z]{{0,16}}\$?'.format(_idents))
    # Every component of the pattern is optional, so an unanchored search() succeeds on ANY input; the whole
    # string has to match for this check to mean anything.
    return(_regex.fullmatch(salthash) is not None)

def salt_hash_full(self, salthash, hash_type):
    """
    Check whether salthash is recognised as a hash of the type hash_type.

    The candidate types come from detect.any_hash() (normalized); each is lowercased, stripped of dashes
    and cut down to its first whitespace-separated word before being compared to the lowercased hash_type.

    Returns True if hash_type is among the candidates, otherwise False.
    """
    _candidates = []
    for _found in detect.any_hash(self, salthash, normalize = True):
        _dashless = re.sub('-', '', _found.lower())
        _candidates.append(_dashless.split()[0])
    if hash_type.lower() in _candidates:
        return(True)
    return(False)

def plugin_name(self, name):
if len(name) == 0:
return(False)
@ -1068,4 +1123,4 @@ class xml_supplicant(object):
for i in selectors.items():
if i[1] and i[0] in self.selector_ids:
xpath += '[@{0}="{1}"]'.format(*i)
return(xpath)
return(xpath)