I'm about to change how I handle regexes entirely

This commit is contained in:
brent s. 2018-05-15 22:01:46 -04:00
parent 1df5bd87e0
commit bf12fbcda3
2 changed files with 41 additions and 29 deletions

View File

@ -34,7 +34,11 @@ crypt_map = {'sha512': crypt.METHOD_SHA512,
'des': crypt.METHOD_CRYPT} 'des': crypt.METHOD_CRYPT}


class XPathFmt(string.Formatter): class XPathFmt(string.Formatter):
def __init__(self):
print('foo')

def get_field(self, field_name, args, kwargs): def get_field(self, field_name, args, kwargs):
# custom arg to specify if it's a regex pattern or not
vals = self.get_value(field_name, args, kwargs), field_name vals = self.get_value(field_name, args, kwargs), field_name
if not vals[0]: if not vals[0]:
vals = ('{{{0}}}'.format(vals[1]), vals[1]) vals = ('{{{0}}}'.format(vals[1]), vals[1])
@ -426,10 +430,14 @@ class xml_supplicant(object):
xmlroot = lxml.etree.fromstring(raw) xmlroot = lxml.etree.fromstring(raw)
self.btags = {'xpath': {}, self.btags = {'xpath': {},
'regex': {}} 'regex': {}}
self.fmt = XPathFmt()
self.max_recurse = max_recurse self.max_recurse = max_recurse
self.ptrn = re.compile('(?<=(?<!\{)\{)[^{}]*(?=\}(?!\}))') #self.ptrn = re.compile('(?<=(?<!\{)\{)[^{}]*(?=\}(?!\}))')
# I don't have permission to credit them, but to the person who helped
# me with this regex - thank you. You know who you are.
self.ptrn = re.compile(('(?<=(?<!\{)\{)(?:[^{}]+'
'|{{[^{}]*}})*(?=\}(?!\}))'))
self.root = lxml.etree.ElementTree(xmlroot) self.root = lxml.etree.ElementTree(xmlroot)
self.substitutions = {}
if not profile: if not profile:
self.profile = xmlroot.xpath('/bdisk/profile[1]')[0] self.profile = xmlroot.xpath('/bdisk/profile[1]')[0]
else: else:
@ -480,43 +488,41 @@ class xml_supplicant(object):
if isinstance(element, lxml.etree._Element): if isinstance(element, lxml.etree._Element):
if isinstance(element, lxml.etree._Comment): if isinstance(element, lxml.etree._Comment):
return(element) return(element)
# if len(element) == 0:
# print(element.text)
if element.text: if element.text:
_dictmap = self.xpath_to_dict(element.text) _dictmap = self.xpath_to_dict(element.text)
while _dictmap: while _dictmap:
for elem in _dictmap: for elem in _dictmap:
# if _dictmap is None:
# continue
# # I still for the life of me cannot figure out why this
# # is not caught by the above. But it isn't.
# if elem not in _dictmap:
# continue
if isinstance(_dictmap[elem], str): if isinstance(_dictmap[elem], str):
try: try:
print('bleh') newpath = element.xpath(_dictmap[elem])
print(_dictmap[elem])
try:
print(self.get_path(element))
except:
pass
newpath = element.xpath(_dictmap[elem])[0]
except (AttributeError, IndexError, TypeError): except (AttributeError, IndexError, TypeError):
print('blugh')
newpath = element newpath = element
except lxml.etree.XPathEvalError:
return(element)
try: try:
self.substitutions[elem] = self.substitute( self.btags['xpath'][elem] = self.substitute(
newpath, newpath, (recurse_count + 1))[0]
(recurse_count + 1)
)[0]
except (IndexError, TypeError): except (IndexError, TypeError):
raise ValueError( raise ValueError(
('Encountered an error while trying to ' ('Encountered an error while trying to '
'substitute {0} at {1}').format( 'substitute {0} at {1}').format(
elem, self.get_path(element) elem, self.get_path(element)
)) ))
element.text = XPathFmt().vformat( print(element.text)
element.text = self.fmt.format(
element.text, element.text,
[], {**self.btags['xpath'],
self.substitutions) **self.btags['regex']})
# element.text = self.fmt.vformat(
# element.text,
# [],
# {**self.btags['xpath'],
# **self.btags['regex']})
# element.text = (element.text).format(
# {**self.btags['xpath'],
# **self.btags['regex']})
_dictmap = self.xpath_to_dict(element.text) _dictmap = self.xpath_to_dict(element.text)
return(element) return(element)


@ -538,11 +544,15 @@ class xml_supplicant(object):
d = {} d = {}
try: try:
_, xpath_expr = item.split('%', 1) _, xpath_expr = item.split('%', 1)
print(_) if _ not in self.btags:
if not _ == 'xpath':
continue continue
if item not in self.substitutions: if item not in self.btags[_]:
self.substitutions[item] = None self.btags[_][item] = None
if _ == 'regex':
_re = re.sub('^regex%', '', item)
_re = re.sub('{{(.*)}}', '\g<1>', _re)
# We use a native python object
self.btags['regex'][item] = re.compile(_re)
d[item] = xpath_expr d[item] = xpath_expr
except ValueError: except ValueError:
return(None) return(None)

View File

@ -48,7 +48,8 @@
<source arch="x86_64"> <source arch="x86_64">
<mirror>http://archlinux.mirror.domain.tld</mirror> <mirror>http://archlinux.mirror.domain.tld</mirror>
<webroot>/iso/latest</webroot> <webroot>/iso/latest</webroot>
<tarball flags="regex,latest">{xpath%../mirror/text()}{xpath%../webroot/text()}/{regex%archlinux-bootstrap-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-x86_64\.tar\.gz}</tarball> <tarball flags="regex,latest">{xpath%../mirror/text()}{xpath%../webroot/text()}/{regex%archlinux-bootstrap-[0-9]{{4}}\.[0-9]{{2}}\.[0-9]{{2}}-x86_64\.tar\.gz}</tarball>
<!-- <tarball flags="regex,latest">{xpath%../mirror/text()}{xpath%../webroot/text()}/{regex%archlinux-bootstrap-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-x86_64\.tar\.gz}</tarball> -->
<checksum hash_algo="sha1" flags="none" >{xpath%../mirror/text()}{xpath%../webroot/text()}/sha1sums.txt</checksum> <checksum hash_algo="sha1" flags="none" >{xpath%../mirror/text()}{xpath%../webroot/text()}/sha1sums.txt</checksum>
<sig keys="7F2D434B9741E8AC" <sig keys="7F2D434B9741E8AC"
keyserver="hkp://pool.sks-keyservers.net" keyserver="hkp://pool.sks-keyservers.net"
@ -57,7 +58,8 @@
<source arch="i686"> <source arch="i686">
<mirror>http://archlinux32.mirror.domain.tld</mirror> <mirror>http://archlinux32.mirror.domain.tld</mirror>
<webroot>/iso/latest</webroot> <webroot>/iso/latest</webroot>
<tarball flag="regex,latest">{xpath%../mirror/text()}/{xpath%../webroot/text()}/{regex%archlinux-bootstrap-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-i686\.tar\.gz}</tarball> <tarball flag="regex,latest">{xpath%../mirror/text()}/{xpath%../webroot/text()}/{regex%archlinux-bootstrap-[0-9]{{4}}\.[0-9]{{2}}\.[0-9]{{2}}-i686\.tar\.gz}</tarball>
<!-- <tarball flag="regex,latest">{xpath%../mirror/text()}/{xpath%../webroot/text()}/{regex%archlinux-bootstrap-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-i686\.tar\.gz}</tarball> -->
<checksum hash_algo="sha512" explicit="yes">cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e</checksum> <checksum hash_algo="sha512" explicit="yes">cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e</checksum>
<sig keys="248BF41F9BDD61D41D060AE774EDA3C6B06D0506" <sig keys="248BF41F9BDD61D41D060AE774EDA3C6B06D0506"
keyserver="hkp://pool.sks-keyservers.net">{xpath%../tarball/text()}.sig</sig> keyserver="hkp://pool.sks-keyservers.net">{xpath%../tarball/text()}.sig</sig>