175 lines
6.6 KiB
Python
175 lines
6.6 KiB
Python
import copy
|
|
import datetime
|
|
import os
|
|
import logging
|
|
import re
|
|
import shutil
|
|
##
|
|
import requests
|
|
import requests.auth
|
|
from lxml import etree
|
|
|
|
|
|
_logger = logging.getLogger()
|
|
|
|
|
|
def create_default_cfg():
    """Build a stripped-down sample configuration from ``example.config.xml``.

    The example file is looked up one directory above this module. Comments
    and whitespace-only lines are dropped, element text and attribute values
    are blanked, and only the first of any same-tag siblings is kept.
    Elements tagged ``mirror`` in the document's default namespace are skipped
    by both of those passes. A comment pointing at the fully commented example
    is inserted as the first child of the root.

    Returns:
        The serialized document as UTF-8 bytes, including an XML declaration.
    """
    here = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    samplexml = os.path.abspath(os.path.join(here, '..', 'example.config.xml'))
    with open(samplexml, 'rb') as fh:
        xml = etree.fromstring(fh.read())

    # Strip comments and collapse the whitespace they leave behind.
    # The original author notes that etree's .canonicalize() chokes on a
    # default namespace (https://bugs.launchpad.net/lxml/+bug/1869455), hence
    # this serialize / filter / re-parse round trip.
    serialized = etree.tostring(xml,
                                with_comments = False,
                                method = 'c14n',
                                pretty_print = True).decode('utf-8')
    blank_line = re.compile(r'^\s*$')
    kept_lines = [ln.strip() for ln in serialized.splitlines() if not blank_line.search(ln)]
    xml = etree.fromstring(''.join(kept_lines).encode('utf-8'))

    # Every element that lives in some namespace, root included.
    xpathq = "descendant-or-self::*[namespace-uri()!='']"

    def _is_mirror(elem):
        # Evaluated lazily per element so the default-namespace lookup only
        # happens when there is at least one element to inspect.
        return elem.tag == '{{{0}}}mirror'.format(xml.nsmap[None])

    # Pass 1: blank out element text and attribute values.
    for elem in xml.xpath(xpathq):
        if _is_mirror(elem):
            continue
        text = elem.text
        if text is not None and text.strip():
            elem.text = ''
        for attr_name, attr_value in elem.attrib.items():
            if attr_value is None:
                continue
            elem.attrib[attr_name] = ''

    # Pass 2: keep only the first of any same-tag siblings.
    for elem in xml.xpath(xpathq):
        if _is_mirror(elem):
            continue
        parent = elem.getparent()
        if parent is None:
            # The root, or an element already detached by an earlier removal.
            continue
        for duplicate in parent.findall(elem.tag)[1:]:
            parent.remove(duplicate)

    # Point the user at the fully commented config.
    notice = ('\n Please reference the fully commented example.config.xml found either '
              'at:\n '
              ' * {0}\n * https://git.square-r00t.net/RepoMirror/tree/'
              'example.config.xml\n and then configure this according to those '
              'instructions.\n ').format(samplexml)
    xml.insert(0, etree.Comment(notice))

    return etree.tostring(xml,
                          pretty_print = True,
                          with_comments = True,
                          with_tail = True,
                          encoding = 'UTF-8',
                          xml_declaration = True)
|
|
|
|
|
|
class Config(object):
    """Load, default-populate and validate an XML configuration file.

    The raw bytes are parsed into two trees: ``xml``/``tree`` have their
    element namespaces stripped (see ``strip_ns``), while ``ns_xml``/``ns_tree``
    keep them and are what ``validate`` checks against the XSD.
    """

    # XSD used when the document root has no usable xsi:schemaLocation.
    default_xsd = 'http://schema.xml.r00t2.io/projects/repomirror.xsd'
    # Config path used when the caller passes a falsy xml_path.
    default_xml_path = '~/.config/repomirror.xml'
    # Seconds to wait for the XSD download; without a timeout requests can block forever.
    xsd_timeout = 30

    def __init__(self, xml_path, *args, **kwargs):
        """Read and parse the configuration at ``xml_path``.

        Args:
            xml_path: Path to the config file; ``default_xml_path`` is used
                when this is falsy. ``~`` is expanded.

        Raises:
            ValueError: if the file does not exist. A sample configuration is
                written to that path before raising.
        """
        if not xml_path:
            xml_path = self.default_xml_path
        self.xml_path = os.path.abspath(os.path.expanduser(xml_path))
        if not os.path.isfile(self.xml_path):
            # Generate the sample BEFORE opening the target: if generation
            # fails, no empty file is left behind to be mistaken for a config.
            sample_cfg = create_default_cfg()
            # The parent directory (e.g. ~/.config) may not exist yet.
            os.makedirs(os.path.dirname(self.xml_path), exist_ok = True)
            with open(self.xml_path, 'wb') as fh:
                fh.write(sample_cfg)
            # Report the path actually written, not the class default.
            _logger.error(('{0} does not exist so a sample configuration file has been created in its place. '
                           'Be sure to configure it appropriately.').format(self.xml_path))
            raise ValueError('Config does not exist')
        with open(self.xml_path, 'rb') as fh:
            self.raw = fh.read()
        self.xml = None
        self.xsd = None
        self.ns_xml = None
        self.tree = None
        self.ns_tree = None
        self.defaults_parser = None
        self.parse_xml()
        _logger.info('Instantiated {0}.'.format(type(self).__name__))

    @staticmethod
    def _schema_url_from_location(location, default):
        """Pick the XSD URL out of an ``xsi:schemaLocation`` value.

        A two-token value ("namespace URL") yields the second token; any other
        non-empty value yields its first token; an empty or whitespace-only
        value yields ``default``.
        """
        tokens = (location or '').split()
        if not tokens:
            return default
        if len(tokens) == 2:  # a properly defined schemaLocation
            return tokens[1]
        return tokens[0]  # a LAZY schemaLocation

    def get_xsd(self):
        """Fetch the XSD referenced by the document and store it in ``self.xsd``.

        ``file://`` locations are read from disk; anything else is downloaded
        with ``requests``.

        Raises:
            RuntimeError: if the download does not return an OK status.
        """
        xsi = self.xml.nsmap.get('xsi', 'http://www.w3.org/2001/XMLSchema-instance')
        location_attr = '{{{0}}}schemaLocation'.format(xsi)
        # An empty attribute value falls back to the default XSD instead of
        # raising IndexError.
        schema_url = self._schema_url_from_location(
            self.xml.attrib.get(location_attr, self.default_xsd),
            self.default_xsd)
        if schema_url.startswith('file://'):
            schema_path = schema_url[len('file://'):]
            with open(schema_path, 'rb') as fh:
                raw_xsd = fh.read()
            base_url = os.path.dirname(schema_path) + '/'
        else:
            req = requests.get(schema_url, timeout = self.xsd_timeout)
            if not req.ok:
                raise RuntimeError('Could not download XSD')
            raw_xsd = req.content
            # Parent "directory" of the final (post-redirect) URL.
            base_url = os.path.split(req.url)[0] + '/'
        self.xsd = etree.XMLSchema(etree.XML(raw_xsd, base_url = base_url))
        return None

    def parse_xml(self):
        """Run the full pipeline: parse, fetch the XSD, apply defaults, validate."""
        self.parse_raw()
        self.get_xsd()
        self.populate_defaults()
        self.validate()
        return None

    def parse_raw(self, parser = None):
        """Parse ``self.raw`` into the stripped and namespaced trees.

        Args:
            parser: Optional lxml parser passed to ``etree.fromstring``.
        """
        self.xml = etree.fromstring(self.raw, parser = parser)
        self.ns_xml = etree.fromstring(self.raw, parser = parser)
        self.tree = self.xml.getroottree()
        self.ns_tree = self.ns_xml.getroottree()
        self.tree.xinclude()
        self.ns_tree.xinclude()
        # Only xml/tree are stripped; ns_xml/ns_tree keep their namespaces.
        self.strip_ns()
        return None

    def populate_defaults(self):
        """Re-parse the raw document with a schema-aware parser.

        The parser is built once with ``schema = self.xsd`` and
        ``attribute_defaults = True`` (presumably so schema-declared attribute
        defaults end up in the parsed trees) and is cached on the instance.
        """
        if not self.xsd:
            self.get_xsd()
        if not self.defaults_parser:
            self.defaults_parser = etree.XMLParser(schema = self.xsd, attribute_defaults = True)
        self.parse_raw(parser = self.defaults_parser)
        return None

    def remove_defaults(self):
        """Re-parse the raw document with the default parser (no schema)."""
        self.parse_raw()
        return None

    def strip_ns(self, obj = None):
        """Replace namespaced element tags with their local names.

        Args:
            obj: ``None`` to strip ``self.tree``/``self.xml`` in place, or an
                lxml element/tree to strip a deep copy of.

        Returns:
            ``None`` when operating in place, otherwise the stripped copy.

        Raises:
            ValueError: if ``obj`` is neither ``None`` nor an lxml element/tree.
        """
        # https://stackoverflow.com/questions/30232031/how-can-i-strip-namespaces-out-of-an-lxml-tree/30233635#30233635
        xpathq = "descendant-or-self::*[namespace-uri()!='']"
        # Identity test on purpose: an lxml element with no children is falsy,
        # so "not obj" would treat a childless element as "no argument".
        if obj is None:
            for x in (self.tree, self.xml):
                for e in x.xpath(xpathq):
                    e.tag = etree.QName(e).localname
            return None
        if isinstance(obj, (etree._Element, etree._ElementTree)):
            obj = copy.deepcopy(obj)
            for e in obj.xpath(xpathq):
                e.tag = etree.QName(e).localname
            return obj
        raise ValueError('Did not know how to parse obj parameter')

    def validate(self):
        """Assert that the namespaced tree is valid against the XSD.

        Fetches the XSD first if it has not been loaded yet; any validation
        error raised by ``assertValid`` propagates to the caller.
        """
        if not self.xsd:
            self.get_xsd()
        self.xsd.assertValid(self.ns_tree)
        return None
|