repomirror/repomirror/config.py

import copy
import datetime
import os
import logging
import re
import shutil
##
import requests
import requests.auth
from lxml import etree


# Logger shared by everything in this module. Calling getLogger() without a
# name returns the root logger, so records go to whatever handlers the
# application has configured on the root.
_logger = logging.getLogger()


def create_default_cfg():
    """Build a stripped-down sample configuration from example.config.xml.

    The example file is looked up one directory above this module. Its
    comments are dropped, element text and attribute values are blanked,
    and repeated sibling elements are reduced to the first one. Elements
    tagged ``mirror`` (in the document's default namespace) are skipped by
    both the blanking and the de-duplication passes. A comment pointing at
    the fully commented example is inserted as the first child of the root.

    Returns:
        The serialized document as UTF-8 encoded bytes, including an XML
        declaration.
    """
    here = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    samplexml = os.path.abspath(os.path.join(here, '..', 'example.config.xml'))
    with open(samplexml, 'rb') as fh:
        source_root = etree.fromstring(fh.read())

    # Strip comments by serializing without them, then repair the leftover
    # whitespace by hand. etree has a .canonicalize(), but it chokes on a
    # default namespace (https://bugs.launchpad.net/lxml/+bug/1869455), so
    # all of this is something of a hack.
    blank_re = re.compile(r'^\s*$')
    canonical = etree.tostring(source_root,
                               with_comments = False,
                               method = 'c14n',
                               pretty_print = True).decode('utf-8')
    kept = [ln.strip() for ln in canonical.splitlines() if not blank_re.search(ln)]
    root = etree.fromstring(''.join(kept).encode('utf-8'))

    ns_query = "descendant-or-self::*[namespace-uri()!='']"

    def _is_mirror(elem):
        # True for <mirror> elements in the document's default namespace.
        return elem.tag == '{{{0}}}mirror'.format(root.nsmap[None])

    # Pass 1: blank out element text and every attribute value.
    for elem in root.xpath(ns_query):
        if _is_mirror(elem):
            continue
        text = elem.text
        if text is not None and text.strip() != '':
            elem.text = ''
        for attr_name, attr_val in elem.attrib.items():
            if attr_val is not None:
                elem.set(attr_name, '')

    # Pass 2: keep only the first of any run of same-tag siblings.
    for elem in root.xpath(ns_query):
        if _is_mirror(elem):
            continue
        parent = elem.getparent()
        if parent is None:
            # The root, or an element that an earlier iteration detached.
            continue
        for extra in parent.findall(elem.tag)[1:]:
            parent.remove(extra)

    # Point the user at the fully commented config.
    pointer = ('\n  Please reference the fully commented example.config.xml found either '
               'at:\n  '
               '  * {0}\n    * https://git.square-r00t.net/RepoMirror/tree/'
               'example.config.xml\n  and then configure this according to those '
               'instructions.\n  ').format(samplexml)
    root.insert(0, etree.Comment(pointer))

    return etree.tostring(root,
                          pretty_print = True,
                          with_comments = True,
                          with_tail = True,
                          encoding = 'UTF-8',
                          xml_declaration = True)


class Config(object):
    """Load an XML configuration file and validate it against its XSD.

    On construction the file is read, parsed twice (one copy with
    namespaces stripped from element tags, one copy left intact), the
    schema is fetched, the document is re-parsed with schema attribute
    defaults applied, and the namespaced copy is validated.

    Attributes set by this class:
        xml_path: absolute, user-expanded path of the configuration file.
        raw: the file's bytes.
        xml / tree: root element and tree with namespaces stripped from tags.
        ns_xml / ns_tree: root element and tree with namespaces preserved.
        xsd: the compiled etree.XMLSchema (None until get_xsd() runs).
        defaults_parser: XMLParser that injects schema attribute defaults.
    """
    default_xsd = 'http://schema.xml.r00t2.io/projects/repomirror.xsd'
    default_xml_path = '~/.config/repomirror.xml'
    # Seconds to wait for the schema server; without a timeout requests.get()
    # can block indefinitely.
    xsd_timeout = 30

    def __init__(self, xml_path, *args, **kwargs):
        """Read and parse the configuration at ``xml_path``.

        Args:
            xml_path: path to the XML configuration; a falsy value selects
                ``default_xml_path``.
            *args, **kwargs: accepted and ignored.

        Raises:
            ValueError: the file does not exist. A sample configuration is
                written to that path before raising.
        """
        if not xml_path:
            xml_path = self.default_xml_path
        self.xml_path = os.path.abspath(os.path.expanduser(xml_path))
        if not os.path.isfile(self.xml_path):
            # The parent directory (e.g. ~/.config) may not exist yet; create
            # it so that writing the sample does not fail.
            os.makedirs(os.path.dirname(self.xml_path), exist_ok = True)
            with open(self.xml_path, 'wb') as fh:
                fh.write(create_default_cfg())
            # Report the path that was actually written, not the class default.
            _logger.error(('{0} does not exist so a sample configuration file has been created in its place. '
                           'Be sure to configure it appropriately.').format(self.xml_path))
            raise ValueError('Config does not exist')
        with open(self.xml_path, 'rb') as fh:
            self.raw = fh.read()
        self.xml = None
        self.xsd = None
        self.ns_xml = None
        self.tree = None
        self.ns_tree = None
        self.defaults_parser = None
        self.parse_xml()
        _logger.info('Instantiated {0}.'.format(type(self).__name__))

    def get_xsd(self):
        """Locate, load and compile the XSD, storing it in ``self.xsd``.

        The location is taken from the root element's xsi:schemaLocation
        attribute, falling back to ``default_xsd`` when the attribute is
        missing or blank. ``file://`` locations are read from disk; anything
        else is fetched with requests.

        Raises:
            RuntimeError: the schema download returned a non-OK response.
        """
        xsi = self.xml.nsmap.get('xsi', 'http://www.w3.org/2001/XMLSchema-instance')
        schemaLocation = '{{{0}}}schemaLocation'.format(xsi)
        schemaURL = self.xml.attrib.get(schemaLocation, self.default_xsd)
        split_url = schemaURL.split()
        if not split_url:
            # Attribute present but empty/whitespace: treat it as absent.
            schemaURL = self.default_xsd
        elif len(split_url) == 2:  # a properly defined schemaLocation
            schemaURL = split_url[1]
        else:
            schemaURL = split_url[0]  # a LAZY schemaLocation
        if schemaURL.startswith('file://'):
            schemaURL = re.sub(r'^file://', r'', schemaURL)
            with open(schemaURL, 'rb') as fh:
                raw_xsd = fh.read()
            base_url = os.path.dirname(schemaURL) + '/'
        else:
            req = requests.get(schemaURL, timeout = self.xsd_timeout)
            if not req.ok:
                raise RuntimeError('Could not download XSD')
            raw_xsd = req.content
            # Directory part of the final (post-redirect) URL, used as the
            # base URL when parsing the schema.
            base_url = os.path.split(req.url)[0] + '/'  # This makes me feel dirty.
        self.xsd = etree.XMLSchema(etree.XML(raw_xsd, base_url = base_url))

    def parse_xml(self):
        """Run the full pipeline: parse, fetch XSD, apply defaults, validate."""
        self.parse_raw()
        self.get_xsd()
        self.populate_defaults()
        self.validate()

    def parse_raw(self, parser = None):
        """(Re)build both trees from ``self.raw``.

        Args:
            parser: optional lxml parser passed to etree.fromstring().

        XIncludes are processed on both trees; afterwards namespaces are
        stripped from the element tags of ``self.xml``/``self.tree`` only.
        """
        self.xml = etree.fromstring(self.raw, parser = parser)
        self.ns_xml = etree.fromstring(self.raw, parser = parser)
        self.tree = self.xml.getroottree()
        self.ns_tree = self.ns_xml.getroottree()
        self.tree.xinclude()
        self.ns_tree.xinclude()
        self.strip_ns()

    def populate_defaults(self):
        """Re-parse the raw XML with a parser that fills in schema attribute defaults."""
        if self.xsd is None:
            self.get_xsd()
        if self.defaults_parser is None:
            self.defaults_parser = etree.XMLParser(schema = self.xsd, attribute_defaults = True)
        self.parse_raw(parser = self.defaults_parser)

    def remove_defaults(self):
        """Re-parse the raw XML with the default parser, dropping injected defaults."""
        self.parse_raw()

    def strip_ns(self, obj = None):
        """Strip namespaces from element tags.

        Args:
            obj: None to strip ``self.tree`` and ``self.xml`` in place, or an
                lxml element/element tree to strip on a deep copy.

        Returns:
            None when operating in place, otherwise the stripped copy.

        Raises:
            ValueError: ``obj`` is neither None nor an lxml element/tree.
        """
        # https://stackoverflow.com/questions/30232031/how-can-i-strip-namespaces-out-of-an-lxml-tree/30233635#30233635
        xpathq = "descendant-or-self::*[namespace-uri()!='']"
        # Compare against None explicitly: an lxml element without children
        # is falsy, so a truthiness test would mistake a leaf element for
        # "no argument given".
        if obj is None:
            for x in (self.tree, self.xml):
                for e in x.xpath(xpathq):
                    e.tag = etree.QName(e).localname
            return None
        if isinstance(obj, (etree._Element, etree._ElementTree)):
            obj = copy.deepcopy(obj)
            for e in obj.xpath(xpathq):
                e.tag = etree.QName(e).localname
            return obj
        raise ValueError('Did not know how to parse obj parameter')

    def validate(self):
        """Validate the namespaced tree against the XSD.

        Fetches the schema first if it has not been loaded. Validation
        failures propagate from XMLSchema.assertValid() (lxml raises
        etree.DocumentInvalid).
        """
        if self.xsd is None:
            self.get_xsd()
        self.xsd.assertValid(self.ns_tree)