checking in work
This commit is contained in:
parent
2ba79cd801
commit
eed480c590
@ -4,15 +4,8 @@ import logging
|
|||||||
##
|
##
|
||||||
from . import config
|
from . import config
|
||||||
from . import constants
|
from . import constants
|
||||||
|
from . import sync
|
||||||
|
|
||||||
|
|
||||||
_logger = logging.getLogger()
|
_logger = logging.getLogger()
|
||||||
|
|
||||||
|
|
||||||
class Sync(object):
|
|
||||||
def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
|
|
||||||
_args = dict(locals())
|
|
||||||
del(_args['self'])
|
|
||||||
_logger.debug('Sync class instantiated with args: {0}'.format(_args))
|
|
||||||
self.cfg = config.Config(cfg)
|
|
||||||
|
|
||||||
|
61
repomirror/sync.py
Normal file
61
repomirror/sync.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
##
|
||||||
|
from . import config
|
||||||
|
|
||||||
|
|
||||||
|
_logger = logging.getLogger()
|
||||||
|
|
||||||
|
|
||||||
|
class Args(object):
    """Holder for the arguments described by an XML element.

    args_xml must provide an ``xpath()`` method (e.g. an lxml element); it is
    kept on ``self.xml``. ``self.args`` starts as an empty list.
    """

    def __init__(self, args_xml):
        self.xml = args_xml
        self.args = []
        self._parse_xml()

    def _parse_xml(self):
        """Walk the elements matched by the XPath query '(short|long)'.

        The loop previously had no body at all, which is a syntax error and made
        the whole module unimportable. Nothing is collected yet; self.args is
        left untouched until the real parsing logic is written.
        """
        for arg_xml in self.xml.xpath('(short|long)'):
            # TODO: convert arg_xml into an entry of self.args.
            pass
        return(None)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Mount(object):
    """Record whether a given path is currently a mountpoint.

    mpchk_xml is either a path string or an object whose ``text`` attribute
    holds the path (NOTE(review): the sibling TimestampFile reads ``.text`` from
    its element, so both forms are accepted here - confirm what callers pass).
    After construction ``self.is_mounted`` is True or False.
    """

    def __init__(self, mpchk_xml):
        # Plain strings have no .text attribute, so they pass through unchanged.
        mp_path = getattr(mpchk_xml, 'text', mpchk_xml)
        self.path = os.path.abspath(os.path.expanduser(mp_path))
        self.is_mounted = None
        self._check_mount()

    def _check_mount(self, mounts_file = '/proc/mounts'):
        """Set self.is_mounted by looking for self.path in the mount table.

        mounts_file is the mount table to read; it defaults to /proc/mounts.
        """
        import re  # Local import; this module does not import re at the top.
        # The mount table writes space, tab, newline and backslash inside a
        # mountpoint as a three-digit octal escape (e.g. "\040" for a space).
        octal_re = re.compile(r'\\([0-7]{3})')
        found = False
        with open(mounts_file, 'r') as fh:
            for line in fh:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or malformed line; there is no mountpoint column.
                    continue
                mp = octal_re.sub(lambda m: chr(int(m.group(1), 8)), fields[1])
                if mp == self.path:
                    found = True
                    break
        # Assigned only after a successful read so an unreadable table leaves
        # the previous value in place, as before.
        self.is_mounted = found
        return(None)
|
||||||
|
|
||||||
|
|
||||||
|
class TimestampFile(object):
    """Path and time format of a timestamp file, read from an XML element.

    ts_xml must provide ``attrib`` (a mapping) and ``text`` (the file path).
    ``self.fmt`` is the strftime-style format; ``self.path`` is the absolute,
    user-expanded path.

    Raises ValueError if the element carries no path.
    """

    # Special timeFormat keywords and the format strings they stand for.
    # NOTE(review): '%s' is not a portable strftime directive - confirm the
    # target platforms support it.
    _special_fmts = {'UNIX_EPOCH': '%s',
                     'MICROSECOND_EPOCH': '%s.%f'}

    def __init__(self, ts_xml):
        fmt = ts_xml.attrib.get('timeFormat', 'UNIX_EPOCH')
        # Anything that is not a special keyword is used as the format verbatim.
        self.fmt = self._special_fmts.get(fmt, fmt)
        # Element text may be padded with whitespace/newlines, or be absent
        # (None) for an empty element.
        ts_path = (ts_xml.text or '').strip()
        if not ts_path:
            raise ValueError('Timestamp file element does not contain a path')
        self.path = os.path.abspath(os.path.expanduser(ts_path))
|
||||||
|
|
||||||
|
|
||||||
|
class Upstream(object):
    """Placeholder for an upstream definition; nothing is parsed or stored yet."""

    def __init__(self, upstream_xml):
        # Intentionally empty: upstream_xml is accepted but not used.
        return(None)
|
||||||
|
|
||||||
|
|
||||||
|
class Sync(object):
    """Entry point object for a sync run; currently it only loads the configuration."""

    def __init__(self, cfg = None, dummy = False, distro = None, logdir = None, *args, **kwargs):
        """Log the call arguments and build self.cfg from cfg.

        dummy, distro, logdir, *args and **kwargs are accepted and logged but
        not otherwise used here.
        """
        # Snapshot the arguments before any other local is created; the
        # instance itself is left out of the log line.
        call_args = dict(locals())
        call_args.pop('self')
        message = 'Sync class instantiated with args: {0}'.format(call_args)
        _logger.debug(message)
        self.cfg = config.Config(cfg)
|
@ -28,8 +28,8 @@
|
|||||||
* https://strftime.org/
|
* https://strftime.org/
|
||||||
The default is to use a regular UNIX Epoch integer (e.g. June 13, 2020 5:03:53 PM UTC => 1592067833).
|
The default is to use a regular UNIX Epoch integer (e.g. June 13, 2020 5:03:53 PM UTC => 1592067833).
|
||||||
This can be manually specified by the special string "UNIX_EPOCH".
|
This can be manually specified by the special string "UNIX_EPOCH".
|
||||||
Optionally, you can use the special string "MICROSECOND_EPOCH", which will specify the above with microseconds.
|
Optionally, you can use the special string "MICROSECOND_EPOCH", which will specify the above with left-padded
|
||||||
e.g. June 13, 2020 5:09:13.995777 PM UTC => 1592068153.995777
|
microseconds (e.g. June 13, 2020 5:09:13.995777 PM UTC => 1592068153.995777).
|
||||||
-->
|
-->
|
||||||
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/arch.lastcheck</lastLocalCheck>
|
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/arch.lastcheck</lastLocalCheck>
|
||||||
<!--
|
<!--
|
||||||
@ -84,6 +84,10 @@
|
|||||||
<!--
|
<!--
|
||||||
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
||||||
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
||||||
|
If you need to find a mirror, you may be interested in the utils/find_fastest_upstream/ scripts. They will
|
||||||
|
automatically find (and sort based on connection speed) all mirrors in your country for a given distro.
|
||||||
|
They can even generate stubbed configuration files using those upstreams.
|
||||||
|
Currently only Arch Linux and CentOS are supported.
|
||||||
-->
|
-->
|
||||||
<!--
|
<!--
|
||||||
Required; one of:
|
Required; one of:
|
||||||
|
@ -1,57 +1,56 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
|
||||||
##
|
|
||||||
import iso3166
|
|
||||||
##
|
##
|
||||||
import classes
|
import classes
|
||||||
|
|
||||||
|
|
||||||
_strip_re = re.compile(r'^\s*(?P<num>[0-9.]+).*$')
|
|
||||||
|
|
||||||
|
|
||||||
class Ranker(classes.Ranker):
|
class Ranker(classes.Ranker):
|
||||||
mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/'
|
mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/json/'
|
||||||
|
distro_name = 'archlinux'
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.get_mirrors()
|
self.get_mirrors()
|
||||||
self.mycountry = iso3166.countries_by_alpha2[self.my_info['country']].name
|
|
||||||
|
|
||||||
def extract_mirrors(self):
|
def extract_mirrors(self):
|
||||||
# Limit to only successful mirrors.
|
for mirror in self.req.json()['urls']:
|
||||||
mirrors = self.bs.find('table', {'id': 'successful_mirrors'})
|
if not all((mirror['active'], # Only successful/active mirrors
|
||||||
# Ayyy, thanks dude.
|
mirror['isos'], # Only mirrors with ISOs
|
||||||
# Modified from https://stackoverflow.com/a/56835562/733214.
|
# Only mirrors that support rsync (Arch mirrors do not support ftp)
|
||||||
header = mirrors.find('thead').find('tr')
|
(mirror['protocol'] == 'rsync'),
|
||||||
headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
|
# Only mirrors in the system's country (May be buggy if both are not ISO-3166-1 Alpha-2)
|
||||||
raw_rows = mirrors.find_all('tr')
|
(mirror['country_code'].upper() == self.my_info['country'].upper()),
|
||||||
# rows = [{headers[i]: cell.text for i, cell in enumerate(row.find_all('td'))} for row in raw_rows]
|
# Only mirrors that are at least 100% complete.
|
||||||
rows = [{headers[i]: cell for i, cell in enumerate(row.find_all('td'))} for row in raw_rows]
|
(mirror['completion_pct'] >= 1.0))):
|
||||||
for r in rows:
|
continue
|
||||||
for k, v in r.items():
|
# Convert the timestamp to python-native.
|
||||||
print(v)
|
mirror['last_sync'] = datetime.datetime.strptime(mirror['last_sync'], '%Y-%m-%dT%H:%M:%SZ')
|
||||||
if k in ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)'):
|
self.raw_mirrors.append(mirror)
|
||||||
r[k] = float(_strip_re.sub(r'\g<num>', v.text).strip())
|
self.mirror_candidates.append(mirror['url'])
|
||||||
elif k == 'μ Delay (hh:mm)':
|
|
||||||
# HOO boy. Wish they just did it in seconds.
|
|
||||||
# elif k == 'Country':
|
|
||||||
self.raw_mirrors.append(r)
|
|
||||||
# for row in rows:
|
|
||||||
# if not row:
|
|
||||||
# continue
|
|
||||||
# for k in ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)'):
|
|
||||||
# row[k] = float(_strip_re.sub(r'\g<num>', row[k]).strip())
|
|
||||||
|
|
||||||
return(None)
|
return(None)
|
||||||
|
|
||||||
|
|
||||||
|
def parseArgs():
    """Build and return the command-line parser (the arguments are not parsed here)."""
    description = ('Generate a list of suitable Arch Linux upstream mirrors in order of '
                   'speed')
    parser = argparse.ArgumentParser(description = description)
    # -x/--xml is a plain on/off switch stored on the "xml" attribute.
    parser.add_argument('-x', '--xml',
                        action = 'store_true',
                        dest = 'xml',
                        help = 'If specified, generate a config stub instead of a printed list of URLs')
    return(parser)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
args = parseArgs().parse_args()
|
||||||
r = Ranker()
|
r = Ranker()
|
||||||
r.extract_mirrors()
|
r.extract_mirrors()
|
||||||
import pprint
|
r.speedcheck()
|
||||||
pprint.pprint(r.raw_mirrors)
|
if args.xml:
|
||||||
|
print(r.gen_xml())
|
||||||
|
else:
|
||||||
|
r.print()
|
||||||
return(None)
|
return(None)
|
||||||
|
|
||||||
|
|
||||||
|
@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import datetime
|
||||||
|
import io
|
||||||
|
import re
|
||||||
|
##
|
||||||
|
import classes
|
||||||
|
|
||||||
|
|
||||||
|
_proto_re = re.compile(r'^(?P<proto>https?)(?P<uri>.*)')
|
||||||
|
|
||||||
|
|
||||||
|
class Ranker(classes.Ranker):
    """Mirror ranker for CentOS, fed by the project's full mirror list CSV."""

    # https://lists.centos.org/pipermail/centos-mirror/2017-March/010312.html
    mirrorlist_url = 'https://www.centos.org/download/full-mirrorlist.csv'
    distro_name = 'centos'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.get_mirrors()

    def extract_mirrors(self, preferred_proto = 'rsync'):
        """Collect the mirrors located in this system's country.

        preferred_proto is 'rsync' or 'ftp' (case-insensitive). The mirror's
        link for that protocol is used when present, otherwise the link for the
        other one. Each matching CSV row is appended to self.raw_mirrors and its
        URL to self.mirror_candidates.

        Raises ValueError for any other preferred_proto.
        """
        preferred_proto = preferred_proto.lower()
        if preferred_proto not in ('rsync', 'ftp'):
            raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
        non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
        my_country = self.my_info['country'].upper()
        for row in csv.DictReader(io.StringIO(self.raw_html)):
            # Blank cells become None. Ragged rows give values that are already
            # None (short row) or a list (surplus cells), so only strings are
            # inspected.
            for k, v in row.items():
                if isinstance(v, str) and v.strip() == '':
                    row[k] = None
            country = (row['Country'] or '').strip()
            if not country:
                continue
            # GorRAM it, dudes. States are not countries.
            region = (row['Region'] or '').strip()
            if region == 'US':
                country = region
            # Case-insensitive, like the Arch Linux ranker's country check.
            if country.upper() != my_country:
                continue
            url = None
            for proto in (preferred_proto, non_preferred):
                link = row['{0} mirror link'.format(proto)]
                if link:
                    # Swap an http(s) scheme for the sync protocol; padding
                    # would otherwise defeat the anchored pattern.
                    url = _proto_re.sub(r'{0}\g<uri>'.format(proto), link.strip())
                    break
            if url is None:
                # Mirror offers neither protocol.
                continue
            self.raw_mirrors.append(row)
            self.mirror_candidates.append(url)
        return(None)
|
||||||
|
|
||||||
|
|
||||||
|
def parseArgs():
    """Build and return the command-line parser (the arguments are not parsed here)."""
    description = ('Generate a list of suitable CentOS upstream mirrors in order of '
                   'speed')
    parser = argparse.ArgumentParser(description = description)
    # -x/--xml is a plain on/off switch stored on the "xml" attribute.
    parser.add_argument('-x', '--xml',
                        action = 'store_true',
                        dest = 'xml',
                        help = 'If specified, generate a config stub instead of a printed list of URLs')
    return(parser)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Rank the mirrors, then print either an XML config stub (-x/--xml) or the URL list."""
    opts = parseArgs().parse_args()
    ranker = Ranker()
    ranker.extract_mirrors()
    ranker.speedcheck()
    if not opts.xml:
        ranker.print()
        return(None)
    print(ranker.gen_xml())
    return(None)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -1,22 +1,31 @@
|
|||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
|
from urllib import parse as urlparse
|
||||||
##
|
##
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from lxml import etree
|
||||||
##
|
##
|
||||||
import constants
|
import constants
|
||||||
|
|
||||||
|
|
||||||
class Ranker(object):
|
class Ranker(object):
|
||||||
mirrorlist_url = None # This is replaced by subclasses
|
mirrorlist_url = None # This is replaced by subclasses
|
||||||
|
distro_name = None
|
||||||
|
|
||||||
def __init__(self, parser = 'lxml', *args, **kwargs):
|
def __init__(self, parser = 'lxml', *args, **kwargs):
|
||||||
self.my_info = {}
|
self.my_info = {}
|
||||||
self.raw_html = None
|
self.raw_html = None
|
||||||
self.parser = parser
|
self.parser = parser
|
||||||
self.bs = None
|
self.bs = None
|
||||||
|
self.req = None
|
||||||
self.get_myinfo()
|
self.get_myinfo()
|
||||||
|
# The native collection of mirror information.
|
||||||
self.raw_mirrors = []
|
self.raw_mirrors = []
|
||||||
|
# The list of URLs only of the above.
|
||||||
|
self.mirror_candidates = []
|
||||||
|
self.ranked_mirrors = {}
|
||||||
|
self.ranked_urls = {}
|
||||||
|
|
||||||
def extract_mirrors(self):
|
def extract_mirrors(self):
|
||||||
# A dummy func. This should be overridden by subclasses.
|
# A dummy func. This should be overridden by subclasses.
|
||||||
@ -34,6 +43,79 @@ class Ranker(object):
|
|||||||
req = requests.get(self.mirrorlist_url)
|
req = requests.get(self.mirrorlist_url)
|
||||||
if not req.ok:
|
if not req.ok:
|
||||||
raise RuntimeError('Could not contact information gatherer')
|
raise RuntimeError('Could not contact information gatherer')
|
||||||
self.raw_html = req.content.decode('utf-8')
|
self.req = req
|
||||||
|
self.raw_html = self.req.content.decode('utf-8')
|
||||||
self.bs = BeautifulSoup(self.raw_html, self.parser)
|
self.bs = BeautifulSoup(self.raw_html, self.parser)
|
||||||
return(None)
|
return(None)
|
||||||
|
|
||||||
|
def speedcheck(self):
    """Time a TCP connect to every mirror candidate.

    Reachable mirrors are stored in self.ranked_mirrors (connect time in
    seconds -> dict with path, port, domain and syncType) and
    self.ranked_urls (connect time -> URL). Mirrors that cannot be reached
    within 7 seconds, have no hostname, or use a scheme without a known
    default port are skipped.
    """
    if not self.mirror_candidates:
        self.extract_mirrors()
    for url in self.mirror_candidates:
        u = urlparse.urlparse(url)
        scheme = u.scheme.lower()
        port = u.port or constants.DEF_PORTS.get(scheme)
        if not (u.hostname and port):
            # Nothing to connect to; skip instead of raising mid-run.
            continue
        sock = socket.socket()
        sock.settimeout(7)
        try:
            start = time.perf_counter()
            sock.connect((u.hostname, port))
            conntime = time.perf_counter() - start  # in seconds
        except (socket.timeout, socket.error):
            continue
        finally:
            # Runs on success and failure alike, so an unreachable mirror no
            # longer leaves its socket open.
            sock.close()
        # Skip the mirror if it has an exact time in the mirrors already.
        # Sure, it's *very* unlikely, but best practice to do this.
        if conntime in self.ranked_mirrors:
            continue
        # Key order is kept (path, port, domain, syncType).
        mirror = {'path': (u.path or '/'),
                  'port': port,
                  'domain': u.hostname.lower(),
                  'syncType': scheme}
        self.ranked_mirrors[conntime] = mirror
        self.ranked_urls[conntime] = url
    return(None)
|
||||||
|
|
||||||
|
def print(self):
    """Write the ranked mirror URLs to stdout, fastest first.

    Runs speedcheck() first when self.ranked_mirrors is empty.
    """
    if not self.ranked_mirrors:
        self.speedcheck()
    print('Mirrors in order of speed:\n')
    # Keys are the connect times, so sorting the (time, url) pairs orders by speed.
    for conntime, url in sorted(self.ranked_urls.items()):
        print('{0} # ({1} seconds to connect)'.format(url, conntime))
    return(None)
|
||||||
|
|
||||||
|
def gen_xml(self):
    """Return a configuration stub (XML text) listing the ranked mirrors, fastest first.

    Runs speedcheck() first when self.ranked_mirrors is empty.

    Raises ValueError when distro_name is unset, i.e. on the bare base class.
    """
    if not self.distro_name:
        raise ValueError('This class must be subclassed to be useful')
    if not self.ranked_mirrors:
        self.speedcheck()
    skeleton = ('<?xml version="1.0" encoding="UTF-8" ?>'
                '<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                'xmlns="https://git.square-r00t.net/RepoMirror/" '
                'xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ '
                'http://schema.xml.r00t2.io/projects/repomirror.xsd">'
                '</mirror>')
    root = etree.fromstring(skeleton.encode('utf-8'))
    distro = etree.Element('distro')
    distro.set('name', self.distro_name)
    for conntime in sorted(self.ranked_mirrors):
        # Each upstream is preceded by a comment giving its connect time.
        distro.append(etree.Comment(' ({0} seconds to connect) '.format(conntime)))
        upstream = etree.SubElement(distro, 'upstream')
        for tag, value in self.ranked_mirrors[conntime].items():
            etree.SubElement(upstream, tag).text = str(value)
    root.append(distro)
    rendered = etree.tostring(root,
                              pretty_print = True,
                              with_comments = True,
                              with_tail = True,
                              encoding = 'UTF-8',
                              xml_declaration = True)
    return(rendered.decode('utf-8'))
|
||||||
|
@ -1 +1,5 @@
|
|||||||
MYINFO_URL = 'https://ipinfo.io'
|
MYINFO_URL = 'https://ipinfo.io'
|
||||||
|
DEF_PORTS = {'ftp': 21,
|
||||||
|
'http': 80,
|
||||||
|
'https': 443,
|
||||||
|
'rsync': 873}
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
country = 'US'
|
|
||||||
url = 'https://www.archlinux.org/mirrors/status/tier/1/'
|
|
||||||
|
|
||||||
req = requests.get(url)
|
|
||||||
html = req.content.decode('utf-8')
|
|
||||||
bs = BeautifulSoup(html, 'lxml')
|
|
||||||
|
|
||||||
mirrors = bs.find('table', {'id': 'successful_mirrors'})
|
|
||||||
header = mirrors.find('thead').find('tr')
|
|
||||||
headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
|
|
||||||
|
|
||||||
results = [{headers[i]: cell.text for i, cell in enumerate(row.find_all('td'))} for row in mirrors.find_all('tr')]
|
|
||||||
|
|
||||||
import pprint
|
|
||||||
pprint.pprint(results)
|
|
Loading…
Reference in New Issue
Block a user