about to change up a lot of stuff...
This commit is contained in:
parent
5526111743
commit
2ba79cd801
@ -6,57 +6,108 @@
|
|||||||
xmlns="https://git.square-r00t.net/RepoMirror/"
|
xmlns="https://git.square-r00t.net/RepoMirror/"
|
||||||
xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ http://schema.xml.r00t2.io/projects/repomirror.xsd">
|
xsi:schemaLocation="https://git.square-r00t.net/RepoMirror/ http://schema.xml.r00t2.io/projects/repomirror.xsd">
|
||||||
<distro name="arch">
|
<distro name="arch">
|
||||||
|
<!--
|
||||||
|
If provided (and the sync script is running as the root user), the files/directories can be chowned to the
|
||||||
|
provided user/group. Otherwise they'll be owned by whatever user the script is running as (and its primary group).
|
||||||
|
-->
|
||||||
|
<owner>
|
||||||
|
<user>root</user>
|
||||||
|
<group>root</group>
|
||||||
|
</owner>
|
||||||
<!--
|
<!--
|
||||||
The local path to where the hierarchy/files should be synced to.
|
The local path to where the hierarchy/files should be synced to.
|
||||||
-->
|
-->
|
||||||
<dest>/srv/repos/arch/.</dest>
|
<dest>/srv/repos/arch/.</dest>
|
||||||
<!--
|
<!--
|
||||||
The local file to update with a timestamp with the last time we checked for updates.
|
The local file to update with a timestamp with the last time we *checked* for updates.
|
||||||
|
If not provided, don't update a file (NOT recommended!).
|
||||||
|
It may or may not be optional; check with the spec for mirroring for the specified distro.
|
||||||
|
If the timeFormat attribute is provided, write the timestamp in the specified format.
|
||||||
|
See the following for details:
|
||||||
|
* https://docs.python.org/library/datetime.html#strftime-and-strptime-format-codes
|
||||||
|
* https://strftime.org/
|
||||||
|
The default is to use a regular UNIX Epoch integer (e.g. June 13, 2020 5:03:53 PM UTC => 1592067833).
|
||||||
|
This can be manually specified by the special string "UNIX_EPOCH".
|
||||||
|
Optionally, you can use the special string "MICROSECOND_EPOCH", which will specify the above with microseconds.
|
||||||
|
e.g. June 13, 2020 5:09:13.995777 PM UTC => 1592068153.995777
|
||||||
-->
|
-->
|
||||||
<lastLocalCheck>/srv/http/arch.lastcheck</lastLocalCheck>
|
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/arch.lastcheck</lastLocalCheck>
|
||||||
<!--
|
<!--
|
||||||
The file to update with a timestamp with the last time we synced from our upstream.
|
The file to update with a timestamp with the last time we *synced from our upstream*.
|
||||||
|
If not provided, don't update a file (NOT recommended!).
|
||||||
|
It may or may not be optional; check with the spec for mirroring for the specified distro.
|
||||||
|
If not provided, don't update a file (NOT recommended!).
|
||||||
|
It takes the same optional attribute "timeFormat" as above, with the same behaviour.
|
||||||
-->
|
-->
|
||||||
<lastLocalSync>/srv/http/arch.lastsync</lastLocalSync>
|
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync>
|
||||||
<!--
|
<!--
|
||||||
The path to a file on the upstream(s) that gives a time when it last updated.
|
The path to a file on the upstream(s) that gives a time when it last updated.
|
||||||
|
The optional timeFormat attribute behavior is the same as above.
|
||||||
|
If neither this nor lastRemoteSync is provided, a sync will be attempted regardless of when the last one was
|
||||||
|
attempted.
|
||||||
-->
|
-->
|
||||||
<lastRemoteUpdate>/lastupdate</lastRemoteUpdate>
|
<lastRemoteUpdate timeFormat="UNIX_EPOCH">/lastupdate</lastRemoteUpdate>
|
||||||
<!--
|
<!--
|
||||||
The path to a file on the upstream(s) that gives a time when it last synced from its upstream.
|
The path to a file on the upstream(s) that gives a time when it last synced from its upstream.
|
||||||
|
The optional timeFormat attribute behavior is the same as above.
|
||||||
|
If neither this nor lastRemoteUpdate is provided, a sync will be attempted regardless of when the last one was
|
||||||
|
attempted.
|
||||||
-->
|
-->
|
||||||
<lastRemoteSync>/lastsync</lastRemoteSync>
|
<lastRemoteSync timeFormat="UNIX_EPOCH">/lastsync</lastRemoteSync>
|
||||||
<!--
|
<!--
|
||||||
The path that must be currently mounted for sync to proceed.
|
The path that must be currently mounted for sync to proceed.
|
||||||
|
This is required.
|
||||||
-->
|
-->
|
||||||
<mountCheck>/</mountCheck>
|
<mountCheck>/</mountCheck>
|
||||||
<!--
|
<!--
|
||||||
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
|
You cannot reliably use two dashes in XML strings, so this is a workaround.
|
||||||
|
The following is only used for rsync upstreams and is optional. The default is just archive and delete-after.
|
||||||
|
If arguments are provided, the defaults are overwritten so if you need the above, be sure to specify them.
|
||||||
|
See the rsync man page (rsync(1)) for more details and a listing of supported flags on your system.
|
||||||
-->
|
-->
|
||||||
<bwlimit>7000</bwlimit>
|
<rsyncArgs>
|
||||||
|
<!--
|
||||||
|
A "long" option (two hyphens).
|
||||||
|
-->
|
||||||
|
<long>archive</long>
|
||||||
|
<long>delete-after</long>
|
||||||
|
<!--
|
||||||
|
An argument with a value (info=2).
|
||||||
|
-->
|
||||||
|
<long value="2">info</long>
|
||||||
|
<!--
|
||||||
|
A "short" option (single hyphen).
|
||||||
|
-->
|
||||||
|
<short>c</short><!-- checksum -->
|
||||||
|
</rsyncArgs>
|
||||||
<upstream>
|
<upstream>
|
||||||
<!--
|
<!--
|
||||||
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
||||||
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
||||||
-->
|
-->
|
||||||
<!--
|
<!--
|
||||||
One of:
|
Required; one of:
|
||||||
* rsync
|
* rsync
|
||||||
* ftp
|
* ftp
|
||||||
-->
|
-->
|
||||||
<syncType>rsync</syncType>
|
<syncType>rsync</syncType>
|
||||||
<!--
|
<!--
|
||||||
ONLY the domain goes here.
|
Required; ONLY the domain goes here.
|
||||||
-->
|
-->
|
||||||
<domain>arch.mirror.constant.com</domain>
|
<domain>arch.mirror.constant.com</domain>
|
||||||
<!--
|
<!--
|
||||||
If not specified,the protocol's default port will be used.
|
Optional; if not specified, the protocol's default port will be used.
|
||||||
-->
|
-->
|
||||||
<port>873</port>
|
<port>873</port>
|
||||||
<!--
|
<!--
|
||||||
The *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
|
Required; the *remote* path part of the URI. The leading / is necessary. A trailing one will be assumed.
|
||||||
-->
|
-->
|
||||||
<path>/archlinux/</path>
|
<path>/archlinux/</path>
|
||||||
|
<!--
|
||||||
|
The speed to cap socket bandwidth at (in KiB). Decimals are okay.
|
||||||
|
Only valid for rsync; ignored for FTP. If not provided, the default is to not throttle.
|
||||||
|
-->
|
||||||
|
<bwlimit>7000</bwlimit>
|
||||||
</upstream>
|
</upstream>
|
||||||
<!--
|
<!--
|
||||||
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
|
Multiple upstreams can be specified. They are tried in order specified and if connection fails or times out,
|
||||||
@ -77,5 +128,14 @@
|
|||||||
<path>/distros/archlinux/</path>
|
<path>/distros/archlinux/</path>
|
||||||
</upstream>
|
</upstream>
|
||||||
</distro>
|
</distro>
|
||||||
<distro name="centos"/>
|
<distro name="centos">
|
||||||
|
<upstream>
|
||||||
|
<syncType>rsync</syncType>
|
||||||
|
<domain>mirrors.rit.edu</domain>
|
||||||
|
<path>/centos/</path>
|
||||||
|
</upstream>
|
||||||
|
<dest>/srv/repos/centos/.</dest>
|
||||||
|
<lastLocalCheck timeFormat="MICROSECOND_EPOCH">/srv/http/centos.lastcheck</lastLocalCheck>
|
||||||
|
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/centos/lastsync</lastLocalSync>
|
||||||
|
</distro>
|
||||||
</mirror>
|
</mirror>
|
||||||
|
59
utils/find_fastest_upstream/archlinux.py
Executable file
59
utils/find_fastest_upstream/archlinux.py
Executable file
@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
##
|
||||||
|
import iso3166
|
||||||
|
##
|
||||||
|
import classes
|
||||||
|
|
||||||
|
|
||||||
|
# Matches optional leading whitespace, captures the following run of digits and dots as "num",
# then consumes the rest of the line. Substituting with r'\g<num>' reduces a table cell's text
# to just that leading number.
_strip_re = re.compile(r'^\s*(?P<num>[0-9.]+).*$')
|
||||||
|
|
||||||
|
|
||||||
|
class Ranker(classes.Ranker):
    """Collect Arch Linux tier 1 mirror statistics from the mirror status page.

    On construction the status page at ``mirrorlist_url`` is downloaded and
    parsed (``get_mirrors`` from the parent class), and the ``country`` entry
    of ``self.my_info`` is looked up as an ISO 3166 alpha-2 code to get the
    country's name, stored as ``self.mycountry``.
    """
    mirrorlist_url = 'https://www.archlinux.org/mirrors/status/tier/1/'

    # Columns whose cell text is reduced to a single float.
    _float_columns = ('Completion %', 'Mirror Score', 'μ Duration (s)', 'σ Duration (s)')
    # Column holding the mean mirror delay.
    _delay_column = 'μ Delay (hh:mm)'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.get_mirrors()
        self.mycountry = iso3166.countries_by_alpha2[self.my_info['country']].name

    @staticmethod
    def _parse_delay(text):
        """Convert a delay string into a ``datetime.timedelta``.

        The format is assumed to be ``hours:minutes``, as the column header
        ("hh:mm") suggests -- TODO confirm against the live page.

        :param text: The raw cell text, e.g. ``'1:23'``.
        :return: A ``datetime.timedelta``, or None if the text is not two
                 colon-separated integers.
        """
        hours, sep, minutes = text.strip().partition(':')
        if not sep:
            return(None)
        try:
            return(datetime.timedelta(hours = int(hours), minutes = int(minutes)))
        except ValueError:
            return(None)

    def extract_mirrors(self):
        """Parse the "successful_mirrors" table into ``self.raw_mirrors``.

        One dict per table row is appended, keyed by the column header text
        (a header with empty text is keyed as ``'details'``). The numeric
        columns are converted to float and the delay column to a timedelta;
        every other column keeps the parsed cell object as-is.

        :raises RuntimeError: If the page has no "successful_mirrors" table.
        :raises ValueError: If a numeric column's text cannot be converted to float.
        """
        # Limit to only successful mirrors.
        mirrors = self.bs.find('table', {'id': 'successful_mirrors'})
        if mirrors is None:
            raise RuntimeError('Could not find the successful mirrors table')
        # Ayyy, thanks dude.
        # Modified from https://stackoverflow.com/a/56835562/733214.
        header = mirrors.find('thead').find('tr')
        headers = [h.text if h.text != '' else 'details' for h in header.find_all('th')]
        for raw_row in mirrors.find_all('tr'):
            cells = raw_row.find_all('td')
            if not cells:
                # Rows without <td> cells (e.g. the header row) carry no mirror data.
                continue
            row = {}
            for idx, cell in enumerate(cells):
                key = headers[idx]
                if key in self._float_columns:
                    row[key] = float(_strip_re.sub(r'\g<num>', cell.text).strip())
                elif key == self._delay_column:
                    # HOO boy. Wish they just did it in seconds.
                    row[key] = self._parse_delay(cell.text)
                else:
                    # TODO: the 'Country' column (and others) still need their own handling.
                    row[key] = cell
            self.raw_mirrors.append(row)
        return(None)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Build a Ranker, extract its mirror rows, and pretty-print them."""
    from pprint import pprint as show
    ranker = Ranker()
    ranker.extract_mirrors()
    show(ranker.raw_mirrors)
    return None


if __name__ == '__main__':
    main()
|
0
utils/find_fastest_upstream/centos.py
Executable file
0
utils/find_fastest_upstream/centos.py
Executable file
39
utils/find_fastest_upstream/classes.py
Normal file
39
utils/find_fastest_upstream/classes.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import socket
|
||||||
|
import time
|
||||||
|
##
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
##
|
||||||
|
import constants
|
||||||
|
|
||||||
|
|
||||||
|
class Ranker(object):
    """Base class for distro-specific mirror rankers.

    Subclasses set ``mirrorlist_url`` and override ``extract_mirrors``.
    Constructing an instance immediately fetches ``constants.MYINFO_URL``
    (see ``get_myinfo``).

    :param parser: The parser name handed to BeautifulSoup by ``get_mirrors``.
    """
    mirrorlist_url = None  # This is replaced by subclasses
    # Seconds to wait on each HTTP request, so an unresponsive server cannot hang the script forever.
    request_timeout = 30

    def __init__(self, parser = 'lxml', *args, **kwargs):
        self.my_info = {}
        self.raw_html = None
        self.parser = parser
        self.bs = None
        self.raw_mirrors = []
        self.get_myinfo()

    def extract_mirrors(self):
        """Do nothing; a dummy that subclasses should override."""
        return(None)

    def get_myinfo(self):
        """Fetch ``constants.MYINFO_URL`` and store its decoded JSON body in ``self.my_info``.

        :raises RuntimeError: If the response status is not OK.
        """
        req = requests.get(constants.MYINFO_URL, timeout = self.request_timeout)
        if not req.ok:
            raise RuntimeError('Could not contact information gatherer')
        self.my_info = req.json()
        return(None)

    def get_mirrors(self):
        """Download ``mirrorlist_url`` and parse it.

        The page body is decoded as UTF-8 into ``self.raw_html`` and parsed
        with BeautifulSoup (using ``self.parser``) into ``self.bs``.

        :raises RuntimeError: If ``mirrorlist_url`` is unset or the response
                              status is not OK.
        """
        if self.mirrorlist_url is None:
            # Fail with a clear message instead of an opaque error from the HTTP library.
            raise RuntimeError('No mirrorlist_url is set; use a distro-specific subclass')
        req = requests.get(self.mirrorlist_url, timeout = self.request_timeout)
        if not req.ok:
            raise RuntimeError('Could not fetch the mirror list')
        self.raw_html = req.content.decode('utf-8')
        self.bs = BeautifulSoup(self.raw_html, self.parser)
        return(None)
|
1
utils/find_fastest_upstream/constants.py
Normal file
1
utils/find_fastest_upstream/constants.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
# URL fetched by classes.Ranker.get_myinfo(); its JSON response is stored as the ranker's my_info.
MYINFO_URL = 'https://ipinfo.io'
|
20
utils/find_fastest_upstream/test.py
Executable file
20
utils/find_fastest_upstream/test.py
Executable file
@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Scratch script: download the Arch Linux tier 1 mirror status page and dump the
# "successful_mirrors" table as a list of {column header: cell text} dicts.
import pprint

country = 'US'
url = 'https://www.archlinux.org/mirrors/status/tier/1/'

req = requests.get(url)
html = req.content.decode('utf-8')
bs = BeautifulSoup(html, 'lxml')

mirrors = bs.find('table', {'id': 'successful_mirrors'})
header = mirrors.find('thead').find('tr')

# A header cell with no text is keyed as "details".
headers = []
for _th in header.find_all('th'):
    headers.append(_th.text or 'details')

# One dict per <tr>; rows without <td> cells produce an empty dict.
results = []
for _tr in mirrors.find_all('tr'):
    _record = {}
    for _idx, _td in enumerate(_tr.find_all('td')):
        _record[headers[_idx]] = _td.text
    results.append(_record)

pprint.pprint(results)
|
Loading…
Reference in New Issue
Block a user