Compare commits
16 Commits
Author | SHA1 | Date |
---|---|---|
brent s. | b0d4332975 | |
brent s. | 649b2968b8 | |
brent s. | cd29b8d13f | |
brent s. | 1c8f4da1b1 | |
brent s. | 8a10f485f5 | |
brent s. | cddaa4bcc5 | |
brent s. | 8263de6875 | |
brent s. | 712dc8e4c7 | |
brent s. | 18024ba61a | |
brent s | 3676acfc00 | |
brent s. | a2e9075671 | |
brent s. | 803fb7c5fa | |
brent s | d751d14be4 | |
brent s | bc9a653234 | |
brent s | 4bdb31541b | |
brent s. | a3203ab03a |
1
README
1
README
|
@ -10,7 +10,6 @@ Features:
|
|||
* Changing of directory/file ownership per-distribution
|
||||
* Mount-checking per-distribution - a distribution will be skipped if its designated mountpoint is not mounted
|
||||
* Synchronization checks - timestamp files can be read and written and are used to determine if a sync should take place or not
|
||||
** TODO: customizable "staleness" of repos (e.g. sync if timestamp X is older than timestamp Y on server)
|
||||
|
||||
|
||||
Configuration/Deployment:
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
- add option to check for last update (local and remote) and quit
|
||||
- need to re-tool update checking - if local update and remote update, compare those two rather than syncs to updates
|
||||
- fix rsyncIgnores not being parsed?
|
|
@ -2,6 +2,7 @@
|
|||
<!--
|
||||
This is by default placed in ~/.config/repomirror.xml
|
||||
Remember to replace any necessary special characters (https://stackoverflow.com/a/1091953/733214).
|
||||
Also, your mirror server should definitely be using UTC. You're going to see a LOT of weirdness if not.
|
||||
-->
|
||||
<mirror xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="https://git.square-r00t.net/RepoMirror/"
|
||||
|
@ -43,18 +44,22 @@
|
|||
<lastLocalSync timeFormat="UNIX_EPOCH">/srv/repos/arch/lastsync</lastLocalSync>
|
||||
<!--
|
||||
The path to a file on the upstream(s) that gives a time when it last updated.
|
||||
The optional timeFormat attribute behavior is the same as above.
|
||||
The syntax and options are the same as lastLocalCheck/lastLocalSync.
|
||||
If neither this nor lastRemoteSync is provided, a sync will be attempted regardless of when the last one was
|
||||
attempted.
|
||||
-->
|
||||
<!--
|
||||
Remote timestamps take an additional optional boolean attribute, "mtime". If true, the mtime of the remote file
|
||||
will be checked instead of the content of the file (and thus timeFormat is ignored).
|
||||
-->
|
||||
<lastRemoteUpdate timeFormat="UNIX_EPOCH">/lastupdate</lastRemoteUpdate>
|
||||
<!--
|
||||
The path to a file on the upstream(s) that gives a time when it last synced from its upstream.
|
||||
The optional timeFormat attribute behavior is the same as above.
|
||||
The syntax and options are the same as lastRemoteCheck.
|
||||
If neither this nor lastRemoteUpdate is provided, a sync will be attempted regardless of when the last one was
|
||||
attempted.
|
||||
attempted. It follows the same rules as lastRemoteUpdate for syntax.
|
||||
-->
|
||||
<lastRemoteSync timeFormat="UNIX_EPOCH">/lastsync</lastRemoteSync>
|
||||
<lastRemoteSync mtime="true" timeFormat="UNIX_EPOCH">/lastsync</lastRemoteSync>
|
||||
<!--
|
||||
The path that must be currently mounted for sync to proceed.
|
||||
This is required.
|
||||
|
@ -69,16 +74,15 @@
|
|||
<long>links</long>
|
||||
<long>hard-links</long>
|
||||
<long>delete-after</long>
|
||||
<long>perms</long>
|
||||
<long>delay-updates</long>
|
||||
<long>copy-links</long>
|
||||
<long>safe-links</long>
|
||||
<long>delete-excluded</long>
|
||||
<long value=".*">exclude</long>
|
||||
</rsyncArgs>
|
||||
These arguments should be sane for most, if not all, rsync-driven repository mirroring. The last one (exclude) may
|
||||
be removed in future versions.
|
||||
These arguments should be sane for most, if not all, rsync-driven repository mirroring.
|
||||
If arguments are provided, the defaults are overwritten so if you need the above, be sure to specify them.
|
||||
See the rsync man page (rsync(1)) for more details and a listing of supported flags on your system.
|
||||
See the rsync man page (rsync(1)) for more details and a listing of supported flags on your system
|
||||
(§ "OPTION SUMMARY", § "OPTIONS").
|
||||
-->
|
||||
<rsyncArgs>
|
||||
<!--
|
||||
|
@ -94,8 +98,34 @@
|
|||
A "short" option (single hyphen).
|
||||
-->
|
||||
<short>c</short><!-- checksum -->
|
||||
<!--
|
||||
An option that must be enclosed in quotes. (This one excludes hidden files/directories.)
|
||||
-->
|
||||
<long value="".*"">exclude</long>
|
||||
</rsyncArgs>
|
||||
<upstream>
|
||||
<!--
|
||||
rsyncIgnore is an optional element that ignores certain return codes/exit statuses of rsync (otherwise they are
|
||||
raised as warnings). It consists of a space-separated list of return codes that warnings should be suppressed for.
|
||||
(Return code 0 never raises a warning, as it is success.)
|
||||
See the rsync man page (rsync(1)) for a list of error codes and what they mean (§ "EXIT VALUES"), or refer to:
|
||||
repomirror/fetcher/rsync_returns.py
|
||||
-->
|
||||
<rsyncIgnore returns="23 24"/>
|
||||
<!--
|
||||
Upstreams have an optional attribute, "delayCheck", which is an ISO 8601 duration type.
|
||||
https://en.wikipedia.org/wiki/ISO_8601#Durations
|
||||
e.g. 5 minutes would be P0Y0M0DT0H5M0S or just PT5M.
|
||||
It is used to determine if your upstream is "out of date" (e.g. will be skipped if its last check date is older
|
||||
than the specified amount of time). Obviously this is only checked if you have a specified lastRemoteUpdate value.
|
||||
-->
|
||||
<!--
|
||||
You can optionally specify an offset via the "offset" attribute in the same format as "delayCheck" if your
|
||||
upstream's remote files use a timezone other than UTC.
|
||||
e.g.:
|
||||
* If your upstream uses UTC-4 for its timestamp files, you would use "-PT4H".
|
||||
* If your upstream uses UTC+6 for its timestamp files, you would use either "+PT6H" or just "PT6H".
|
||||
-->
|
||||
<upstream delayCheck="P0Y0M2DT0H0M0S" offset="-PT0S">
|
||||
<!--
|
||||
The following example uses "rsync://arch.mirror.constant.com/archlinux/"
|
||||
(https://www.archlinux.org/mirrors/constant.com/1008/)
|
||||
|
|
|
@ -1,15 +1,27 @@
|
|||
PROTO_DEF_PORTS = {'ftp': 21,
|
||||
'rsync': 873}
|
||||
|
||||
RSYNC_DEF_ARGS = ['--recursive',
|
||||
'--times',
|
||||
'--links',
|
||||
'--hard-links',
|
||||
'--delete-after',
|
||||
'--perms',
|
||||
'--delay-updates',
|
||||
'--copy-links',
|
||||
'--safe-links',
|
||||
'--delete-excluded',
|
||||
'--exclude=.*']
|
||||
# How many days an upstream should have last synced by before it's considered stale.
|
||||
DAYS_WARN = 2
|
||||
VERSION = '1.0.3'
|
||||
'--exclude=".*"']
|
||||
|
||||
# These are needed to convert years/months to timedeltas.
|
||||
# The following are averaged definitions for time units *in days* according to Google Calculator.
|
||||
YEAR = 365.2422
|
||||
MONTH = 30.4167
|
||||
# The following are approximations based on ISO 8601 definitions *in days*.
|
||||
# https://webspace.science.uu.nl/~gent0113/calendar/isocalendar.htm
|
||||
# YEAR = 365.25
|
||||
# MONTH = 30.6
|
||||
|
||||
# We no longer do this by default.
|
||||
# # How many days an upstream should have last synced by before it's considered stale.
|
||||
# DAYS_WARN = 2
|
||||
VERSION = '1.1.0'
|
||||
|
|
|
@ -9,11 +9,23 @@ _logger = logging.getLogger()
|
|||
class BaseFetcher(object):
|
||||
type = None
|
||||
|
||||
def __init__(self, domain, port, path, dest, owner = None, filechecks = None, *args, **kwargs):
|
||||
def __init__(self,
|
||||
domain,
|
||||
port,
|
||||
path,
|
||||
dest,
|
||||
owner = None,
|
||||
filechecks = None,
|
||||
mtime = False,
|
||||
offset = None,
|
||||
*args,
|
||||
**kwargs):
|
||||
self.domain = domain
|
||||
self.port = int(port)
|
||||
self.path = path
|
||||
self.dest = os.path.abspath(os.path.expanduser(dest))
|
||||
self.mtime = mtime
|
||||
self.offset = offset
|
||||
self.url = '{0}://{1}:{2}/{3}'.format(self.type, self.domain, self.port, self.path.lstrip('/'))
|
||||
self.owner = owner
|
||||
self.filechecks = filechecks
|
||||
|
@ -25,15 +37,33 @@ class BaseFetcher(object):
|
|||
def check(self):
|
||||
for k, v in self.filechecks['remote'].items():
|
||||
if v:
|
||||
tstmp_raw = self.fetch_content(v.path).decode('utf-8').strip()
|
||||
if '%s' in v.fmt:
|
||||
tstmp = datetime.datetime.fromtimestamp(float(tstmp_raw))
|
||||
if self.mtime:
|
||||
tstmp = self.fetch_content(v.path, mtime_only = True)
|
||||
if not isinstance(tstmp, datetime.datetime):
|
||||
self.timestamps[k] = None
|
||||
continue
|
||||
else:
|
||||
self.timestamps[k] = datetime.datetime.fromtimestamp(float(tstmp))
|
||||
else:
|
||||
tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
|
||||
self.timestamps[k] = tstmp
|
||||
tstmp_raw = self.fetch_content(v.path).decode('utf-8').strip()
|
||||
if tstmp_raw == '':
|
||||
self.timestamps[k] = None
|
||||
continue
|
||||
if '%s' in v.fmt:
|
||||
tstmp = datetime.datetime.fromtimestamp(float(tstmp_raw))
|
||||
else:
|
||||
tstmp = datetime.datetime.strptime(tstmp_raw, v.fmt)
|
||||
self.timestamps[k] = tstmp
|
||||
if self.offset:
|
||||
newval = None
|
||||
if self.offset.mod == '+' or not self.offset.mod:
|
||||
newval = self.timestamps[k] + self.offset.offset
|
||||
elif self.offset.mod == '-':
|
||||
newval = self.timestamps[k] - self.offset.offset
|
||||
self.timestamps[k] = newval
|
||||
_logger.debug('Updated upstream timestamps: {0}'.format(self.timestamps))
|
||||
return(None)
|
||||
|
||||
def fetch_content(self, path):
|
||||
def fetch_content(self, path, mtime_only = False):
|
||||
# Dummy func.
|
||||
return(b'')
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import datetime
|
||||
import ftplib
|
||||
import logging
|
||||
import io
|
||||
|
@ -13,8 +14,8 @@ _logger = logging.getLogger()
|
|||
class FTP(_base.BaseFetcher):
|
||||
type = 'ftp'
|
||||
|
||||
def __init__(self, domain, port, path, dest, owner = None, *args, **kwargs):
|
||||
super().__init__(domain, port, path, dest, owner = owner, *args, **kwargs)
|
||||
def __init__(self, domain, port, path, dest, owner = None, mtime = False, offset = None, *args, **kwargs):
|
||||
super().__init__(domain, port, path, dest, owner = owner, mtime = mtime, offset = offset, *args, **kwargs)
|
||||
_logger.debug('Instantiated FTP fetcher')
|
||||
self.handler = ftplib.FTP(self.domain)
|
||||
_logger.debug('Configured handler for {0}'.format(self.domain))
|
||||
|
@ -31,7 +32,7 @@ class FTP(_base.BaseFetcher):
|
|||
|
||||
def _disconnect(self):
|
||||
if self.connected:
|
||||
self.handler.quit()
|
||||
self.handler.close()
|
||||
_logger.debug('Disconnected from {0}:{1} as Anonymous'.format(self.domain, self.port))
|
||||
self.connected = False
|
||||
return(None)
|
||||
|
@ -90,13 +91,23 @@ class FTP(_base.BaseFetcher):
|
|||
self._disconnect()
|
||||
return(None)
|
||||
|
||||
def fetch_content(self, remote_filepath):
|
||||
def fetch_content(self, remote_filepath, mtime_only = False):
|
||||
self._connect()
|
||||
buf = io.BytesIO()
|
||||
self.handler.retrbinary('RETR {0}'.format(remote_filepath), buf.write)
|
||||
if mtime_only:
|
||||
directory, file = os.path.split(remote_filepath)
|
||||
parent = '/{0}'.format(directory.lstrip('/'))
|
||||
meta = self.handler.mlsd(parent)
|
||||
file_info = dict(meta)[file]
|
||||
tstmp = file_info['modify']
|
||||
content = datetime.datetime.strptime(tstmp, '%Y%m%d%H%M%S')
|
||||
else:
|
||||
buf = io.BytesIO()
|
||||
self.handler.retrbinary('RETR {0}'.format(remote_filepath), buf.write)
|
||||
self._disconnect()
|
||||
buf.seek(0, 0)
|
||||
content = buf.read()
|
||||
self._disconnect()
|
||||
buf.seek(0, 0)
|
||||
return(buf.read())
|
||||
return(content)
|
||||
|
||||
def fetch_dir(self, pathspec):
|
||||
self._connect()
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
|
@ -25,18 +26,31 @@ class RSync(_base.BaseFetcher):
|
|||
path,
|
||||
dest,
|
||||
rsync_args = None,
|
||||
rsync_ignores = None,
|
||||
owner = None,
|
||||
log = True,
|
||||
filechecks = None,
|
||||
offset = None,
|
||||
mtime = False,
|
||||
*args,
|
||||
**kwargs):
|
||||
super().__init__(domain, port, path, dest, owner = owner, filechecks = filechecks, *args, **kwargs)
|
||||
super().__init__(domain,
|
||||
port,
|
||||
path,
|
||||
dest,
|
||||
owner = owner,
|
||||
filechecks = filechecks,
|
||||
offset = offset,
|
||||
mtime = mtime
|
||||
*args,
|
||||
**kwargs)
|
||||
_logger.debug('Instantiated RSync fetcher')
|
||||
if rsync_args:
|
||||
self.rsync_args = rsync_args.args[:]
|
||||
else:
|
||||
self.rsync_args = constants.RSYNC_DEF_ARGS[:]
|
||||
_logger.debug('RSync args given: {0}'.format(self.rsync_args))
|
||||
self.rsync_ignores = rsync_ignores[:]
|
||||
if log:
|
||||
# Do I want to do this in subprocess + logging module? Or keep this?
|
||||
# It looks a little ugly in the log but it makes more sense than doing it via subprocess just to write it
|
||||
|
@ -68,21 +82,33 @@ class RSync(_base.BaseFetcher):
|
|||
stderr = subprocess.PIPE)
|
||||
stdout = cmd.stdout.decode('utf-8').strip()
|
||||
stderr = cmd.stderr.decode('utf-8').strip()
|
||||
rtrn = cmd.returncode
|
||||
if stdout != '':
|
||||
_logger.debug('STDOUT: {0}'.format(stdout))
|
||||
if stderr != '' or cmd.returncode != 0:
|
||||
rtrn = cmd.returncode
|
||||
if rtrn != 0 and rtrn not in self.rsync_ignores:
|
||||
err = rsync_returns.returns[rtrn]
|
||||
_logger.error(('Rsync to {0}:{1} returned exit status {2}: {3}').format(self.domain, self.port, rtrn, err))
|
||||
_logger.debug('STDERR: {0}'.format(stderr))
|
||||
warnings.warn('Rsync process returned non-zero {0} ({1}) for {2}'.format(rtrn, err, ' '.join(cmd_str)))
|
||||
errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
|
||||
debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
|
||||
if stderr != '':
|
||||
errmsg += ' an error message: {0}'.format(stderr)
|
||||
debugmsg += ' an error message: {0}'.format(stderr)
|
||||
if rtrn != 0:
|
||||
errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
|
||||
debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
|
||||
errmsg += '.'
|
||||
_logger.error(errmsg)
|
||||
_logger.debug(debugmsg)
|
||||
warnings.warn(errmsg)
|
||||
return(None)
|
||||
|
||||
def fetch_content(self, remote_filepath):
|
||||
def fetch_content(self, remote_filepath, mtime_only = False):
|
||||
tf = tempfile.mkstemp()[1]
|
||||
url = os.path.join(self.url.rstrip('/'),remote_filepath.lstrip('/'))
|
||||
url = os.path.join(self.url.rstrip('/'), remote_filepath.lstrip('/'))
|
||||
rsync_args = self.rsync_args[:]
|
||||
if mtime_only and not any((('--times' in rsync_args), ('-t' in rsync_args))):
|
||||
rsync_args.insert(0, '--times')
|
||||
cmd_str = ['rsync',
|
||||
*self.rsync_args,
|
||||
*rsync_args,
|
||||
url,
|
||||
tf]
|
||||
_logger.debug('Running command: {0}'.format(' '.join(cmd_str)))
|
||||
|
@ -91,16 +117,29 @@ class RSync(_base.BaseFetcher):
|
|||
stderr = subprocess.PIPE)
|
||||
stdout = cmd.stdout.decode('utf-8').strip()
|
||||
stderr = cmd.stderr.decode('utf-8').strip()
|
||||
rtrn = cmd.returncode
|
||||
if stdout != '':
|
||||
_logger.debug('STDOUT: {0}'.format(stdout))
|
||||
if stderr != '' or cmd.returncode != 0:
|
||||
rtrn = cmd.returncode
|
||||
err = rsync_returns.returns[rtrn]
|
||||
_logger.error(('Rsync to {0}:{1} returned exit status {2}: {3}').format(self.domain, self.port, rtrn, err))
|
||||
_logger.debug('STDERR: {0}'.format(stderr))
|
||||
warnings.warn('Rsync process returned non-zero {0} ({1}) for {2}'.format(rtrn, err, ' '.join(cmd_str)))
|
||||
with open(tf, 'rb') as fh:
|
||||
raw_content = fh.read()
|
||||
if rtrn != 0 and rtrn not in self.rsync_ignores:
|
||||
err = rsync_returns.returns.get(rtrn, '(UNKNOWN ERROR)')
|
||||
errmsg = 'Rsync to {0}:{1} returned'.format(self.domain, self.port)
|
||||
debugmsg = 'Rsync command {0} returned'.format(' '.join(cmd_str))
|
||||
if stderr != '':
|
||||
errmsg += ' an error message: {0}'.format(stderr)
|
||||
debugmsg += ' an error message: {0}'.format(stderr)
|
||||
if rtrn != 0:
|
||||
errmsg += ' with exit status {0} ({1})'.format(rtrn, err)
|
||||
debugmsg += ' with exit status {0} ({1})'.format(rtrn, err)
|
||||
errmsg += '.'
|
||||
_logger.error(errmsg)
|
||||
_logger.debug(debugmsg)
|
||||
warnings.warn(errmsg)
|
||||
return(b'')
|
||||
if mtime_only:
|
||||
raw_content = datetime.datetime.fromtimestamp(os.stat(tf).st_mtime)
|
||||
else:
|
||||
with open(tf, 'rb') as fh:
|
||||
raw_content = fh.read()
|
||||
os.remove(tf)
|
||||
return(raw_content)
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@ import logging
|
|||
import pwd
|
||||
import grp
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
import warnings
|
||||
|
@ -23,6 +24,16 @@ if os.isatty(sys.stdin.fileno()):
|
|||
else:
|
||||
_is_cron = True
|
||||
|
||||
_duration_re = re.compile(('^(?P<mod>[-+])?P'
|
||||
'((?P<years>[0-9]+(\.[0-9]+)?)Y)?'
|
||||
'((?P<months>[0-9]+(\.[0-9]+)?)M)?'
|
||||
'((?P<days>[0-9]+(\.[0-9]+)?)D)?'
|
||||
'T?'
|
||||
'((?P<hours>[0-9]+(\.[0-9]+)?)H)?'
|
||||
'((?P<minutes>[0-9]+(\.[0-9]+)?)M)?'
|
||||
'((?P<seconds>[0-9]+(\.[0-9]+)?)S)?'
|
||||
'$'))
|
||||
|
||||
|
||||
def get_owner(owner_xml):
|
||||
owner = {}
|
||||
|
@ -46,6 +57,19 @@ def get_owner(owner_xml):
|
|||
return(owner)
|
||||
|
||||
|
||||
def get_duration(duration_str):
|
||||
r = _duration_re.search(duration_str)
|
||||
times = {k: (float(v) if v else 0.0) for k, v in r.groupdict().items()}
|
||||
mod = times.pop('mod')
|
||||
if not mod:
|
||||
mod = '+'
|
||||
years = float(times.pop('years'))
|
||||
months = float(times.pop('months'))
|
||||
times['days'] = (times['days'] + (years * constants.YEAR) + (months * constants.MONTH))
|
||||
delay = datetime.timedelta(**times)
|
||||
return((mod, delay))
|
||||
|
||||
|
||||
class Args(object):
|
||||
def __init__(self, args_xml):
|
||||
self.xml = args_xml
|
||||
|
@ -91,14 +115,22 @@ class Mount(object):
|
|||
return(None)
|
||||
|
||||
|
||||
class TimeOffset(object):
|
||||
def __init__(self, duration_str):
|
||||
self.mod, self.offset = get_duration(duration_str)
|
||||
|
||||
|
||||
class TimestampFile(object):
|
||||
def __init__(self, ts_xml, owner_xml = None):
|
||||
self.xml = ts_xml
|
||||
self.fmt = ts_xml.attrib.get('timeFormat', 'UNIX_EPOCH')
|
||||
if self.fmt == 'UNIX_EPOCH':
|
||||
self.fmt = '%s'
|
||||
elif self.fmt == 'MICROSECOND_EPOCH':
|
||||
self.fmt = '%s.%f'
|
||||
_logger.debug('Set timestamp format string to {0}'.format(self.fmt))
|
||||
self.mtime = (True if self.xml.attrib.get('mtime', 'false').lower().startswith(('t', '1')) else False)
|
||||
_logger.debug('Using mtime: {0}'.format(self.mtime))
|
||||
self.owner_xml = owner_xml
|
||||
self.owner = {}
|
||||
if self.owner_xml is not None:
|
||||
|
@ -115,13 +147,16 @@ class TimestampFile(object):
|
|||
else:
|
||||
path = self.path
|
||||
if os.path.isfile(path):
|
||||
with open(path, 'r') as fh:
|
||||
ts_raw = fh.read().strip()
|
||||
if '%s' in self.fmt:
|
||||
timestamp = datetime.datetime.fromtimestamp(float(ts_raw))
|
||||
if self.mtime:
|
||||
timestamp = datetime.datetime.fromtimestamp(float(os.stat(path).st_mtime))
|
||||
else:
|
||||
timestamp = datetime.datetime.strptime(ts_raw, self.fmt)
|
||||
_logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), self.path))
|
||||
with open(path, 'r') as fh:
|
||||
ts_raw = fh.read().strip()
|
||||
if '%s' in self.fmt:
|
||||
timestamp = datetime.datetime.fromtimestamp(float(ts_raw))
|
||||
else:
|
||||
timestamp = datetime.datetime.strptime(ts_raw, self.fmt)
|
||||
_logger.debug('Read timestamp {0} from {1}'.format(str(timestamp), self.path))
|
||||
return(timestamp)
|
||||
|
||||
def write(self):
|
||||
|
@ -137,20 +172,27 @@ class TimestampFile(object):
|
|||
os.chmod(self.path, mode = 0o0644)
|
||||
if self.owner:
|
||||
os.chown(self.path, **self.owner)
|
||||
if self.mtime:
|
||||
now = float(datetime.datetime.utcnow().timestamp())
|
||||
os.utime(self.path, (now, now))
|
||||
_logger.debug('Wrote timestamp to {0}'.format(self.path))
|
||||
return(None)
|
||||
|
||||
|
||||
class Upstream(object):
|
||||
def __init__(self, upstream_xml, dest, rsync_args = None, owner = None, filechecks = None):
|
||||
def __init__(self, upstream_xml, dest, rsync_args = None, owner = None, filechecks = None, rsync_ignores = None):
|
||||
self.xml = upstream_xml
|
||||
# These are required for all upstreams.
|
||||
self.sync_type = self.xml.find('syncType').text.lower()
|
||||
self.domain = self.xml.find('domain').text
|
||||
self.path = self.xml.find('path').text
|
||||
self.dest = os.path.abspath(os.path.expanduser(dest))
|
||||
self.delay = None
|
||||
self.offset = None
|
||||
self.owner = owner
|
||||
self.filechecks = filechecks
|
||||
self._get_delaychk()
|
||||
self._get_offset()
|
||||
self.has_new = False
|
||||
# These are optional.
|
||||
port = self.xml.find('port')
|
||||
|
@ -160,15 +202,18 @@ class Upstream(object):
|
|||
self.port = constants.PROTO_DEF_PORTS[self.sync_type]
|
||||
self.available = None
|
||||
if self.sync_type == 'rsync':
|
||||
self.fetcher = fetcher.RSync(self.domain,
|
||||
self.port,
|
||||
self.path,
|
||||
self.dest,
|
||||
rsync_args = rsync_args,
|
||||
filechecks = self.filechecks,
|
||||
owner = self.owner)
|
||||
_fetcher = fetcher.RSync
|
||||
else:
|
||||
self.fetcher = fetcher.FTP(self.domain, self.port, self.path, self.dest, owner = self.owner)
|
||||
_fetcher = fetcher.FTP
|
||||
self.fetcher = _fetcher(self.domain,
|
||||
self.port,
|
||||
self.path,
|
||||
self.dest,
|
||||
rsync_args = rsync_args,
|
||||
rsync_ignores = rsync_ignores,
|
||||
filechecks = self.filechecks,
|
||||
offset = self.offset,
|
||||
owner = self.owner)
|
||||
self._check_conn()
|
||||
|
||||
def _check_conn(self):
|
||||
|
@ -182,6 +227,21 @@ class Upstream(object):
|
|||
self.available = False
|
||||
return(None)
|
||||
|
||||
def _get_delaychk(self):
|
||||
delay = self.xml.attrib.get('delayCheck')
|
||||
if not delay:
|
||||
return(None)
|
||||
delay = TimeOffset(delay)
|
||||
self.delay = delay.offset
|
||||
return(None)
|
||||
|
||||
def _get_offset(self):
|
||||
offset = self.xml.attrib.get('offset')
|
||||
if not offset:
|
||||
return(None)
|
||||
self.offset = TimeOffset(offset)
|
||||
return(None)
|
||||
|
||||
def sync(self):
|
||||
self.fetcher.fetch()
|
||||
return(None)
|
||||
|
@ -199,6 +259,7 @@ class Distro(object):
|
|||
'sync': None}}
|
||||
self.timestamps = {}
|
||||
self.rsync_args = None
|
||||
self.rsync_ignores = None
|
||||
self.owner = None
|
||||
self.upstreams = []
|
||||
self.lockfile = '/var/run/repomirror/{0}.lck'.format(self.name)
|
||||
|
@ -217,12 +278,17 @@ class Distro(object):
|
|||
e = self.xml.find('lastRemote{0}'.format(i))
|
||||
if e is not None:
|
||||
self.filechecks['remote'][i.lower()] = TimestampFile(e)
|
||||
self.rsync_ignores = []
|
||||
rsyncig_xml = self.xml.find('rsyncIgnore')
|
||||
if rsyncig_xml is not None:
|
||||
self.rsync_ignores = [int(i.strip()) for i in rsyncig_xml.attrib['returns'].split()]
|
||||
for u in self.xml.findall('upstream'):
|
||||
self.upstreams.append(Upstream(u,
|
||||
self.dest,
|
||||
rsync_args = self.rsync_args,
|
||||
owner = self.owner,
|
||||
filechecks = self.filechecks))
|
||||
filechecks = self.filechecks,
|
||||
rsync_ignores = self.rsync_ignores))
|
||||
|
||||
def check(self):
|
||||
for k, v in self.filechecks['local'].items():
|
||||
|
@ -248,22 +314,30 @@ class Distro(object):
|
|||
update = u.fetcher.timestamps.get('update')
|
||||
sync = u.fetcher.timestamps.get('sync')
|
||||
if update:
|
||||
if local_checks and (local_checks[-1] < update):
|
||||
if self.timestamps.get('update'):
|
||||
if self.timestamps['update'] < update:
|
||||
_logger.info('Local update timestamp is older than the remote update; syncing.')
|
||||
_logger.debug('Local update: {0}, remote update: {1}'.format(self.timestamps['update'],
|
||||
update))
|
||||
u.has_new = True
|
||||
elif local_checks and (local_checks[-1] < update):
|
||||
_logger.info('Newest local timestamp is older than the remote update; syncing.')
|
||||
_logger.debug('Newest local: {0}, remote update: {1}'.format(local_checks[-1], update))
|
||||
u.has_new = True
|
||||
elif not local_checks:
|
||||
_logger.info('No local timestamps; syncing.')
|
||||
u.has_new = True
|
||||
else:
|
||||
_logger.info('Local checks are newer than upstream.')
|
||||
_logger.info('Local checks are newer than upstream; not syncing.')
|
||||
_logger.debug('Newest local: {0}, remote update: {1}'.format(local_checks[-1], update))
|
||||
else:
|
||||
_logger.info('No remote update timestamp; syncing.')
|
||||
u.has_new = True
|
||||
if sync:
|
||||
if sync and u.delay:
|
||||
td = datetime.datetime.utcnow() - sync
|
||||
if td.days > constants.DAYS_WARN:
|
||||
_logger.warning(('Upstream {0} has not synced for {1} or more days; this '
|
||||
'repository may be out of date.').format(u.fetcher.url, constants.DAYS_WARN))
|
||||
if td > u.delay:
|
||||
_logger.warning(('Upstream {0} has not synced for {1} or longer; this '
|
||||
'repository may be out of date.').format(u.fetcher.url, u.delay))
|
||||
warnings.warn('Upstream may be out of date')
|
||||
return(None)
|
||||
|
||||
|
@ -294,7 +368,7 @@ class Distro(object):
|
|||
if not _is_cron:
|
||||
warnings.warn(warnmsg)
|
||||
if proc:
|
||||
proc_info = {k.lstrip('_'):v for k, v in vars(proc) if k not in ('_lock', '_proc')}
|
||||
proc_info = {k.lstrip('_'): v for k, v in vars(proc).items() if k not in ('_lock', '_proc')}
|
||||
import pprint
|
||||
print('Process information:')
|
||||
pprint.pprint(proc_info)
|
||||
|
|
|
@ -5,6 +5,8 @@ import csv
|
|||
import io
|
||||
import re
|
||||
##
|
||||
import iso3166
|
||||
##
|
||||
import classes
|
||||
|
||||
|
||||
|
@ -20,33 +22,41 @@ class Ranker(classes.Ranker):
|
|||
super().__init__(*args, **kwargs)
|
||||
self.get_mirrors()
|
||||
|
||||
def extract_mirrors(self, preferred_proto = 'rsync'):
|
||||
preferred_proto = preferred_proto.lower()
|
||||
if preferred_proto not in ('rsync', 'ftp'):
|
||||
raise ValueError('Invalid preferred_proto; must be one of rsync or ftp')
|
||||
non_preferred = ('rsync' if preferred_proto == 'ftp' else 'ftp')
|
||||
def extract_mirrors(self):
|
||||
# They removed FTP support.
|
||||
c = csv.DictReader(io.StringIO(self.raw_html), )
|
||||
my_country = iso3166.countries.get(self.my_info['country'])
|
||||
countrynames = iso3166.countries_by_name.keys()
|
||||
for row in c:
|
||||
if not row['Country'] or row['Country'].strip() == '':
|
||||
continue
|
||||
# GorRAM it, dudes. States are not countries.
|
||||
country = row['Country'].strip()
|
||||
region = row['Region'].strip()
|
||||
if region == 'US':
|
||||
country = region
|
||||
if country != self.my_info['country']:
|
||||
if not row.get('Region') or row['Region'].strip() == '':
|
||||
row['Location'] = row['Region']
|
||||
# They changed things. Again.
|
||||
country = row['Region'].strip()
|
||||
continent = row['Location'].strip()
|
||||
cu = country.upper()
|
||||
if continent in ('US', 'Canada'):
|
||||
country = continent
|
||||
try:
|
||||
country = iso3166.countries.get(country)
|
||||
except KeyError:
|
||||
country = iso3166.countries_by_name.get(cu)
|
||||
# Gorram it.
|
||||
if not country:
|
||||
for cs in countrynames:
|
||||
if cs.startswith(cu):
|
||||
country = iso3166.countries_by_name[cs]
|
||||
break
|
||||
if country != my_country:
|
||||
continue
|
||||
for k, v in row.items():
|
||||
if v.strip() == '':
|
||||
row[k] = None
|
||||
pref_url = row['{0} mirror link'.format(preferred_proto)]
|
||||
nonpref_url = row['{0} mirror link'.format(non_preferred)]
|
||||
if pref_url:
|
||||
url = _proto_re.sub(r'{0}\g<uri>'.format(preferred_proto), pref_url)
|
||||
pref_url = row.get('Rsync link')
|
||||
pref_url = str(pref_url).strip()
|
||||
if pref_url not in ('', None, 'None'):
|
||||
url = _proto_re.sub(r'\g<uri>', pref_url)
|
||||
else:
|
||||
if not nonpref_url:
|
||||
continue
|
||||
url = _proto_re.sub(r'{0}\g<uri>'.format(non_preferred), nonpref_url)
|
||||
continue
|
||||
self.raw_mirrors.append(row)
|
||||
self.mirror_candidates.append(url)
|
||||
return(None)
|
||||
|
|
|
@ -74,6 +74,18 @@ class Ranker(classes.Ranker):
|
|||
# self.mirror_candidates.append(mirror['url'])
|
||||
# return(None)
|
||||
|
||||
def speedcheck(self):
|
||||
# Ignore because EPEL can't really work.
|
||||
return(None)
|
||||
|
||||
def gen_xml(self):
|
||||
# Ignore because EPEL can't really work.
|
||||
return(None)
|
||||
|
||||
def print(self):
|
||||
# Ignore because EPEL can't really work.
|
||||
return(None)
|
||||
|
||||
|
||||
def parseArgs():
|
||||
args = argparse.ArgumentParser(description = 'Generate a list of suitable EPEL upstream mirrors in order of '
|
||||
|
|
56
reposync
56
reposync
|
@ -1,56 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
##
|
||||
##
|
||||
_cur_file = pathlib.Path(os.path.abspath(os.path.expanduser(__file__))).resolve()
|
||||
_cur_path = os.path.dirname(_cur_file)
|
||||
sys.path.insert(1, _cur_path)
|
||||
import repomirror
|
||||
|
||||
|
||||
if os.geteuid() == 0:
|
||||
_def_logdir = '/var/log/repo'
|
||||
else:
|
||||
_def_logdir = '~/.cache/repologs'
|
||||
|
||||
|
||||
def parseArgs():
|
||||
args = argparse.ArgumentParser(description = 'Sync repositories for various distributions to local paths')
|
||||
args.add_argument('-c', '--config',
|
||||
default = '~/.config/repomirror.xml',
|
||||
dest = 'cfg',
|
||||
help = ('The path to the config file. If it does not exist, a bare version will be created. '
|
||||
'Default: ~/.config/repomirror.xml'))
|
||||
# args.add_argument('-n', '--dry-run',
|
||||
# action = 'store_true',
|
||||
# dest = 'dummy',
|
||||
# help = ('If specified, do not actually sync anything (other than timestamp files if '
|
||||
# 'applicable to determine logic); do not actually sync any repositories. Useful for '
|
||||
# 'generating logs to determine potential issues before they happen'))
|
||||
args.add_argument('-d', '--distro',
|
||||
dest = 'distro',
|
||||
action = 'append',
|
||||
help = ('If specified, only sync the specified distro in the config file (otherwise sync all '
|
||||
'specified). May be given multiple times'))
|
||||
args.add_argument('-l', '--logdir',
|
||||
default = _def_logdir,
|
||||
dest = 'logdir',
|
||||
help = ('The path to the directory where logs should be written. The actual log files will be '
|
||||
'named after their respective distro names in the config file. '
|
||||
'Default: {0}'.format(_def_logdir)))
|
||||
return(args)
|
||||
|
||||
|
||||
def main():
|
||||
args = parseArgs().parse_args()
|
||||
r = repomirror.Sync(**vars(args))
|
||||
r.sync()
|
||||
return(None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1 +0,0 @@
|
|||
reposync
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
##
|
||||
##
|
||||
_cur_file = pathlib.Path(os.path.abspath(os.path.expanduser(__file__))).resolve()
|
||||
_cur_path = os.path.dirname(_cur_file)
|
||||
sys.path.insert(1, _cur_path)
|
||||
import repomirror
|
||||
|
||||
|
||||
if os.geteuid() == 0:
|
||||
_def_logdir = '/var/log/repo'
|
||||
else:
|
||||
_def_logdir = '~/.cache/repologs'
|
||||
|
||||
|
||||
def parseArgs():
|
||||
args = argparse.ArgumentParser(description = 'Sync repositories for various distributions to local paths')
|
||||
args.add_argument('-c', '--config',
|
||||
default = '~/.config/repomirror.xml',
|
||||
dest = 'cfg',
|
||||
help = ('The path to the config file. If it does not exist, a bare version will be created. '
|
||||
'Default: ~/.config/repomirror.xml'))
|
||||
# args.add_argument('-n', '--dry-run',
|
||||
# action = 'store_true',
|
||||
# dest = 'dummy',
|
||||
# help = ('If specified, do not actually sync anything (other than timestamp files if '
|
||||
# 'applicable to determine logic); do not actually sync any repositories. Useful for '
|
||||
# 'generating logs to determine potential issues before they happen'))
|
||||
args.add_argument('-d', '--distro',
|
||||
dest = 'distro',
|
||||
action = 'append',
|
||||
help = ('If specified, only sync the specified distro in the config file (otherwise sync all '
|
||||
'specified). May be given multiple times'))
|
||||
args.add_argument('-l', '--logdir',
|
||||
default = _def_logdir,
|
||||
dest = 'logdir',
|
||||
help = ('The path to the directory where logs should be written. The actual log files will be '
|
||||
'named after their respective distro names in the config file. '
|
||||
'Default: {0}'.format(_def_logdir)))
|
||||
return(args)
|
||||
|
||||
|
||||
def main():
|
||||
args = parseArgs().parse_args()
|
||||
r = repomirror.Sync(**vars(args))
|
||||
r.sync()
|
||||
return(None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -2,6 +2,10 @@
|
|||
lxml>=4.5.1
|
||||
psutil>=5.7.0
|
||||
requests>=2.23.0
|
||||
## TODO: support non-UTC local timezone.
|
||||
# https://stackoverflow.com/questions/13218506/how-to-get-system-timezone-setting-and-pass-it-to-pytz-timezone
|
||||
# https://stackoverflow.com/questions/5067218/get-utc-timestamp-in-python-with-datetime
|
||||
#tzlocal>=2.1
|
||||
# The below are optional.
|
||||
## Provides journald logging on systemd machines.
|
||||
## However, it isn't in PyPi; look for something like "python-systemd" or "python3-systemd" in your distro's repositories (it's built from the systemd source).
|
||||
|
|
Loading…
Reference in New Issue