2017-09-04 20:45:08 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import argparse
|
2017-09-15 09:10:37 -04:00
|
|
|
import configparser
|
2018-11-07 12:40:37 -05:00
|
|
|
import copy
|
2017-09-04 20:45:08 -04:00
|
|
|
import datetime
|
|
|
|
import os
|
2017-09-15 09:10:37 -04:00
|
|
|
import pprint
|
2017-09-04 20:45:08 -04:00
|
|
|
import subprocess
|
2017-09-15 09:56:11 -04:00
|
|
|
import sys
|
2017-09-04 20:45:08 -04:00
|
|
|
|
2018-04-15 11:36:41 -04:00
|
|
|
# TODO: convert .ini to treat [section]s as repositories, with a [DEFAULT]
|
|
|
|
# section for URL etc.
|
|
|
|
|
2018-02-13 00:09:36 -05:00
|
|
|
# Per-user configuration file: ~/.config/optools/repoclone/arch.ini
# expanduser() honours $HOME when it is set, but falls back to the password
# database instead of raising KeyError at import time when it is not.
cfgfile = os.path.join(os.path.expanduser('~'),
                       '.config',
                       'optools',
                       'repoclone',
                       'arch.ini')
|
2017-09-15 09:10:37 -04:00
|
|
|
|
2017-09-04 20:45:08 -04:00
|
|
|
# Base rsync options shared by every rsync invocation in this script.
# This must remain a mutable list.
opts = [
    # recurse into directories
    '--recursive',
    # preserve modification times
    '--times',
    # copy symlinks as symlinks
    '--links',
    # preserve hard links
    '--hard-links',
    # suppress non-error messages
    '--quiet',
    # receiver deletes after transfer, not during
    '--delete-after',
    # put all updated files into place at end
    '--delay-updates',
    # transform symlink into referent file/dir
    '--copy-links',
    # ignore symlinks that point outside the tree
    '--safe-links',
    # (disabled) '--max-delete': don't delete more than NUM files
    # also delete excluded files from dest dirs
    '--delete-excluded',
    # exclude files matching PATTERN
    '--exclude=.*',
]
|
|
|
|
|
2017-09-15 09:10:37 -04:00
|
|
|
def _utc_epoch():
    """Return the current time as whole seconds since the Unix epoch."""
    # A naive utcnow() passed to timestamp() is interpreted as *local* time,
    # which skews the result by the UTC offset; use an aware datetime instead.
    return int(datetime.datetime.now(datetime.timezone.utc).timestamp())


def _read_epoch(path):
    """Return the integer timestamp stored in *path*, or 0 if it is missing or malformed."""
    try:
        with open(path, 'r') as f:
            return int(f.read().strip())
    except (OSError, ValueError):
        return 0


def _find_rsync(default='/usr/bin/rsync'):
    """Return the first 'rsync' file found on $PATH, or *default* if none is found."""
    for directory in os.environ.get('PATH', '').split(os.pathsep):
        if not directory:
            continue
        candidate = os.path.join(directory, 'rsync')
        if os.path.isfile(candidate):
            return candidate  # in case rsync isn't in /usr/bin/rsync
    return default


def _is_interactive():
    """Return True when stdin is a terminal (i.e. we were started from a shell)."""
    return os.isatty(sys.stdin.fileno())


def _report_errors(errors, logfile):
    """Print rsync error lines when interactive; otherwise append them to *logfile*."""
    if not errors:
        return
    if _is_interactive():
        prefix = 'symlink has no referent: '
        print('We encountered some errors:')
        for line in errors:
            if line.startswith(prefix):
                # Everything after the prefix is the (quoted) path of the broken link.
                print('Broken upstream symlink: {0}'.format(line[len(prefix):].replace('"', '')))
            else:
                print(line)
    else:
        with open(logfile, 'a') as log:
            log.writelines('{0}\n'.format(line) for line in errors)


def sync(args):
    """Synchronize the local repository from the upstream rsync mirror.

    The mirror's 'lastupdate' and 'lastsync' files are fetched first; a full
    sync is run only when the fetched 'lastupdate' is newer than the local one.
    After a full sync the local 'lastsync' file is rewritten with the current
    time, and the 'lastcheck' file is rewritten on every run.

    Args:
        args (dict): run options. The keys read are 'mount', 'bwlimit',
            'destination', 'logfile', 'lockfile', 'lastcheck' and 'mirror'.

    Returns:
        tuple: an empty tuple (kept for backward compatibility).

    Raises:
        SystemExit: if args['mount'] is not mounted, or if the lockfile
            already exists (silently when stdin is not a terminal).
    """
    # TODO: this should be a class, probably, instead as there's a lot of shared data across what should be multiple
    # functions.
    mntchk = subprocess.run(['findmnt', args['mount']],
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.DEVNULL)
    if mntchk.returncode != 0:
        sys.exit('!! BAILING OUT; {0} isn\'t mounted !!'.format(args['mount']))

    # Work on a copy so repeated calls never accumulate flags in the module-level list.
    rsync_opts = list(opts)
    if args['bwlimit'] >= 1:
        rsync_opts.append('--bwlimit=' + str(args['bwlimit']))  # limit socket I/O bandwidth

    # The destination itself must exist (we read/write files inside it); for
    # the other paths only the containing directory is needed.
    os.makedirs(args['destination'], exist_ok=True)
    for k in ('logfile', 'lockfile', 'lastcheck'):
        parent = os.path.dirname(args[k])
        if parent:
            os.makedirs(parent, exist_ok=True)

    base_cmd = [_find_rsync()] + rsync_opts  # the binary plus its arguments
    # TODO: implement repos here?
    # end TODO
    # The https://git.server-speed.net/users/flo/bin/tree/syncrepo.sh script uses http(s). to check for lastupdate.
    # I don't, because not all mirrors *have* http(s).
    lastupdate_path = os.path.join(args['destination'], 'lastupdate')
    lastsync_path = os.path.join(args['destination'], 'lastsync')
    check_cmd = base_cmd + [os.path.join(args['mirror'], 'lastupdate'), lastupdate_path]
    update_cmd = base_cmd + [os.path.join(args['mirror'], 'lastsync'), lastsync_path]
    full_cmd = base_cmd + [os.path.join(args['mirror'], '.'),        # the path on the remote mirror (full sync)
                           os.path.join(args['destination'], '.')]  # the local destination (full sync)

    # Mode 'x' creates the lockfile atomically, so two concurrent runs cannot
    # both pass an "exists?" check and then both write the lock.
    try:
        with open(args['lockfile'], 'x') as f:
            f.write(str(os.getpid()))
    except FileExistsError:
        if not _is_interactive():
            sys.exit()  # we're running in cron, stay silent.
        # Running from shell
        try:
            with open(args['lockfile'], 'r') as f:
                existingpid = f.read().strip()
        except OSError:
            existingpid = 'unknown'
        sys.exit('!! A repo synchronization seems to already be running (PID: {0}). Quitting. !!'.format(existingpid))

    errors = []
    try:
        # determine if we need to do a full sync.
        # A missing/unreadable local lastupdate (e.g. first run) counts as 0,
        # so any valid upstream timestamp triggers a full sync.
        oldupdate = _read_epoch(lastupdate_path)
        # TODO: when i clean this up, change this to do error detection
        subprocess.run(check_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        subprocess.run(update_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        newupdate = _read_epoch(lastupdate_path)
        if newupdate > oldupdate:
            with open(args['logfile'], 'a') as log:
                result = subprocess.run(full_cmd, stdout=log, stderr=subprocess.PIPE)
            # rsync may report file names that are not valid UTF-8; never crash on them.
            errors = result.stderr.decode('utf-8', errors='replace').splitlines()
            now = _utc_epoch()
            with open(lastsync_path, 'w') as f:
                f.write(str(now) + '\n')
        else:
            # No-op; nothing was run, so there are no errors to report.
            now = _utc_epoch()
        with open(args['lastcheck'], 'w') as f:
            f.write(str(now) + '\n')
    finally:
        # Always release the lock, even if something above raised, so a failed
        # run does not block every subsequent run.
        try:
            os.remove(args['lockfile'])
        except FileNotFoundError:
            pass

    # Only report errors at the end of the run if we aren't running in cron. Otherwise, log them.
    _report_errors(errors, args['logfile'])
    return ()
|
|
|
|
|
2017-09-15 09:10:37 -04:00
|
|
|
def getDefaults():
    """Load the configuration, writing a default config file first if none exists.

    The hardcoded defaults below populate the parser's DEFAULT section; values
    found in ``cfgfile`` override them when the file is read.

    Returns:
        configparser.ConfigParser: the parser after reading ``cfgfile``.
    """
    # Hardcoded defaults (all strings, which is what configparser stores and writes).
    dflt = {'mirror': 'rsync://mirror.square-r00t.net/arch/',
            'repos': 'core,extra,community,multilib,iso/latest',
            'destination': '/srv/repos/arch',
            'lastcheck': '/srv/http/arch.lastcheck',
            'mount': '/',
            'bwlimit': '0',
            'lockfile': '/var/run/repo-sync_arch.lck',
            'logfile': '/var/log/repo/arch.log'}
    realcfg = configparser.ConfigParser(defaults=dflt)
    if not os.path.isfile(cfgfile):
        # On first run the containing directory usually does not exist yet;
        # create it so that writing the default file cannot fail on that.
        cfgdir = os.path.dirname(cfgfile)
        if cfgdir:
            os.makedirs(cfgdir, exist_ok=True)
        with open(cfgfile, 'w') as f:
            realcfg.write(f)
    realcfg.read(cfgfile)
    return realcfg
|
|
|
|
|
2017-09-04 20:45:08 -04:00
|
|
|
def parseArgs():
    """Build the command-line parser, using the configuration file values as defaults.

    Returns:
        argparse.ArgumentParser: the configured parser; the caller is expected
        to call ``parse_args()`` on it.
    """
    cfg = getDefaults()
    liveopts = cfg['DEFAULT']

    def shown(value):
        # argparse applies %-formatting to help strings, so a literal '%' in a
        # configured path or URL must be doubled or --help raises ValueError.
        return str(value).replace('%', '%%')

    args = argparse.ArgumentParser(
        description='Synchronization for a remote Arch repository to a local one.',
        epilog=('This program will write a default configuration file to {0} '
                'if one is not found.').format(cfgfile))
    args.add_argument('-m',
                      '--mirror',
                      dest='mirror',
                      default=liveopts['mirror'],
                      help=('The upstream mirror to sync from, must be an rsync URI '
                            '(Default: {0})').format(shown(liveopts['mirror'])))
    # TODO: can we do this?
    # We can; we need to .format() a repo in, probably, on the src and dest.
    # Problem is the last updated/last synced files.
    # args.add_argument('-r',
    #                   '--repos',
    #                   dest = 'repos',
    #                   default = liveopts['repos'],
    #                   help = ('The repositories to sync; must be a comma-separated list. ' +
    #                           '(Currently not used.) Default: {0}').format(','.join(liveopts['repos'])))
    args.add_argument('-d',
                      '--destination',
                      dest='destination',
                      default=liveopts['destination'],
                      help='The destination directory to sync to. Default: {0}'.format(
                          shown(liveopts['destination'])))
    args.add_argument('-c', '--last-check',
                      dest='lastcheck',
                      default=liveopts['lastcheck'],
                      help=('The file to update with a timestamp on every run. Per spec, this must be outside the '
                            'repository webroot. Default: {0}').format(shown(liveopts['lastcheck'])))
    args.add_argument('-b',
                      '--bwlimit',
                      dest='bwlimit',
                      # The config value is a string; argparse runs string defaults through `type`.
                      default=liveopts['bwlimit'],
                      type=int,
                      help=('The amount, in Kilobytes per second, to throttle the sync to. Default is to not '
                            'throttle (0).'))
    args.add_argument('-l',
                      '--log',
                      dest='logfile',
                      default=liveopts['logfile'],
                      help='The path to the logfile. Default: {0}'.format(shown(liveopts['logfile'])))
    args.add_argument('-L',
                      '--lock',
                      dest='lockfile',
                      default=liveopts['lockfile'],
                      help='The path to the lockfile. Default: {0}'.format(shown(liveopts['lockfile'])))
    args.add_argument('-M',
                      '--mount',
                      dest='mount',
                      default=liveopts['mount'],
                      help=('The mountpoint for your --destination. The script will exit if this point is not '
                            'mounted. If you don\'t need mount checking, just use /. '
                            'Default: {0}').format(shown(liveopts['mount'])))
    return args
|
2017-09-04 20:45:08 -04:00
|
|
|
|
|
|
|
def main():
    """Script entry point: parse the command line, then run one sync pass.

    Returns:
        tuple: an empty tuple.
    """
    parser = parseArgs()
    # sync() expects a plain dict, so unwrap the argparse namespace.
    options = vars(parser.parse_args())
    sync(options)
    return ()
|
2017-09-04 20:45:08 -04:00
|
|
|
|
|
|
|
# Run the sync only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|