optools/storage/backups/borg/backup.py

550 lines
27 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# TODO: https://borgbackup.readthedocs.io/en/latest/internals/frontends.html
# will they EVER release a public API? for now we'll just use subprocess since
# we import it for various prep stuff anyways.
import argparse
import configparser
import datetime
import json
import logging
import logging.handlers
import os
import subprocess
import sys
try:
import pymysql # not stdlib; "python-pymysql" in Arch's AUR
has_mysql = True
except ImportError:
has_mysql = False
try:
# https://www.freedesktop.org/software/systemd/python-systemd/journal.html#journalhandler-class
from systemd import journal
has_systemd = True
except ImportError:
has_systemd = False
### LOG LEVEL MAPPINGS ###
loglvls = {'critical': logging.CRITICAL,
'error': logging.ERROR,
'warning': logging.WARNING,
'info': logging.INFO,
'debug': logging.DEBUG}
### THE GUTS ###
class Backup(object):
def __init__(self, args):
self.args = args
### DIRECTORIES ###
if self.args['oper'] == 'backup':
for d in (self.args['mysqldir'], self.args['stagedir']):
os.makedirs(d, exist_ok = True, mode = 0o700)
### LOGGING ###
# Thanks to:
# https://web.archive.org/web/20170726052946/http://www.lexev.org/en/2013/python-logging-every-day/
# https://stackoverflow.com/a/42604392
# https://plumberjack.blogspot.com/2010/10/supporting-alternative-formatting.html
# and user K900_ on r/python for entertaining my very silly question.
self.logger = logging.getLogger(__name__)
self.logger.setLevel(loglvls[self.args['loglevel']])
_logfmt = logging.Formatter(fmt = '{levelname}:{name}: {message} ({asctime}; {filename}:{lineno})',
style = '{',
datefmt = '%Y-%m-%d %H:%M:%S')
_journalfmt = logging.Formatter(fmt = '{levelname}:{name}: {message} ({filename}:{lineno})',
style = '{',
datefmt = '%Y-%m-%d %H:%M:%S')
handlers = []
if self.args['disklog']:
os.makedirs(os.path.dirname(self.args['logfile']),
exist_ok = True,
mode = 0o700)
handlers.append(logging.handlers.RotatingFileHandler(self.args['logfile'],
encoding = 'utf8',
maxBytes = 100000,
backupCount = 1))
if self.args['verbose']:
handlers.append(logging.StreamHandler())
if has_systemd:
h = journal.JournalHandler()
h.setFormatter(_journalfmt)
h.setLevel(loglvls[self.args['loglevel']])
self.logger.addHandler(h)
for h in handlers:
h.setFormatter(_logfmt)
h.setLevel(loglvls[self.args['loglevel']])
self.logger.addHandler(h)
self.logger.debug('BEGIN INITIALIZATION')
### CONFIG ###
if not os.path.isfile(self.args['cfgfile']):
self.logger.error('{0} does not exist'.format(self.args['cfgfile']))
exit(1)
with open(self.args['cfgfile'], 'r') as f:
self.cfg = json.loads(f.read())
### END LOGGING ###
### ARGS CLEANUP ###
self.logger.debug('VARS (before args cleanup): {0}'.format(vars()))
self.args['repo'] = [i.strip() for i in self.args['repo'].split(',')]
if 'all' in self.args['repo']:
self.args['repo'] = list(self.cfg['repos'].keys())
for r in self.args['repo'][:]:
if r == 'all':
self.args['repo'].remove(r)
elif r not in self.cfg['repos'].keys():
self.logger.warning('Repository {0} is not configured; skipping.'.format(r))
self.args['repo'].remove(r)
self.logger.debug('VARS (after args cleanup): {0}'.format(vars()))
self.logger.debug('END INITIALIZATION')
### CHECK ENVIRONMENT ###
# If we're running from cron, we want to print errors to stdout.
if os.isatty(sys.stdin.fileno()):
self.cron = False
else:
self.cron = True
### END INIT ###
def cmdExec(self, cmd, stdoutfh = None):
self.logger.debug('Running command: {0}'.format(' '.join(cmd)))
2017-10-25 03:22:05 -04:00
if self.args['dryrun']:
return() # no-op
if stdoutfh:
_cmd = subprocess.run(cmd, stdout = stdoutfh, stderr = subprocess.PIPE)
else:
_cmd = subprocess.run(cmd,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_out = _cmd.stdout.decode('utf-8').strip()
_err = _cmd.stderr.decode('utf-8').strip()
_returncode = _cmd.returncode
if _returncode != 0:
self.logger.error('STDERR: ({1})\n{0}'.format(_err, ' '.join(cmd)))
if _err != '' and self.cron:
2017-10-26 06:13:31 -04:00
self.logger.warning('Command {0} failed: {1}'.format(' '.join(cmd), _err))
return()
def createRepo(self):
_env = os.environ.copy()
_env['BORG_RSH'] = self.cfg['config']['ctx']
for r in self.args['repo']:
self.logger.info('[{0}]: BEGIN INITIALIZATION'.format(r))
_cmd = ['borg',
'init',
'-v',
'{0}@{1}:{2}'.format(self.cfg['config']['user'],
self.cfg['config']['host'],
r)]
_env['BORG_PASSPHRASE'] = self.cfg['repos'][r]['password']
# We don't use self.cmdExec() here either because
# again, custom env, etc.
self.logger.debug('VARS: {0}'.format(vars()))
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = _out.stdout.decode('utf-8').strip()
_stderr = _out.stderr.decode('utf-8').strip()
2017-10-25 16:28:08 -04:00
_returncode = _out.returncode
self.logger.debug('[{0}]: (RESULT) {1}'.format(r, _stdout))
2017-10-25 16:28:08 -04:00
# sigh. borg uses stderr for verbose output.
self.logger.debug('[{0}]: STDERR: ({2})\n{1}'.format(r,
_stderr,
' '.join(_cmd)))
if _returncode != 0:
self.logger.error('[{0}]: FAILED: {1}'.format(r, ' '.join(_cmd)))
if _err != '' and self.cron and _returncode != 0:
2017-10-26 06:13:31 -04:00
self.logger.warning('Command {0} failed: {1}'.format(' '.join(cmd), _err))
del(_env['BORG_PASSPHRASE'])
self.logger.info('[{0}]: END INITIALIZATION'.format(r))
return()
def create(self):
_env = os.environ.copy()
_env['BORG_RSH'] = self.cfg['config']['ctx']
self.logger.info('START: backup')
for r in self.args['repo']:
self.logger.info('[{0}]: BEGIN BACKUP'.format(r))
if 'prep' in self.cfg['repos'][r].keys():
for prep in self.cfg['repos'][r]['prep']:
self.logger.info('[{0}]: Running prepfunc {1}'.format(r, prep))
eval('self.{0}'.format(prep)) # I KNOW, IT'S TERRIBLE. so sue me.
self.logger.info('[{0}]: Finished prepfunc {1}'.format(r, prep))
_cmd = ['borg',
'create',
'-v', '--stats',
'--compression', 'lzma,9']
if 'excludes' in self.cfg['repos'][r].keys():
for e in self.cfg['repos'][r]['excludes']:
_cmd.extend(['--exclude', e])
_cmd.append('{0}@{1}:{2}::{3}'.format(self.cfg['config']['user'],
self.cfg['config']['host'],
r,
self.args['archive']))
2017-10-25 03:10:35 -04:00
for p in self.cfg['repos'][r]['paths']:
_cmd.append(p)
_env['BORG_PASSPHRASE'] = self.cfg['repos'][r]['password']
self.logger.debug('VARS: {0}'.format(vars()))
# We don't use self.cmdExec() here because we want to explicitly pass the env
# and format the log line differently.
self.logger.debug('[{0}]: Running command: {1}'.format(r, ' '.join(_cmd)))
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = _out.stdout.decode('utf-8').strip()
_stderr = _out.stderr.decode('utf-8').strip()
2017-10-25 16:28:08 -04:00
_returncode = _out.returncode
self.logger.debug('[{0}]: (RESULT) {1}'.format(r, _stdout))
2017-10-25 16:28:08 -04:00
self.logger.error('[{0}]: STDERR: ({2})\n{1}'.format(r,
_stderr,
' '.join(_cmd)))
if _returncode != 0:
self.logger.error('[{0}]: FAILED: {1}'.format(r, ' '.join(_cmd)))
if _err != '' and self.cron and _returncode != 0:
2017-10-26 06:14:05 -04:00
self.logger.warning('Command {0} failed: {1}'.format(' '.join(cmd), _err))
del(_env['BORG_PASSPHRASE'])
self.logger.info('[{0}]: END BACKUP'.format(r))
self.logger.info('END: backup')
return()
def miscBak(self, pkgr):
self.logger.info('BEGIN: miscBak()')
_cmd = None
for p in os.environ['PATH'].split(':'):
d = os.path.expanduser(p)
2017-10-25 03:14:33 -04:00
if os.path.isfile(os.path.join(d, pkgr)):
_pkgr = pkgr
self.logger.debug('Package tool found at {0}'.format(_pkgr))
else:
_pkgr = 'pacman'
self.logger.debug('Using {0} as package tool'.format(_pkgr))
with open(os.path.join(self.args['stagedir'], 'pkg.lst'), 'w') as f:
_cmd = [_pkgr,
'-Qet',
'--color',
'never']
self.cmdExec(_cmd, stdoutfh = f)
self.logger.info('END: miscBak()')
return()
def mysqlBak(self):
self.logger.info('BEGIN: mysqlBak()')
if not has_mysql:
self.logger.error('You need to install the PyMySQL module to back up MySQL databases. Skipping.')
return()
# These are mysqldump options shared by ALL databases
_mysqlopts = ['--routines',
'--add-drop-database',
'--add-drop-table',
'--allow-keywords',
'--complete-insert',
'--create-options',
'--extended-insert']
_DBs = []
_mycnf = os.path.expanduser(os.path.join('~', '.my.cnf'))
if not os.path.isfile(_mycnf):
exit('{0}: ERROR: Cannot get credentials for MySQL (cannot find ~/.my.cnf)!')
_mycfg = configparser.ConfigParser()
_mycfg._interpolation = configparser.ExtendedInterpolation()
_mycfg.read(_mycnf)
_sqlcfg = {s:dict(_mycfg.items(s)) for s in _mycfg.sections()}
if 'host' not in _sqlcfg.keys():
_socketpath = '/var/run/mysqld/mysqld.sock' # correct for Arch, YMMV.
_mysql = pymysql.connect(unix_socket = _socketpath,
user = _sqlcfg['client']['user'],
passwd = _sqlcfg['client']['password'])
else:
_mysql = pymysql.connect(host = _sqlcfg['client']['host'],
user = _sqlcfg['client']['user'],
port = _sqlcfg['client']['port'],
passwd = _sqlcfg['client']['password'])
_cur = _mysql.cursor()
_cur.execute('SHOW DATABASES')
for row in _cur.fetchall():
_DBs.append(row[0])
self.logger.debug('Databases: {0}'.format(', '.join(_DBs)))
for db in _DBs:
_cmd = ['mysqldump',
'--result-file={0}.sql'.format(os.path.join(mysqldir, db))]
# These are database-specific options
if db in ('information_schema', 'performance_schema'):
_cmd.append('--skip-lock-tables')
elif db == 'mysql':
_cmd.append('--flush-privileges')
_cmd.extend(_mysqlopts)
_cmd.append(db)
self.cmdExec(_cmd)
self.logger.info('END: mysqlBak()')
return()
def listRepos(self):
print('\n\033[1mCurrently configured repositories are:\033[0m\n')
print('\t{0}\n'.format(', '.join(self.cfg['repos'].keys())))
if self.args['verbose']:
print('\033[1mDETAILS:\033[0m\n')
for r in self.args['repo']:
print('\t\033[1m{0}:\033[0m\n\t\t\033[1mPath(s):\033[0m\t'.format(r.upper()), end = '')
for p in self.cfg['repos'][r]['paths']:
print(p, end = ' ')
if 'prep' in self.cfg['repos'][r].keys():
print('\n\t\t\033[1mPrep:\033[0m\t\t', end = '')
for p in self.cfg['repos'][r]['prep']:
print(p, end = ' ')
if 'excludes' in self.cfg['repos'][r].keys():
print('\n\t\t\033[1mExclude(s):\033[0m\t', end = '')
for p in self.cfg['repos'][r]['excludes']:
print(p, end = ' ')
print('\n')
return()
def printer(self):
2017-10-25 03:07:18 -04:00
# TODO: better alignment. https://stackoverflow.com/a/5676884
_results = self.lister()
if not self.args['archive']: # It's a listing of archives
print('\033[1mREPO:\tSNAPSHOT:\t\tTIMESTAMP:\033[0m\n')
for r in _results.keys():
print(r, end = '')
for line in _results[r]:
_snapshot = line.split()
2017-10-25 03:07:18 -04:00
print('\t{0}\t\t{1}'.format(_snapshot[0], ' '.join(_snapshot[1:])))
print()
else: # It's a listing inside an archive
if self.args['verbose']:
2017-10-25 03:07:18 -04:00
_fields = ['REPO:', 'PERMS:', 'OWNERSHIP:', 'SIZE:', 'TIMESTAMP:', 'PATH:']
for r in _results.keys():
print('\033[1m{0}\t{1}\033[0m'.format(_fields[0], r))
# https://docs.python.org/3/library/string.html#formatspec
2017-10-25 03:07:18 -04:00
print('{0[1]:<15}\t{0[2]:<15}\t{0[3]:<15}\t{0[4]:<24}\t{0[5]:<15}'.format(_fields))
for line in _results[r]:
_fline = line.split()
2017-10-25 03:07:18 -04:00
_perms = _fline[0]
_ownership = '{0}:{1}'.format(_fline[1], _fline[2])
_size = _fline[3]
_time = ' '.join(_fline[4:7])
_path = ' '.join(_fline[7:])
print('{0:<15}\t{1:<15}\t{2:<15}\t{3:<24}\t{4:<15}'.format(_perms,
_ownership,
_size,
_time,
_path))
else:
print('\033[1mREPO:\tPATH:\033[0m\n')
for r in _results.keys():
print(r, end = '')
for line in _results[r]:
_fline = line.split()
print('\t{0}'.format(' '.join(_fline[7:])))
return()
def lister(self):
output = {}
_env = os.environ.copy()
2017-10-25 03:07:18 -04:00
self.logger.debug('START: lister')
_env['BORG_RSH'] = self.cfg['config']['ctx']
for r in self.args['repo']:
if self.args['archive']:
_cmd = ['borg',
'list',
'{0}@{1}:{2}::{3}'.format(self.cfg['config']['user'],
self.cfg['config']['host'],
r,
self.args['archive'])]
else:
_cmd = ['borg',
'list',
'{0}@{1}:{2}'.format(self.cfg['config']['user'],
self.cfg['config']['host'],
r)]
_env['BORG_PASSPHRASE'] = self.cfg['repos'][r]['password']
if not self.args['dryrun']:
_out = subprocess.run(_cmd,
env = _env,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
_stdout = [i.strip() for i in _out.stdout.decode('utf-8').splitlines()]
_stderr = _out.stderr.decode('utf-8').strip()
_returncode = _out.returncode
output[r] = _stdout
self.logger.debug('[{0}]: (RESULT) {1}'.format(r,
'\n'.join(_stdout)))
if _returncode != 0:
self.logger.error('[{0}]: STDERR: ({2}) ({1})'.format(r,
_stderr,
' '.join(_cmd)))
if _err != '' and self.cron and _returncode != 0:
2017-10-26 06:13:31 -04:00
self.logger.warning('Command {0} failed: {1}'.format(' '.join(cmd), _err))
del(_env['BORG_PASSPHRASE'])
if not self.args['archive']:
if self.args['numlimit'] > 0:
if self.args['old']:
output[r] = output[r][:self.args['numlimit']]
else:
output[r] = list(reversed(output[r]))[:self.args['numlimit']]
if self.args['invert']:
output[r] = reversed(output[r])
2017-10-25 03:07:18 -04:00
self.logger.debug('END: lister')
return(output)
def printMoarHelp():
_helpstr = ('\n\tNOTE: Sorting only applies to listing archives, NOT the contents!\n\n' +
'In order to efficiently display results, there are several options to handle it. ' +
'Namely, these are:\n\n\t\t-s/--sort [direction]\n\t\t-l/--limit [number]\n\t\t-x/--invert\n\n' +
'For example, if you want to list the 5 most recently *taken* snapshots, you would use:\n\n\t\t-l 5\n\n' +
'If you would want those SAME results SORTED in the reverse order (i.e. the 5 most recently ' +
'taken snapshots sorted from newest to oldest), then it would be: \n\n\t\t-l 5 -x\n\n' +
'Lastly, if you wanted to list the 7 OLDEST TAKEN snapshots in reverse order (that is, ' +
'sorted from newest to oldest), that\'d be: \n\n\t\t-o -l 7 -x\n')
print(_helpstr)
exit(0)
def parseArgs():
### DEFAULTS ###
_date = datetime.datetime.now().strftime("%Y_%m_%d.%H_%M")
_logfile = '/var/log/borg/{0}'.format(_date)
_mysqldir = os.path.abspath(os.path.join(os.path.expanduser('~'), 'bak', 'mysql'))
_stagedir = os.path.abspath(os.path.join(os.path.expanduser('~'), '.bak', 'misc'))
_cfgfile = os.path.abspath(os.path.join(os.path.expanduser('~'), '.config', 'optools', 'backup.json'))
_defloglvl = 'info'
######
args = argparse.ArgumentParser(description = 'Backups manager',
epilog = 'TIP: this program has context-specific help. e.g. try "%(prog)s list --help"')
args.add_argument('-c', '--config',
dest = 'cfgfile',
default = _cfgfile,
help = ('The path to the config file. Default: \033[1m{0}\033[0m'.format(_cfgfile)))
args.add_argument('-Ll', '--loglevel',
dest = 'loglevel',
default = _defloglvl,
choices = list(loglvls.keys()),
help = ('The level of logging to perform. \033[1mWARNING:\033[0m \033[1mdebug\033[0m will log ' +
'VERY sensitive information such as passwords! Default: \033[1m{0}\033[0m'.format(_defloglvl)))
args.add_argument('-Ld', '--log-to-disk',
dest = 'disklog',
action = 'store_true',
help = ('If specified, log to a specific file (-Lf/--logfile)' +
' instead of the system logger.'))
args.add_argument('-Lf', '--logfile',
dest = 'logfile',
default = _logfile,
help = ('The path to the logfile, only used if -Ld/--log-to-disk ' +
'is specified. Default: \033[1m{0}\033[0m (dynamic)').format(_logfile))
args.add_argument('-v', '--verbose',
dest = 'verbose',
action = 'store_true',
help = ('If specified, log messages will be printed to STDERR ' +
'in addition to the other configured log system(s), and verbosity for printing ' +
'functions is increased. \033[1mWARNING:\033[0m This may display VERY sensitive information ' +
'such as passwords!'))
### ARGS FOR ALL OPERATIONS ###
commonargs = argparse.ArgumentParser(add_help = False)
commonargs.add_argument('-r', '--repo',
dest = 'repo',
default = 'all',
help = ('The repository to perform the operation for. ' +
'The default is \033[1mall\033[0m, a special value that specifies all known ' +
'repositories. Can also accept a comma-separated list.'))
remoteargs = argparse.ArgumentParser(add_help = False)
remoteargs.add_argument('-d', '--dry-run',
dest = 'dryrun',
action = 'store_true',
help = ('Act as if we are performing tasks, but none will actually be executed ' +
'(useful for testing logging)'))
### OPERATIONS ###
subparsers = args.add_subparsers(help = 'Operation to perform',
dest = 'oper')
backupargs = subparsers.add_parser('backup',
help = 'Perform a backup.',
parents = [commonargs, remoteargs])
listargs = subparsers.add_parser('list',
help = 'List available backups.',
parents = [commonargs, remoteargs])
listrepoargs = subparsers.add_parser('listrepos',
help = 'List availabile/configured repositories.',
parents = [commonargs])
initargs = subparsers.add_parser('init',
help = 'Initialise a repository.',
parents = [commonargs, remoteargs])
### OPERATION-SPECIFIC OPTIONS ###
# CREATE ("backup") #
backupargs.add_argument('-a',
'--archive',
default = _date,
dest = 'archive',
help = ('The name of the archive. Default: \033[1m{0}\033[0m (dynamic)').format(_date))
backupargs.add_argument('-s',
'--stagedir',
default = _stagedir,
dest = 'stagedir',
help = ('The directory used for staging temporary files, ' +
'if necessary. Default: \033[1m{0}\033[0m').format(_stagedir))
backupargs.add_argument('-m',
'--mysqldir',
default = _mysqldir,
dest = 'mysqldir',
help = ('The path to where MySQL dumps should go. ' +
'Default: \033[1m{0}\033[0m').format(_mysqldir))
# DISPLAY/OUTPUT ("list") #
listargs.add_argument('-a',
'--archive',
dest = 'archive',
default = False,
help = 'If specified, will list the *contents* of the given archive name.')
listargs.add_argument('-l',
'--limit',
dest = 'numlimit',
type = int,
default = '5',
help = ('If specified, constrain the outout to this number of ' +
'results each repo. Default is \033[1m5\033[0m, use 0 for unlimited. ' +
'See \033[1m-H/--list-help\033[0m'))
listargs.add_argument('-s',
'--sort',
dest = 'sortby',
choices = ['newest', 'oldest'],
default = 'oldest',
help = ('The order to sort the results by. See \033[1m-H/--list-help\033[0m. ' +
'Default: \033[1moldest\033[0m'))
listargs.add_argument('-x',
'--invert',
dest = 'invert',
action = 'store_true',
help = 'Invert the order of results. See \033[1m-H/--list-help\033[0m.')
listargs.add_argument('-o',
'--old',
dest = 'old',
action = 'store_true',
help = ('Instead of grabbing the latest results, grab the earliest results. ' +
'This differs from \033[1m-s/--sort\033[0m. See \033[1m-H/--list-help\033[0m.'))
listargs.add_argument('-H',
'--list-help',
dest = 'moarhelp',
action = 'store_true',
help = 'Print extended information about how to manage the output of listing and exit.')
return(args)
def main():
rawargs = parseArgs()
args = vars(rawargs.parse_args())
args['cfgfile'] = os.path.abspath(os.path.expanduser(args['cfgfile']))
if not args['oper']:
rawargs.print_help()
exit(0)
if 'moarhelp' in args.keys() and args['moarhelp']:
printMoarHelp()
# The "Do stuff" part
bak = Backup(args)
if args['oper'] == 'list':
bak.printer()
elif args['oper'] == 'listrepos':
bak.listRepos()
elif args['oper'] == 'backup':
2017-10-25 03:08:56 -04:00
bak.create()
elif args['oper'] == 'init':
bak.createRepo()
return()
if __name__ == '__main__':
main()