#!/usr/bin/python3

"""
Copy files from one hierarchy to another, but only if the files don't yet exist, or have different lengths.

Files that exist only in the destination are deleted first, then files that
exist only in the source are copied, and finally files present in both
hierarchies are re-copied when their lengths differ.

Really, rsync should do this, but I have a situation in which it doesn't.
"""

import os
import sys
import errno
import shutil
# import pprint


def usage(retval):
    """Output a usage message on stderr and exit with exit code retval."""
    lines = (
        'Sync one filesystem hierarchy to another, based on presence/absence and file lengths\n',
        'Usage: %s\n' % (sys.argv[0], ),
        '\t--source /path/to/source\n',
        '\t--destination /path/to/destination\n',
        '\t--dry-run\n',
        '\t--verbose\n',
        '\t--quiet-inhale\n',
        '\t--extensions mp3,ogg,flac (do not give a ".")\n',
        '\t--help\n',
    )
    sys.stderr.write(''.join(lines))
    sys.exit(retval)


def file_to_process(filename, extensions):
    """
    Return True if this is a file to process.

    More specifically, True if extensions is None, or if filename ends with
    '.' followed by one of the strings in extensions.  An empty extensions
    sequence matches nothing.
    """
    if extensions is None:
        return True
    # str.endswith accepts a tuple of candidate suffixes.
    return filename.endswith(tuple('.%s' % extension for extension in extensions))


def make_used(*args):
    """Persuade linters that args are used."""
    assert True or args


def obtain_files(name, base, extensions, quiet_inhale):
    """
    Get the set of files in one of our hierarchies.

    name is only used in progress messages.  The returned paths are relative
    to base and start with './'.

    Note that this changes the process's current directory to base.  The
    chdir also guarantees that a nonexistent base raises OSError instead of
    silently producing an empty set (os.walk ignores listing errors).
    """
    files = set()
    fileno = 0
    os.chdir(base)
    for root, _directories, filenames in os.walk('.'):
        for filename in filenames:
            if not file_to_process(filename, extensions):
                continue
            files.add(os.path.join(root, filename))
            fileno += 1
            if fileno % 100 == 0 and not quiet_inhale:
                sys.stderr.write('Inhaled %d %s files...\n' % (fileno, name))
    return files


def safe_mkdir(pathname):
    """Create a single directory.  If it already exists, don't stress about it."""
    try:
        os.mkdir(pathname)
    except OSError as extra:
        # Anything other than "already exists" is a real problem.
        if extra.errno != errno.EEXIST:
            raise


def my_makedirs(pathname):
    """
    Create a directory and all parent directories.

    If the directories already exist, no big deal.  Raises OSError if a
    component cannot be created, or if pathname exists but is not a directory.
    """
    os.makedirs(pathname, exist_ok=True)


def copy(verbose, dry_run, why, source_base, source_file, destination_base, destination_file):
    # pylint: disable=too-many-arguments
    """
    Copy one file to another.

    source_file and destination_file are joined onto source_base and
    destination_base respectively.  With dry_run, only report what would be
    done.  why is a short reason included in the messages.
    """
    file_to_create = os.path.join(destination_base, destination_file)
    file_to_copy_from = os.path.join(source_base, source_file)
    if dry_run:
        sys.stderr.write('%s: Would copy (%s) %s\n' % (sys.argv[0], why, source_file))
        return
    my_makedirs(os.path.dirname(file_to_create))
    if verbose:
        sys.stderr.write('%s: copying (%s) %s\n' % (sys.argv[0], why, source_file))
    shutil.copy(file_to_copy_from, file_to_create)


def delete(verbose, dry_run, why, destination_base, destination_file):
    """
    Delete a file.

    destination_file is joined onto destination_base.  With dry_run, only
    report what would be done.
    """
    file_to_delete = os.path.join(destination_base, destination_file)
    if dry_run:
        sys.stderr.write('%s: Would delete (%s) %s\n' % (sys.argv[0], why, destination_file))
        return
    if verbose:
        sys.stderr.write('%s: deleting (%s) %s\n' % (sys.argv[0], why, destination_file))
    os.unlink(file_to_delete)


def do_deletions(verbose, dry_run, destination_base, only_in_dest):
    """We delete files first to clear up space."""
    print('processing deletions')
    for filename in sorted(only_in_dest):
        delete(verbose, dry_run, 'only in dest', destination_base, filename)


def do_missing_copies(verbose, dry_run, source_base, destination_base, only_in_source):
    """Copy files that are in the source but not in the destination."""
    print('processing copies due to presence only in source')
    for filename in sorted(only_in_source):
        copy(verbose, dry_run, 'only in source', source_base, filename, destination_base, filename)


def do_different_lengths(verbose, dry_run, source_base, destination_base, in_both):
    """Copy files that exist in both but have different lengths."""
    print('processing copies due to presence in both but different lengths')
    for filename in sorted(in_both):
        source_length = os.path.getsize(os.path.join(source_base, filename))
        destination_length = os.path.getsize(os.path.join(destination_base, filename))
        if source_length != destination_length:
            # Pass the relative name: copy() joins it onto the bases itself.
            copy(verbose, dry_run, 'length unequal', source_base, filename, destination_base, filename)


def main():
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """
    Get the ball rolling.

    Parses sys.argv, validates the options, inventories both hierarchies and
    then performs deletions, missing copies and length-mismatch copies.
    Exits via SystemExit on bad usage or a failed preflight check.
    """
    source_base = None
    destination_base = None
    dry_run = False
    quiet_inhale = False
    verbose = False
    extensions = None

    # Work on a copy so that sys.argv is left intact.
    args = sys.argv[1:]
    index = 0
    while index < len(args):
        option = args[index]
        if option in ('--source', '--destination', '--extensions'):
            if index + 1 >= len(args):
                sys.stderr.write('%s: %s requires an argument\n' % (sys.argv[0], option))
                usage(1)
            index += 1
            value = args[index]
            if option == '--source':
                source_base = value
            elif option == '--destination':
                destination_base = value
            else:
                extensions = value.split(',')
        elif option == '--dry-run':
            dry_run = True
        elif option == '--quiet-inhale':
            quiet_inhale = True
        elif option == '--verbose':
            verbose = True
        elif option in ('-h', '--help'):
            usage(0)
        else:
            sys.stderr.write('%s: No such option: %s\n' % (sys.argv[0], option))
            usage(1)
        index += 1

    preflight_check_good = True

    # Only test for an absolute path when a value was actually supplied;
    # os.path.isabs(None) raises TypeError.
    if source_base is None:
        sys.stderr.write('%s: --source is a required option\n' % (sys.argv[0], ))
        preflight_check_good = False
    elif not os.path.isabs(source_base):
        sys.stderr.write('%s: --source must give an absolute path\n' % (sys.argv[0], ))
        preflight_check_good = False

    if destination_base is None:
        sys.stderr.write('%s: --destination is a required option\n' % (sys.argv[0], ))
        preflight_check_good = False
    elif not os.path.isabs(destination_base):
        sys.stderr.write('%s: --destination must give an absolute path\n' % (sys.argv[0], ))
        preflight_check_good = False

    if not preflight_check_good:
        raise SystemExit('One or more items in the preflight check failed')

    source_files = obtain_files('source', source_base, extensions, quiet_inhale)
    destination_files = obtain_files('destination', destination_base, extensions, quiet_inhale)

    sys.stderr.write('Got a total of %d source files and %d destination files\n' % (len(source_files), len(destination_files)))

    only_in_source = source_files - destination_files
    only_in_dest = destination_files - source_files
    in_both = source_files & destination_files

    # The full inventories can be large; release them before copying.
    del source_files
    del destination_files

    do_deletions(verbose, dry_run, destination_base, only_in_dest)
    do_missing_copies(verbose, dry_run, source_base, destination_base, only_in_source)
    do_different_lengths(verbose, dry_run, source_base, destination_base, in_both)


if __name__ == '__main__':
    main()