#!/usr/bin/python3

"""
Copy files from one hierarchy to another, but only if the files don't yet exist, or have different lengths.

Really, rsync should do this, but I have a situation in which it doesn't.
"""

import os
import sys
import errno
import shutil
# import pprint


def usage(retval):
    """Print the command-line help text on stderr, then exit with status retval."""
    help_lines = (
        'Sync one filesystem hierarchy to another, based on presence/absence and file lengths\n',
        'Usage: %s\n' % (sys.argv[0], ),
        '\t--source /path/to/source\n',
        '\t--destination /path/to/destination\n',
        '\t--dry-run\n',
        '\t--verbose\n',
        '\t--quiet-inhale\n',
        '\t--extensions mp3,ogg,flac (do not give a ".")\n',
        '\t--help\n',
    )
    for help_line in help_lines:
        sys.stderr.write(help_line)
    sys.exit(retval)


def file_to_process(filename, extensions):
    """
    Decide whether filename should take part in the sync.

    When extensions is None every file qualifies.  Otherwise the name must end in a "." followed by one of the
    entries of extensions (the entries themselves carry no leading dot).  The comparison is case-sensitive.
    """
    if extensions is None:
        return True
    return any(filename.endswith('.%s' % suffix) for suffix in extensions)


def make_used(*args):
    """Accept any arguments and ignore them, so linters regard the caller's variables as used."""
    del args


def obtain_files(name, base, extensions, quiet_inhale):
    """
    Collect the files below base that file_to_process() accepts.

    name is only a label used in the progress messages; base is the top of the hierarchy; extensions is handed to
    file_to_process() unchanged.  Unless quiet_inhale is true, a progress line is written to stderr after every
    100 accepted files.

    Returns a set of paths relative to base, each starting with "./", so the sets obtained for two hierarchies
    can be compared directly.  Raises OSError if base cannot be opened as a directory.

    The tree is walked in place instead of after an os.chdir(base): changing the process-wide working directory
    is a hidden side effect, and it made a relative base passed to a later call resolve against the wrong place.
    """
    # os.walk() ignores errors by default, so an unusable base would look like an empty tree (and every file on
    # the other side would then look orphaned).  Probe it first so the problem is reported instead.
    with os.scandir(base):
        pass

    files = set()
    fileno = 0
    for root, _directories, filenames in os.walk(base):
        # Rebuild the "./sub/dir" form that walking '.' from inside base would have produced.
        relative_root = os.path.relpath(root, base)
        if relative_root == '.':
            prefix = '.'
        else:
            prefix = os.path.join('.', relative_root)

        for filename in filenames:
            if not file_to_process(filename, extensions):
                continue
            files.add(os.path.join(prefix, filename))
            fileno += 1
            if fileno % 100 == 0 and not quiet_inhale:
                sys.stderr.write('Inhaled %d %s files...\n' % (fileno, name))

    return files


def safe_mkdir(pathname):
    """Make the single directory pathname; an "already exists" error is ignored, any other error propagates."""
    try:
        os.mkdir(pathname)
    except OSError as problem:
        # Only EEXIST is harmless here; everything else is a real failure.
        if problem.errno != errno.EEXIST:
            raise


def my_makedirs(pathname):
    """
    Create a directory and all of its parent directories.

    Directories that already exist are left alone.  Raises OSError if a directory cannot be created; in
    particular FileExistsError is raised when pathname itself exists but is not a directory.
    """
    # os.makedirs() copes with absolute and relative paths and with the platform's path separator, and it never
    # tries to create the root directory itself, which the previous component-by-component loop did.
    os.makedirs(pathname, exist_ok=True)


def copy(verbose, dry_run, why, source_base, source_file, destination_base, destination_file):
    # pylint: disable=too-many-arguments
    """
    Copy source_file (below source_base) to destination_file (below destination_base).

    In dry-run mode only a "Would copy" line is written to stderr.  Otherwise the destination's directory is
    created as needed, a "copying" line is written to stderr when verbose is set, and the file is copied with
    shutil.copy().  why is a short reason that appears in either message.
    """
    target = os.path.join(destination_base, destination_file)
    origin = os.path.join(source_base, source_file)

    if dry_run:
        sys.stderr.write('%s: Would copy (%s) %s\n' % (sys.argv[0], why, source_file))
        return

    my_makedirs(os.path.dirname(target))
    if verbose:
        sys.stderr.write('%s: copying (%s) %s\n' % (sys.argv[0], why, source_file))
    shutil.copy(origin, target)


def delete(verbose, dry_run, why, destination_base, destination_file):
    """
    Remove destination_file from below destination_base.

    In dry-run mode only a "Would delete" line is written to stderr.  Otherwise a "deleting" line is written to
    stderr when verbose is set, and the file is unlinked.  why is a short reason that appears in either message.
    """
    victim = os.path.join(destination_base, destination_file)

    if dry_run:
        sys.stderr.write('%s: Would delete (%s) %s\n' % (sys.argv[0], why, destination_file))
        return

    if verbose:
        sys.stderr.write('%s: deleting (%s) %s\n' % (sys.argv[0], why, destination_file))
    os.unlink(victim)


def do_deletions(verbose, dry_run, destination_base, only_in_dest):
    """Delete, in sorted order, every file of only_in_dest from below destination_base (done first to free space)."""
    print('processing deletions')
    doomed = list(only_in_dest)
    doomed.sort()
    for relative_name in doomed:
        delete(verbose, dry_run, 'only in dest', destination_base, relative_name)


def do_missing_copies(verbose, dry_run, source_base, destination_base, only_in_source):
    """Copy, in sorted order, every file of only_in_source from source_base to the same place below destination_base."""
    print('processing copies due to presence only in source')
    missing = list(only_in_source)
    missing.sort()
    for relative_name in missing:
        copy(verbose, dry_run, 'only in source', source_base, relative_name, destination_base, relative_name)


def do_different_lengths(verbose, dry_run, source_base, destination_base, in_both):
    """
    Re-copy files that exist in both hierarchies but whose lengths differ.

    in_both holds names relative to both source_base and destination_base.  They are handled in sorted order;
    a file is copied from source to destination only when os.path.getsize() reports different sizes for the two
    sides.  Raises OSError if either size cannot be read.
    """
    print('processing copies due to presence in both but different lengths')
    for filename in sorted(in_both):
        source_length = os.path.getsize(os.path.join(source_base, filename))
        destination_length = os.path.getsize(os.path.join(destination_base, filename))

        if source_length != destination_length:
            # Hand copy() the relative name, as do_missing_copies() does.  copy() joins the bases itself, so
            # passing already-joined paths only worked while both bases were absolute, and it logged full paths
            # where the other passes log relative ones.
            copy(verbose, dry_run, 'length unequal', source_base, filename, destination_base, filename)


def main():
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """
    Parse the command line, inventory both hierarchies and bring the destination in line with the source.

    Recognised options are the ones listed by usage().  --source and --destination are required and must be
    absolute paths.  An unknown option, or a value-taking option without its value, prints the usage message and
    exits with status 1.  Failed preflight checks are all reported on stderr before SystemExit is raised.

    Work is done in three passes: deletions from the destination, copies of files missing from the destination,
    and finally copies of files whose lengths differ.
    """
    program = sys.argv[0]
    source_base = None
    destination_base = None
    dry_run = False
    quiet_inhale = False
    verbose = False
    extensions = None

    # Iterate over a copy of the arguments so that sys.argv itself is left intact.
    arguments = iter(sys.argv[1:])
    for option in arguments:
        if option in ('--source', '--destination', '--extensions'):
            value = next(arguments, None)
            if value is None:
                # The option was the last word on the command line; report it instead of failing on an index.
                sys.stderr.write('%s: %s requires an argument\n' % (program, option))
                usage(1)
            if option == '--source':
                source_base = value
            elif option == '--destination':
                destination_base = value
            else:
                extensions = value.split(',')
        elif option == '--dry-run':
            dry_run = True
        elif option == '--quiet-inhale':
            quiet_inhale = True
        elif option == '--verbose':
            verbose = True
        elif option in ('-h', '--help'):
            usage(0)
        else:
            sys.stderr.write('%s: No such option: %s\n' % (program, option))
            usage(1)

    preflight_check_good = True

    # The absolute-path checks only make sense once a path was given; os.path.isabs(None) would raise TypeError
    # and hide the remaining preflight messages.
    if source_base is None:
        sys.stderr.write('%s: --source is a required option\n' % (program, ))
        preflight_check_good = False
    elif not os.path.isabs(source_base):
        sys.stderr.write('%s: --source must give an absolute path\n' % (program, ))
        preflight_check_good = False

    if destination_base is None:
        sys.stderr.write('%s: --destination is a required option\n' % (program, ))
        preflight_check_good = False
    elif not os.path.isabs(destination_base):
        sys.stderr.write('%s: --destination must give an absolute path\n' % (program, ))
        preflight_check_good = False

    if not preflight_check_good:
        raise SystemExit('One or more items in the preflight check failed')

    source_files = obtain_files('source', source_base, extensions, quiet_inhale)
    destination_files = obtain_files('destination', destination_base, extensions, quiet_inhale)

    sys.stderr.write('Got a total of %d source files and %d destination files\n' % (len(source_files), len(destination_files)))

    only_in_source = source_files - destination_files
    only_in_dest = destination_files - source_files
    in_both = source_files & destination_files

    # The full inventories are no longer needed once the three work sets exist.
    del source_files
    del destination_files

    do_deletions(verbose, dry_run, destination_base, only_in_dest)

    do_missing_copies(verbose, dry_run, source_base, destination_base, only_in_source)

    do_different_lengths(verbose, dry_run, source_base, destination_base, in_both)


if __name__ == '__main__':
    # Run the sync only when executed as a script, so that importing this module has no side effects.
    main()