#!/bin/bash

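# We run ssh-copy-id first (unless --skip-ssh-copy-id is given), which has the
# side effect of setting up passwordless ssh to the remote host.

# The size estimate handed to gprog comes from running du on the remote
# directory. This is far from a perfect estimate, but it's usually pretty decent.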

# Abort on a failing command (errexit), on use of an unset variable (nounset),
# and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# Option defaults; the command-line parsing further down overrides them.
do_ssh_copy_id=True	# True: run ssh-copy-id against the host before anything else
compressor="cat"	# cat: stream the archive without compressing it
directory=
host=

# Print the help text and terminate the script.
#
# Arguments:
#   $1 - exit status to terminate with (defaults to 1 when omitted).
# Outputs:
#   The help text, on stdout when the status is 0 and on stderr otherwise.
# Does not return: always calls exit.
function usage
{
	local retval="${1:-1}"

	# A non-zero status means we got here because of an error, so the help
	# text is a diagnostic and belongs on stderr.
	if [ "$retval" != 0 ]
	then
		exec 1>&2
	fi

	cat <<EOF
Usage: $0
--host                The hostname to ssh to
--directory           A directory to ssh to and du+tar. Note that ~ does not work in this
--compressor          Specify what program to compress with, if any. Valid options include cat, gzip, bzip2 and xz
--skip-ssh-copy-id    Do not try to establish credentials on remote host
--help                This stuff

Tar up a remote directory hierarchy and pipe it through gprog, using du to get an estimate of how much data will need
to be copied.

Please note that this script will ssh-copy-id, so it has the side effect of setting up passwordless ssh.
EOF
	exit "$retval"
}

# Parse the command line into the global option variables.
#
# Arguments:
#   "$@" - the script's command-line arguments.
# Globals written:
#   host, directory, compressor, do_ssh_copy_id
# Exits through usage: status 0 for --help, status 1 for an unknown option or
# for an option that is missing its value.
function parse_args
{
	while [ "$#" -ge 1 ]
	do
		case "$1" in
			--host|--directory|--compressor)
				# Without this check, "set -u" kills the script with a cryptic
				# "unbound variable" message when the option is the last argument.
				if [ "$#" -lt 2 ]
				then
					echo "$0: Option $1 requires an argument" 1>&2
					usage 1
				fi
				case "$1" in
					--host)
						host="$2"
						;;
					--directory)
						directory="$2"
						;;
					--compressor)
						compressor="$2"
						;;
				esac
				# Drop the value here; the shift at the bottom of the loop drops
				# the option itself.
				shift
				;;
			--skip-ssh-copy-id)
				do_ssh_copy_id=False
				;;
			--help)
				usage 0
				;;
			*)
				echo "$0: Illegal option: $1" 1>&2
				usage 1
				;;
		esac
		shift
	done
}

parse_args "$@"

# Reject unsupported compressors up front, before any remote work is done.
# This list has to agree with the compressors the streaming step at the end of
# the script handles: the chosen program compresses on the remote side and is
# run with -d on this side to decompress, so a decompress-only tool such as
# bunzip2 cannot be used here.
case "$compressor" in
	cat|gzip|bzip2|xz)
		# Good, we have a valid compression program.
		;;
	*)
		echo "$0: Error: Unrecognized compressor specified" 1>&2
		exit 1
		;;
esac

# Set up passwordless ssh to the host unless --skip-ssh-copy-id was given.
# Any value other than True or False means the flag was corrupted somewhere.
if [ "$do_ssh_copy_id" = True ]
then
	ssh-copy-id "$host"
elif [ "$do_ssh_copy_id" != False ]
then
	echo "$0: internal error: \$do_ssh_copy_id neither True nor False" 1>&2
	exit 1
fi

# Bail out early unless the path is a directory on the remote host.
# $directory is expanded here, on the client, on purpose; that is exactly what
# SC2029 warns about, hence the directive.
# shellcheck disable=SC2029
ssh "$host" "[ -d \"$directory\" ]" || {
	echo "$0: error: $directory on $host is not a directory" 1>&2
	exit 1
}

# Ask the remote host how much data the directory holds. du is run with
# -s (summary), -k (kilobyte units) and -x (stay on one filesystem); the python
# snippet adds up the first column of every line du prints and converts the
# total from kilobytes to bytes.
# shellcheck disable=SC2029
estimate=$(
	ssh "$host" "du -skx \"$directory\"" \
		| python3 -c '
import sys
kilobytes = sum(int(row.split()[0]) for row in sys.stdin)
print(kilobytes * 1024)'
)

# Report on stderr so stdout stays free for the archive stream.
echo "Estimate: $estimate bytes" 1>&2

# Find tar on the remote host, preferring gtar. /usr/local/bin is appended to
# PATH only for the gtar lookup; the fallback lookup of tar uses the remote
# PATH as-is.
tar=$(ssh "$host" 'PATH="$PATH":/usr/local/bin which gtar || which tar')

# Both transfer variants start the same way on the remote side (enter the
# directory, tar it to stdout) and end the same way locally (feed gprog).
# $directory and $tar are expanded on the client on purpose.
remote_tar="cd \"$directory\" && $tar --create --sparse --one-file-system ."
gprog_title="gprog-ssh-du-tar $directory"

case "$compressor" in
	cat)
		# No compression: stream the raw archive straight into gprog.
		# shellcheck disable=SC2029
		ssh "$host" "$remote_tar" \
			| gprog --size-estimate "$estimate" --title "$gprog_title"
		;;
	gzip|bzip2|xz)
		# Compress on the remote side, decompress on this side. This relies on
		# all of these tools accepting -d to decompress.
		# shellcheck disable=SC2029
		ssh "$host" "$remote_tar | \"$compressor\"" \
			| "$compressor" -d \
			| gprog --size-estimate "$estimate" --title "$gprog_title"
		;;
	*)
		echo "$0: unrecognized compressor: $compressor" 1>&2
		exit 1
		;;
esac