#!/bin/bash

# The size estimate computed below (a sum of du output) is far from perfect, but it's usually pretty decent

# Command the tar stream is piped through. The default "cat" means the archive
# is written uncompressed; --compress and --compressor override it.
compressor=cat

# Print the option summary on stderr, then terminate the script.
#   $1 - exit status to terminate with
usage()
{
    local exit_code="$1"

    # One printf call emits every help line; the format is reused per argument.
    printf '%s\n' \
        "Usage: $0" \
        "--compress            Tar output is compressed using gzip" \
        "--compressor c        Tar output is compressed using compression command 'c'" \
        "--directories         A list of directories to du and tar - must be the last option" \
        "--help                This stuff" 1>&2

    exit "$exit_code"
}

# Consume leading options from the positional parameters. Everything that
# follows --directories is left in "$@" as the list of directories to process.
while [ "$#" -ge 1 ]
do
    case "$1" in
        --directories)
            # The remaining arguments are the directory list; stop parsing.
            shift
            break
            ;;
        --compress)
            compressor=gzip
            ;;
        --compressor)
            # A missing or empty command would leave $compressor empty, and the
            # archive stream would then be piped into a no-op and silently lost.
            if [ "$#" -lt 2 ] || [ -z "$2" ]
            then
                echo "$0: --compressor requires a command argument" 1>&2
                usage 1
            fi
            compressor="$2"
            # Extra shift skips the option's value; the common shift below
            # skips the option itself.
            shift
            ;;
        --help)
            usage 0
            ;;
        *)
            echo "$0: Illegal option: $1" 1>&2
            usage 1
            ;;
    esac
    shift
done

# Build the expression "(n1+n2+...+0) * 1024" from the first column of each
# directory's du summary (du -k reports 1024-byte units) and let bc evaluate
# it, giving the estimated total size in bytes.
estimate=$(
    {
        printf '('
        for dir in "$@"
        do
            du -skx "$dir"
        done | awk '{ printf "%s+", $1 }'
        # The trailing 0 closes the sum after the last "+" (or stands alone
        # when there are no directories).
        echo '0) * 1024'
    } | bc
)

printf 'Estimate: %s bytes\n' "$estimate" 1>&2

# Write a tar archive of the given paths to stdout, routed through reblock
# with the size estimate computed earlier and the fixed -b / -t settings.
function archive_stream
{
    tar --create --sparse --one-file-system "$@" |
        reblock -e "$estimate" -b 65536 -t 300
}

if [ "$compressor" = cat ]
then
    # No compression requested: skip the extra pipeline stage entirely.
    archive_stream "$@"
else
    # eval lets the compressor be a whole command line, not just a program name.
    archive_stream "$@" | eval "$compressor"
fi