#!/usr/bin/env bash2
#
# Back up local filesystems, directories, or check-stamp-selected files to a
# remote account over ssh.  Depending on the options this is done either
# incrementally with rsync, as a tar stream unpacked on the far end, or as a
# single tar archive stored on the far end.  Tar streams are piped through
# reblock.  Run with no arguments for the option summary.
#
# The shebang asks for bash2, so the code below sticks to constructs that
# bash 2 understands (no +=, no mapfile, no [[ =~ ]]).

# for debugging, makes bash verbose
#set -x

# treat unset variables as an error - really helps with debugging, and doesn't
# hurt the rest of the time either
set -u

# future enhancements:
#
# 1) verify
# 2) Since sometimes permissions get messed up and not file content,
#    it'd possibly be valuable to:
#    - be able to verify permissions too, not just file content
#    - be able to verify permissions alone, and no file content.  EG,
#      we might count how many files are owned by each user and
#      compare, and same for groups
# 3) bzip2 compression
# 4) rzip compression with chunkup
# 5) metacompress?
# 6) encryption?
# 7) slowdown?
# 8) Already have a -c mode for doing a check-stamp to figure out which files
#    to save, but these additional modes might be really nice:
#    I) Use an ultra-stale tripwire database to find what to backup
#    II) Use a user-provided date cutoff, and back up everything newer
#    III) Use a guessed date cutoff, based on the oldest
#         modification time in the root filesystem or something, and
#         back up everything newer
# 9) local transfers?
#
# local, slice to slice transfers:
#   (cd /opt && gtar cflS - .) | reblock -e $(cd /opt && du -sk . | awk ' { print $1 }') $(expr 1024 \* 1024) 300 | (cd /newopt && gtar xfp -)
# better rsync:
#   rsync -a --numeric-ids --relative --delete --progress --stats --one-file-system --rsh=ssh / /var/ /opt/ /spare/ /export/home/ root@seki.nac.uci.edu:~strombrg/backups/ark.oas/root/
# It's important to do all filesystems in one command, at least if you're
# putting them all into the same hierarchy.
# Also the --numeric-ids option keeps ownerships from getting messed up
# --relative keeps all those filesystems from getting merged together
#   /dcs/packages/gnu/bin/rsync -aSv --one-file-system --delete --progress --stats --relative
#     --rsync-path=/dcs/packages/gnu/bin/rsync --rsh=ssh / /usr /var /export/home /users /oas /operations /naims /data
#     root@seki.nac.uci.edu:~strombrg/backups/sherlock1.oas/

# ---------------------------------------------------------------------------
# Prerequisite checks
# ---------------------------------------------------------------------------

# more specifically, we need GNU du
for prog in awk rsync ssh reblock gzip gunzip df-local sort du; do
  # note that we're using type, because which didn't give a useful exit
  # status
  if ! type "$prog" > /dev/null 2>&1; then
    echo "Sorry, you will need $prog on your \$PATH for $0 to work" 1>&2
    echo "properly...." 1>&2
    exit 1
  fi
done

echo 'Good, you appear to have all the required programs on your $PATH.' 1>&2

if type gtar > /dev/null 2>&1; then
  TAR=gtar
elif type tar > /dev/null 2>&1; then
  TAR=tar
else
  echo 'Sorry, no tar program found (I prefer gtar, but may be able use just "tar")' 1>&2
  exit 1
fi
export TAR

# note that XPG du uses -x just like GNU du, but we're assuming that if we
# don't have a GNU du, then use -d instead of -x
if [ -x /dcs/packages/gnu/bin/du ]; then
  DU=/dcs/packages/gnu/bin/du
  dux=1
elif [ -x /usr/xpg4/bin/du ]; then
  DU=/usr/xpg4/bin/du
  dux=1
else
  # try to determine -x-ability empirically: a du that rejects -x exits
  # non-zero.  (The earlier probe grepped for "illegal" but then set the
  # flag the wrong way round.)
  DU="$(type -p du)"
  if "$DU" -skx /etc/passwd > /dev/null 2>&1; then
    dux=1
  else
    dux=0
  fi
fi

if [ "$dux" = 0 ]; then
  echo "Assuming $DU -skx does not work - will try du -skd instead (-d unverified!)." 1>&2
else
  echo "Assuming $DU -skx works." 1>&2
fi

# -c is only offered when the check-stamp tool chain and its database exist
if type stamp > /dev/null 2>&1 &&
  type check-stamp > /dev/null 2>&1 &&
  type elim-dirs > /dev/null 2>&1 &&
  [ -f /var/adm/stamp.gz ]; then
  check_stamp_ok=1
else
  check_stamp_ok=0
fi

# ---------------------------------------------------------------------------
# Option state
# ---------------------------------------------------------------------------

export dest=""
export compress=0
export zopt=""
export splitup=1
export incremental=0
export numfs=0
export delete=0
export f_given=0
export all=0
export tst=0
export check_stamp=0
export reblock_blocksize=-1

# fs holds either mount points/directories (-a, -f) or individual files (-c)
declare -a fs
fs=()

# Print the option summary on stderr and exit with status 1.
usage() {
  {
    echo "Usage: $0 [-a] [-f filesystem_mount_point_or_other_directory] [-d user@dest.host.uci.edu:/dest/dir] [-z]"
    echo ' -a means to backup all local filesystems (as determined by df-local, which lists all filesystems of known local types)'
    if [ "$check_stamp_ok" != 0 ]; then
      echo ' -c use check-stamp to only transmit files on system partitions that have changed since initial install time'
    else
      echo ' (disabled - prereqs not satisfied) -c use check-stamp to only transmit files on system partitions that have changed since initial install time'
    fi
    echo ' -f means to backup a specific filesystem. Repeat to backup additional filesystems at once'
    echo ' -d specifies where to do the backup to, EG root@foo.nac.uci.edu'
    echo ' -z says to compress the filetransfer, whether it is faster that way or not :). Right now, this -only- uses gzip, and -only- on the source host'
    echo ' -s says to -not- do a split-up copy, instead do one large GNU tar archive. The sense of this option was inverted Aug 24, 2005'
    echo ' -i says to do an incremental transfer (requires that -s not have been specified. Will give a running progress update, but not as clearly as non-incremental transfer)'
    echo ' -b blocksize says to use a reblock block size of blocksize. Conflicts with -i. Default is 256 megabytes'
    echo ' -D says incremental transfers should delete files that do not exist in the destination directory. Does not apply to absolute (non-incremental) transfers'
    echo ' -t test. just output the list of filesystems to back up, do not actually back up anything'
    echo
    echo 'Note that both -a and -f backups will stop at filesystem boundaries.'
  } 1>&2
  exit 1
}

# Abort with the usage text when an option that takes a value is the last
# word on the command line.  Without this, "$2" trips set -u with an
# unhelpful "unbound variable" message.
#   $1 - number of remaining command line words
#   $2 - the option being parsed
require_value() {
  if [ "$1" -lt 2 ]; then
    echo "Option $2 requires an argument" 1>&2
    usage
  fi
}

# ---------------------------------------------------------------------------
# Command line parsing
# ---------------------------------------------------------------------------

while [ "$#" -gt 0 ]; do
  case "$1" in
    -a)
      if [ "$f_given" != 0 ]; then
        echo 'Sorry, you can use -f or -a, but not both' 1>&2
        usage
      fi
      all=1
      shift
      ;;
    -c)
      if [ "$check_stamp_ok" = 0 ]; then
        echo 'Sorry, -c is disabled because I could not find the required programs and input' 1>&2
        exit 1
      fi
      check_stamp=1
      shift
      ;;
    -s)
      splitup=0
      shift
      ;;
    -D)
      delete=1
      shift
      ;;
    -i)
      incremental=1
      shift
      ;;
    -t)
      tst=1
      shift
      ;;
    -z)
      compress=1
      zopt="z"
      shift
      ;;
    -f)
      if [ "$all" != 0 ]; then
        echo 'Sorry, you can use -f or -a, but not both' 1>&2
        usage
      fi
      require_value "$#" "$1"
      fs[$numfs]="$2"
      numfs=$((numfs + 1))
      f_given=1
      shift 2
      ;;
    -b)
      require_value "$#" "$1"
      reblock_blocksize="$2"
      shift 2
      ;;
    -d)
      require_value "$#" "$1"
      dest="$2"
      shift 2
      ;;
    *)
      usage
      ;;
  esac
done

# ---------------------------------------------------------------------------
# Option validation
# ---------------------------------------------------------------------------

if [ "$((all + f_given + check_stamp))" != 1 ]; then
  echo 'Must specify exactly one of -a, -f or -c, not more than one, not none' 1>&2
  usage
fi

if [ "$dest" = "" ]; then
  echo 'Must specify a destination account with -d' 1>&2
  usage
fi

if [ "$delete" = 1 ]; then
  delopt="--delete"
  if [ "$incremental" = 0 ]; then
    echo "The deletion option, -D, only applies to incremental copies" 1>&2
    exit 1
  fi
else
  delopt=""
fi

# a quick test to make sure the permissions aren't going to get all messed up
# by ssh'ing to an unprivileged account.  This way both gtar and rsync should
# "do the right thing"
if [ "$splitup" != 0 ]; then
  case "$dest" in
    root@*)
      ;;
    *)
      case "$(whoami)" in
        root)
          # this really needs to be fixed.  If you are root, but you ssh as
          # someone else, things could still get screwed up
          :
          ;;
        *)
          {
            echo
            echo 'Warning: you are not sshing into a root account, so permissions may get munged!!!'
            echo
          } 1>&2
          ;;
      esac
      ;;
  esac
fi

# -s turns split-up mode OFF, so -i and -s cannot be combined
if [ "$incremental" = 1 ] && [ "$splitup" = 0 ]; then
  echo 'Incremental transfer requires splitup (-i conflicts with -s)' 1>&2
  usage
fi

if [ "$incremental" = 1 ] && [ "$reblock_blocksize" != -1 ]; then
  echo 'Sorry, -i conflicts with -b' 1>&2
  usage
fi

if [ "$reblock_blocksize" = -1 ]; then
  reblock_blocksize=$((256 * 1024))
fi

case "$dest" in
  *:*)
    ;;
  *)
    echo 'Must give a directory on -d option, EG: username@remote.host.uci.edu:/big/disk/directory' 1>&2
    usage
    ;;
esac

# ---------------------------------------------------------------------------
# Work out what to back up
# ---------------------------------------------------------------------------

if [ "$all" = 1 ]; then
  # this should stuff these directories (mount points) into array fs, but
  # it'll probably choke on mount points with whitespace in them.  You may
  # still be able to do mount points with whitespace in them via -f though.
  # Sorted on column 3, descending; only column-6 values that start with /
  # are kept.
  fs=($(df-local |
    sort -r -n -k 3 |
    awk '$6 ~ /^\// { print $6 }'))
fi

# Filter stdin to stdout, dropping the first line unless it begins with "/".
# -sometimes-, the first line of check-stamp output will have a machine
# description, starting with the machine's uname.  Here we're assuming that
# if the first line doesn't start with a /, then it's a machine description
# line that should not be passed on.  (This used to shell out to python,
# which the prerequisite check above never looked for.)
toast_uname() {
  awk 'NR == 1 && $0 !~ /^\// { next } { print }'
}

if [ "$check_stamp" = 1 ]; then
  echo
  {
    echo "Generating a list of changed files. This can take a"
    echo "long time. Suggest using 'notify-when-up -s check-stamp'"
    echo "so you will know when this has completed, so you"
    echo "will be ready to issue any passwords required..."
  } | fmt
  fs=($(check-stamp |
    toast_uname |
    elim-dirs))
fi

# calculate $numfs
numfs="${#fs[@]}"

if [ "$check_stamp" = 1 ]; then
  echo
  echo "Backing up $numfs individual files, not a list of filesystems" 1>&2
else
  echo
  echo "Number of filesystems to backup is $numfs" 1>&2
fi

# Write the entries of fs to stdout, one per line.
# In check-stamp mode the entries are plain files and are printed as they
# are: du may get confused if we use a trailing / on files that aren't
# directories, but none of these should be directories due to the preceding
# elim-dirs.
# Otherwise the entries are directories and get a trailing /, because rsync
# may get confused if we do not use a trailing / on directories; each one is
# also echoed to stderr.
# The array is walked inside the shell, so a very long file list never has
# to fit on a command line, and an empty list simply prints nothing.
files_or_dirs() {
  local entry
  if [ "$check_stamp" != 0 ]; then
    for entry in ${fs[@]+"${fs[@]}"}; do
      printf '%s\n' "$entry"
    done
  else
    echo To back up ${fs[@]+"${fs[@]}"} in files_or_dirs 1>&2
    for entry in ${fs[@]+"${fs[@]}"}; do
      printf '%s/\n' "$entry"
      printf '%s/\n' "$entry" 1>&2
    done
  fi
}

# Sum the first column of stdin and print the total (0 for empty input).
# %.0f keeps large totals from being printed in exponent notation.
total() {
  awk 'BEGIN { total=0 } { total=total+$1 } END { printf "%.0f\n", total }'
}

# Print an estimate, in kilobytes, of how much data will be transferred.
# (GNU) du -skx says to:
#   -s: give a total
#   -k: report in kilobytes - which are the units that reblock -e expects
#   -x: stay in a single filesystem
estimate() {
  local unadjusted scale
  if [ "$dux" = 1 ]; then
    # apply a scaling factor, since estimating transfer size based on
    # stat'ing tons of little files is much less accurate than estimating
    # based on du'ing a small number of file hierarchies.  This factor was
    # determined empirically on a Solaris 8 10/01 system
    unadjusted="$(cd / && files_or_dirs | xargs "$DU" -sxk | total)"
    scale=1.149
    # int() truncates the product; the values travel on stdin so that even
    # an awk without -v can do the arithmetic
    echo "$unadjusted $scale" | awk '{ printf "%.0f\n", int($1 * $2) }'
  else
    cd / && files_or_dirs | xargs "$DU" -sdk | total
  fi
}

if [ "$tst" = 1 ]; then
  echo 'Filesystems that would have been backed up are, in order from largest usage to least usage:'
  echo ${fs[@]+"${fs[@]}"}
  exit 0
fi

# Describe the transfer that is about to happen, on stdout, wrapped by fmt.
# Reads dstaccount and dstdir, plus dstfile when not in split-up mode.
announce() {
  {
    # variables:
    # 1) filesystems/dirs or changed files?
    # 2) splitup or one large archive?
    # 3) compressed/uncompressed?
    # 4) incremental/absolute?

    # 1) filesystems/dirs or changed files?
    if [ "$check_stamp" != 0 ]; then
      echo "Backing up ${#fs[@]} changed files"
    else
      echo Backing up local filesystems/directories "$(files_or_dirs)" '(and below)'
    fi
    echo "to remote account $dstaccount, directory $dstdir,"
    # 2) splitup or one large archive?
    if [ "$splitup" = 1 ]; then
      echo as individual files,
      # 4) incremental/absolute?
      if [ "$incremental" != 0 ]; then
        echo incrementally,
      else
        echo absolutely,
      fi
      # 3) compressed/uncompressed?
      if [ "$compress" != 0 ]; then
        echo transmitted compressed but uncompressed again on the other end
      else
        echo not transmitted with compression, and not compressed on the other end
      fi
    else
      echo as one large
      if [ "$compress" != 0 ]; then
        echo "compressed archive named $dstfile.gz"
      else
        echo "not compressed archive named $dstfile"
      fi
    fi
    if [ "$check_stamp" != 0 ] && [ "$incremental" = 0 ]; then
      echo "(size estimate is scaled due to estimation error when du'ing lots of small files. Scaling factor determined on"
      echo "a Solaris 8 10/01 machine - YMMV :)"
    fi
  } | fmt
  echo
}

# Write a tar archive of everything files_or_dirs lists to stdout, passed
# through reblock together with the size estimate.  Expects the current
# directory to already be /.
tar_stream() {
  files_or_dirs |
    "$TAR" -S --numeric-owner --one-file-system -f - --files-from - -c |
    reblock -e "$(estimate)" "$reblock_blocksize" 300
}

# ---------------------------------------------------------------------------
# The transfer itself
# ---------------------------------------------------------------------------

# split user@host:/dir at the first colon
dstaccount="${dest%%:*}"
dstdir="${dest#*:}"

echo
echo
echo

if [ "$splitup" = 1 ]; then
  if [ "$incremental" = 1 ]; then
    announce
    # actually, --files-from is supposed to imply --relative I guess, but it
    # shouldn't hurt to give both
    # -p, --perms                 preserve permissions
    # -o, --owner                 preserve owner (root only)
    # -g, --group                 preserve group
    # -D, --devices               preserve devices (root only)
    # -t, --times                 preserve times
    # -O, --omit-dir-times        omit directories when preserving times
    # -S, --sparse                handle sparse files efficiently
    # ${delopt:+...} contributes --delete only when -D was given
    files_or_dirs |
      rsync "-a${zopt}" --perms --owner --group --devices --times \
        --omit-dir-times --sparse --recursive --numeric-ids --relative \
        ${delopt:+"$delopt"} --files-from=- --progress --stats \
        --one-file-system --rsync-path=/dcs/packages/gnu/bin/rsync \
        --rsh=ssh / "$dstaccount":"$dstdir"
  else
    # not incremental: stream a tar archive and unpack it on the far end
    announce
    if ! cd /; then
      echo 'Failed to cd /' 1>&2
      exit 1
    fi
    # The remote PATH is set in its own statement and exported.  Written as
    # a prefix to cd, a POSIX shell would apply it to cd alone, and gunzip
    # and tar would never see it.  $PATH is still expanded on this host, as
    # before.  $dstdir is left unquoted on the far end so that a leading ~
    # can still be expanded there.
    remote_path="PATH=/dcs/bin:$PATH; export PATH"
    if [ "$compress" = 1 ]; then
      tar_stream |
        gzip |
        ssh "$dstaccount" "$remote_path; cd $dstdir && gunzip -c | $TAR xp --numeric-owner -f -"
    else
      tar_stream |
        ssh "$dstaccount" "$remote_path; cd $dstdir && $TAR xp --numeric-owner -f -"
    fi
  fi
else
  # don't split up the files: store one archive on the far end.
  # The archive name is derived from the entries being backed up: / becomes
  # "root", and in every other entry each / and - becomes a comma.  Several
  # entries are joined with +.  In check-stamp mode the list is individual
  # files and far too long for a file name, so a fixed name is used.
  # NOTE(review): the + separator and the "changed-files" name are choices
  # made here; the previous code called a helper that no longer exists and
  # could not produce a usable name at all - confirm the naming is wanted.
  if [ "$check_stamp" != 0 ]; then
    dstfile="changed-files"
  else
    dstfile=""
    for entry in ${fs[@]+"${fs[@]}"}; do
      if [ "$entry" = "/" ]; then
        piece="root"
      else
        piece="${entry//\//,}"
        piece="${piece//-/,}"
      fi
      dstfile="${dstfile:+$dstfile+}$piece"
    done
  fi
  announce
  if ! cd /; then
    echo 'Failed to cd /' 1>&2
    exit 1
  fi
  if [ "$compress" = 1 ]; then
    tar_stream |
      gzip |
      ssh "$dstaccount" "cd $dstdir && cat > $dstfile.gz"
  else
    tar_stream |
      ssh "$dstaccount" "cd $dstdir && cat > $dstfile"
  fi
fi

echo "transfer ending at $(date)"