#!/usr/bin/env bash

# Reclaim storage from a backshift repo, by deleting files older than a user-specified threshold.
#
# Three areas under the --save-directory are processed:
#   summaries/  saveset summary files whose finish_time (or, when that is missing, whose modification time) is
#               older than the threshold
#   files/      the directory named after each expired summary file
#   chunks/     chunk files not modified within the last --days days, plus any empty directories
#
# External tools used: find, xargs, rm, grep, awk, sed and date.  python3 (or python) is needed only for summary
# files that carry no usable finish_time.

set -eu
# Enable pipefail where the shell supports it; silently carry on where it does not.
set -o pipefail > /dev/null 2>&1 || true

save_directory=""
days=""
# BTW, we intentionally don't quote $verbose throughout, because we don't want it to expand to a null string
# (that is, an empty argument to rm) when empty.
verbose=""

# Print the usage message on stderr and exit.
#   $1 - exit status to terminate the script with
function usage
{
    local retval="$1"
    (
        echo "Usage: $0 --save-directory /save/directory --days num_days_to_keep -v --help"
        echo "Traverse /save/directory looking for storage that can be reclaimed"
        echo
        echo " --save-directory /r/e/p/o specify where the repository is"
        echo " --days days eliminate saveset summaries and chunks older than days."
        echo " 0 means expire everything."
        echo " 1 means expire things 1 day old or more."
        echo " --verbose operate in verbose mode. Also -v"
        echo " --help output this message. Also -h"
    ) 1>&2
    exit "$retval"
}

# Print the modification time of a file as whole seconds since the epoch.
#   $1 - path of the file to inspect
# Returns non-zero if no python interpreter is available or the file cannot be examined.
function file_mtime
{
    local interpreter
    interpreter=$(command -v python3 || command -v python) || return 1
    # The path travels as an argument rather than being pasted into the program text, so names containing
    # spaces or quotes are handled correctly and cannot be interpreted as python code.
    "$interpreter" -c 'import os, sys; print(int(os.stat(sys.argv[1]).st_mtime))' "$1"
}

# Emit, NUL-terminated, every chunk file old enough to expire and every empty directory below chunks/.
# Reads the globals save_directory and days.
function find_reclaimable_chunks
{
    if [ "$days" -eq 0 ]
    then
        # 0 means expire everything, so no age test is applied.  (Subtracting 1 would hand find "-mtime +-1",
        # and POSIX does not define a negative operand for -mtime.)
        find "$save_directory"/chunks -mindepth 1 \( -type f -o \( -type d -empty \) \) -print0
    else
        # -mtime +N matches files more than N whole days old, so "days or more" is +(days - 1).
        find "$save_directory"/chunks -mindepth 1 \
            \( \( -type f -mtime +"$((days - 1))" \) -o \( -type d -empty \) \) -print0
    fi
}

while [ "$#" -ge 1 ]
do
    case "$1" in
        --save-directory)
            # Without this check a missing value trips "set -u" with an unhelpful unbound-variable error.
            if [ "$#" -lt 2 ]
            then
                echo "$0: --save-directory requires an argument" 1>&2
                usage 1
            fi
            save_directory="$2"
            shift
            ;;
        --days)
            if [ "$#" -lt 2 ]
            then
                echo "$0: --days requires an argument" 1>&2
                usage 1
            fi
            days="$2"
            shift
            ;;
        --verbose|-v)
            verbose="-v"
            ;;
        --help|-h)
            usage 0
            ;;
        *)
            echo "$0: Illegal option: $1" 1>&2
            usage 1
            ;;
    esac
    shift
done

if [ "$save_directory" = "" ]
then
    echo "$0: --save-directory is a required option" 1>&2
    usage 1
fi

if [ "$days" = "" ]
then
    echo "$0: --days is a required option" 1>&2
    usage 1
fi

# Reject anything but a non-negative integer before a single file is removed: the value is used in shell
# arithmetic and in find's -mtime operand further down, neither of which copes with fractions or other text.
case "$days" in
    *[!0-9]*)
        echo "$0: --days must be a non-negative integer, got: $days" 1>&2
        usage 1
        ;;
esac
# Force base 10 so that a value with leading zeros (08, 09) is not mistaken for octal.
days=$((10#$days))

# We don't really _need_ to chdir here - but it's a good way to test for a directory or a symlink pointing at a
# directory.  Hence the subshell; that way, we chdir, but the change is immediately lost.
if ! (cd "$save_directory" > /dev/null 2>&1)
then
    echo "$0: $save_directory does not exist" 1>&2
    usage 1
fi

# Deal with summaries and files directories
time_now=$(date +%s)
threshold_seconds=$((time_now - days * 24 * 60 * 60))

# NOTE: everything in this pipeline runs as the condition of an "if", where bash ignores errexit.  Each step
# that matters is therefore checked explicitly instead of relying on "set -e".
if ! find "$save_directory"/summaries -maxdepth 1 -name '*_*_*_*_*' -print0 | \
    while IFS= read -r -d '' summaries_filename
    do
        summary_basename="${summaries_filename##*/}"
        if [ "$summary_basename" = "" ]
        then
            # Never let an empty name turn the rm -rf below into a removal of the whole files directory.
            echo "$0: cannot derive a files directory name from $summaries_filename" 1>&2
            exit 1
        fi
        files_directory="$save_directory/files/$summary_basename"

        # Second field of the finish_time line, truncated to whole seconds.  A summary without such a line
        # makes grep exit non-zero; that is expected and handled just below.
        finish_time=$(grep -E '^finish_time\>' "$summaries_filename" | awk ' { print $2 }' | sed 's/\..*$//') || true

        case "$finish_time" in
            None|"")
                # No finish_time was defined, so use the modification time of the summaries file instead.
                finish_time=$(file_mtime "$summaries_filename") || finish_time=""
                ;;
        esac

        # This needs to be an integer comparison, hence the truncation(s) above.  Anything that still isn't a
        # plain integer cannot be compared, so that summary is left alone.
        case "$finish_time" in
            ""|*[!0-9]*)
                echo "$0: cannot determine a finish time for $summaries_filename; skipping it" 1>&2
                continue
                ;;
        esac

        if [ "$finish_time" -lt "$threshold_seconds" ]
        then
            # shellcheck disable=SC2086 # $verbose is deliberately unquoted, see its definition
            if ! rm $verbose -- "$summaries_filename"
            then
                echo "$0: rm $summaries_filename failed" 1>&2
                exit 1
            fi
            # shellcheck disable=SC2086
            if ! rm $verbose -rf -- "$files_directory"
            then
                echo "$0: rm $files_directory failed" 1>&2
                exit 1
            fi
        fi
    done
then
    echo "$0: Expiring summary files failed" 1>&2
    exit 1
fi

# Now deal with the chunk files, which is where most of the storage is.
#
# This is the part that takes quite a while.
#
# This comes pretty close to what we really want - except it won't remove the recently-emptied parent of an empty
# child - that is, not until the next time this script is run.  If we used -exec instead of xargs it might, but
# that'd be much slower for large repositories (save directories).
# shellcheck disable=SC2086
find_reclaimable_chunks | xargs -0 rm $verbose -rf --