#!/usr/local/bin/bash

#######################################################################
# SHELL PORTABILITY
#######################################################################
# Should run with Bash 3.0 or 2.05b, and likely other bash versions as well.
#
# ksh doesn't grok bash arrays, so "#!/usr/bin/env ksh" is a no-go
#
# I'm guessing most to all Bourne shells will barf on this, since even
# ksh doesn't like it

#######################################################################
# KNOWN BUGS
#######################################################################
# 1) If a write times out, then the corresponding read will be skipped -
#    but it may prove that the corresponding read would've been a good
#    number
# 2) No effort is made at all to ensure that the mount options requested
#    are the ones that actually end up in effect.  If something strange
#    happens on the mount, then something strange will happen on the
#    performance test
# 3) A lot of terminal emulators, including mrxvt and gnome-terminal,
#    appear to have problems with programs like reblock that output many
#    many lines.  The problem may or may not be due to the use of
#    carriage returns again and again.  konsole -might- not have the
#    problem, but it's just not as convenient as mrxvt IMO

#######################################################################
# TUNABLEs
#######################################################################
# (some of these should become command line options someday)
#
# "$remote" should be host:path on the NFS server
# 
# "$transaction_maxsecs" should be the number of seconds to wait for a
# mount, or a data transfer or a umount
#
# "$RESULTDIR" should be the directory to put results of the tests in
#
# "$nummeg" should be the number of megabytes to read or write for
# performance testing.  You need to be able to read or write roughly
# "$nummeg" megabytes of data in "$transaction_maxsecs" in the good
# cases.  If you cannot write this much that fast, then the write and
# read will be ignored.  If you cannot read this much that fast, the
# read will be ignored.
#
# The "verify_mount" function can verify that your filesystem is mounted
# with the correct rsize and wsize, at least on AIX 5.1 (NFS client).
# You may want to adapt it to your *ix variant, or you can just change
# the case statement to always return true.
#
# The "verify_network_quiesence" function can tell when your NIC has
# gone (mostly) silent.  You may want to adapt it to run on your system
# (mostly just a matter of changing the interface name on the -i option
# to tcpdump to a/the name seen in ifconfig -a), or as with
# verify_mount, just change the case statement to always return true.

#set -x

# Treat any expansion of an unset variable as a fatal error, so that typos
# in variable names abort the run instead of silently expanding to "".
set -u

# with these options:
# bg,hard,intr,rsize=8192,wsize=8192
# we got 50Mbps over NFS to ext3 from esmf04d to esmft2
# I suspect UDP may be faster; we're using tcp here.  Not sure why AIX
# isn't reporting that.

function usage
{
	# Print the option summary on stderr, then terminate the whole script.
	#   $1 - exit status to terminate with
	printf '%s\n' \
		"Usage: $0 [-g] [-s]" \
		'-h says to give usage help' \
		'-g says to generate results' \
		'-s says to summarize the results, as they are generated' 1>&2
#	'-r says to rank the results' (not offered yet)
	exit "$1"
}

# Mode flags set from the command line.  $rank belongs to the -r option,
# which is still commented out, so it always stays 0 for now.
generate=0
summarize=0
rank=0

# Consume the positional parameters one at a time; anything unrecognised
# ends the script through usage.
while [ "$#" -gt 0 ]
do
	case "$1" in
		-g)
			generate=1
			shift
			;;
		-s)
			summarize=1
			shift
			;;
#		-r)
#			rank=1
#			shift
#			;;
		-h)
			shift
			usage 0
			;;
		*)
			usage 1
			;;
	esac
done

# The flags are 0/1, so their sum is the number of modes requested.
if [ "$((generate + summarize + rank))" != 1 ]
then
	echo "$0: Must specify exactly one of -g or -s" 1>&2
	usage 1
fi

# the filesystem to mount, test, umount again and again (host:path on the
# NFS server):
remote=esmft1d:/qfs1/thestuff
#remote=esmft1d:/nfs_test
#remote=seki.nac.uci.edu:/sdb1/foo

# upper bound, in seconds, handed to maxtime for each mount, transfer and
# umount: 15 minutes
transaction_maxsecs=$((60 * 15))

PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin:$HOME/bin:/usr/local/bin
export PATH
case "$(uname -n)" in
	@esmf04m)
		# pick up some 64 bit executables
		PATH=/u/strombrg/src/fileutils/fileutils-4.1/src:$PATH
		export PATH
		type -all ls
		type -all rm
		;;
esac

function get_mount_point
{
	# Find the first unused path <base>/qfs+nfs+test<N> (N = 1, 2, 3, ...),
	# create it as a directory, and print the path on stdout.  Progress and
	# error messages go to stderr so callers can capture the path with
	# $(get_mount_point).
	#
	# <base> defaults to /mnt2 and can be overridden by setting
	# NFS_TEST_MOUNT_BASE in the environment.
	#
	# Exits with status 1 if the directory cannot be created (inside a
	# command substitution that only ends the subshell, leaving the caller
	# with an empty result).
	#export NTTMPDIR=/mnt2/double-up/qfs+nfs+test1
	local base="${NFS_TEST_MOUNT_BASE:-/mnt2}"
	local dirno=1
	while :
	do
		NTTMPDIR="${base}/qfs+nfs+test${dirno}"
		# -e, not -d: a plain file squatting on the name must be skipped
		# as well, otherwise the mkdir below would fail on it
		if [ -e "$NTTMPDIR" ]
		then
			dirno=$((dirno + 1))
		else
			break
		fi
	done
	echo "Using NTTMPDIR of $NTTMPDIR" 1>&2
	if ! mkdir -p "$NTTMPDIR"
	then
		echo "Error creating directory $NTTMPDIR" 1>&2
		exit 1
	fi
	echo "$NTTMPDIR"
}

# Allocate the mount point directory used by this run.
NTTMPDIR="$(get_mount_point)"

# "set -u" is in effect, so a bare "$TMPDIR" would abort the script when
# TMPDIR is not in the environment; ${TMPDIR:-/tmp} supplies the default
# for both the unset and the empty case.
TMPDIR="${TMPDIR:-/tmp}"
# Directory the xfer-result-* files are written to and summarized from.
export RESULTDIR="$TMPDIR/NFS-TEST-RESULTS"
mkdir -p "$RESULTDIR"

function ranking1
{
	# this is my first attempt at a function that will rank (6,6) higher
	# than (10,2) or (2,10)
	#
	#   $1 - read time  (missing or empty counts as 0)
	#   $2 - write time (missing or empty counts as 0)
	#
	# Prints avg(readtime,writetime) + abs(readtime-writetime) on stdout;
	# a lower score means a better, more balanced pair.
	#
	# ${N:-0} is used so that an omitted argument does not trip "set -u".
	local readtime="${1:-0}"
	local writetime="${2:-0}"
	# One bc run does the whole computation.  The scale settings mirror the
	# original step-by-step pipelines (sum and difference at scale 2, the
	# halving at scale 3), so the printed precision is unchanged.
	{
		echo "r = $readtime"
		echo "w = $writetime"
		echo 'scale = 2'
		echo 's = r + w'
		echo 'scale = 3'
		echo 'a = s / 2'
		echo 'scale = 2'
		echo 'd = r - w'
		echo 'if (d < 0) d = -d'
		echo 'a + d'
	} | bc
}

function numeric
{
	# Succeed (return 0) when $1 is a plain non-negative decimal number:
	# digits with at most one decimal point and at least one digit, e.g.
	# 12, 12.5, .5 or 5. -- and fail (return 1) for everything else,
	# including the empty string.
	#
	# The test is done with shell patterns only, so the argument is never
	# word-split or glob-expanded (an argument of "*" used to be expanded
	# against the current directory).
	local string="$1"
	# tolerate surrounding blanks, as the previous echo-based test did
	string="${string#"${string%%[![:space:]]*}"}"
	string="${string%"${string##*[![:space:]]}"}"
	case "$string" in
		'' | . | *[!0-9.]* | *.*.*)
			# empty, a lone dot, a foreign character, or two or more dots
			return 1
			;;
	esac
	return 0
}

# Summarize mode (-s): every 30 seconds, redraw a report built from the
# xfer-result-* files that a concurrent or earlier -g run left in
# $RESULTDIR.  This branch never falls through to the generation code.
if [ "$summarize" != 0 ]
then
	if ! cd "$RESULTDIR"
	then
		echo Sorry, failed to cd to "$RESULTDIR" 1>&2
		exit 1
	fi
	# avg and bc are listed too: the report pipelines below and ranking1
	# cannot work without them
	for prog in grep python clear mtee wc cut avg modtime highest cat egrep awk bc sleep
	do
		# note that we're using type, because which didn't give a useful exit
		# status
		if type "$prog" > /dev/null 2>&1
		then
			: Good, we have it
		else
			echo Sorry, you will need "$prog" on your '$PATH' for $0 to work 1>&2
			echo properly.... 1>&2
			exit 1
		fi
	done
	echo Good, you appear to have all the required programs on your '$PATH'. 1>&2
	while :
	do
		clear
		pwd
		echo
		for i in Writing Reading
		do
			echo "======> $i in isolation (read protocol!=write protocol, read version!=write version, rsize!=wsize)"
			# "egrep . files /dev/null" prefixes every non-empty line with
			# its file name; failed and timed-out runs are then dropped.
			# NOTE(review): mtee presumably feeds a copy of its stdin to
			# each of the quoted commands - confirm against mtee itself.
			egrep . $(ls *$i*) /dev/null | \
				egrep -vi "timed out|failed" | \
				mtee \
					'echo Number of measurements: $(wc -l)' \
					'echo Average number of seconds: $(cut -d " " -f 4 | avg -i)' \
					'echo Average time: $(cut -d " " -f 4 | avg -i | modtime -i)' \
					'sleep 1; echo Best time: $(cut -d " " -f 4 | highest -s $(expr 1024 \* 1024) -r -n 1 | modtime)' \
					'sleep 2; echo Best numbers:; highest -s $(expr 1024 \* 1024) -r -f 2 -n 5'
			echo
		done
		# get all size's, versions and protocols actually measured
		# xfer-result-Writing-16384-3-udp
		# xfer-result-Reading-16384-3-tcp
		found_sizes="$(ls -f | egrep 'xfer-result-Reading|xfer-result-Writing' | sed 's/xfer-result-[A-Za-z]*-\([0-9]*\)-.*$/\1/' | sort -n | uniq)"
		found_versions="$(ls -f | egrep 'xfer-result-Reading|xfer-result-Writing' | sed 's/xfer-result-[A-Za-z]*-[0-9]*-\([0-9]*\)-.*$/\1/' | sort -n | uniq)"
		found_protocols="$(ls -f | egrep 'xfer-result-Reading|xfer-result-Writing' | sed 's/xfer-result-[A-Za-z]*-[0-9]*-[0-9]*-\([a-zA-Z]*\).*$/\1/' | sort | uniq)"
#		echo found_sizes are $found_sizes
#		echo found_versions are $found_versions
#		echo found_protocols are $found_protocols
		# Initially, we'll try consistent versions and protocols, but not sizes - IE, we need to use the same version and protocol
		# when mounting, but we're going to try letting rsize != wsize.
#		echo 4 4 $(ranking1 4 4)
#		echo 5 5 $(ranking1 5 5)
#		echo 6 6 $(ranking1 6 6)
#		echo 10 2 $(ranking1 10 2)
#		echo 9 3 $(ranking1 9 3)
#		echo 3 9 $(ranking1 3 9)
		echo "======> Best composite of read and write (read protocol==write protocol, read version==write version, rsize!=wsize)"
		for p in $found_protocols
		do
			for v in $found_versions
			do
				# hmmm...  so this is actually proportionate to the number of rsize's * the number of wsize's.   This could
				# really take a long time, if we check a lot of sizes
				for rs in $found_sizes
				do
					for ws in $found_sizes
					do
						# the cat's only test that both result files are readable
						if cat "xfer-result-Reading-$rs-$v-$p" > /dev/null 2>&1 && cat "xfer-result-Writing-$ws-$v-$p" > /dev/null 2>&1
						then
							# third whitespace-separated field is the elapsed time;
							# in a "Sorry, ..." file it is a word, which the
							# numeric test below filters out
							readtime=$(awk ' { print $3 } ' < "xfer-result-Reading-$rs-$v-$p")
							writetime=$(awk ' { print $3 } ' < "xfer-result-Writing-$ws-$v-$p")
							if numeric "$readtime" && numeric "$writetime"
							then
								echo "$p $v rsize: $rs readtime: $readtime wsize: $ws writetime: $writetime composite: $(ranking1 $readtime $writetime)"
							fi
						fi
					done
				done | \
					highest -r -n 5 -f 11 -s 999999
				echo '/\/\/\'
			done
		done
		echo
		echo "======> Best composite of read and write (read protocol==write protocol, read version==write version, rsize==wsize)"
		for p in $found_protocols
		do
			for v in $found_versions
			do
				for s in $found_sizes
				do
					if cat "xfer-result-Reading-$s-$v-$p" > /dev/null 2>&1 && cat "xfer-result-Writing-$s-$v-$p" > /dev/null 2>&1
					then
						readtime=$(awk ' { print $3 } ' < "xfer-result-Reading-$s-$v-$p")
						writetime=$(awk ' { print $3 } ' < "xfer-result-Writing-$s-$v-$p")
						if numeric "$readtime" && numeric "$writetime"
						then
							echo "$p $v $s both sizes: $s readtime: $readtime writetime: $writetime composite: $(ranking1 $readtime $writetime)"
						fi
					fi
				done
			done
		done | \
			highest -r -n 5 -f 11 -s 999999
		echo
		sleep 30
	done
	# we never actually reach this since the above is an infinite loop...
	exit 0
fi

# if we don't do the summarization endless loop, then we fall through to this generation code

# Refuse to start a generation run unless every external helper it needs
# is on $PATH.  maxtime, seq and bc are checked as well: the mount, umount
# and transfer wrappers all run through maxtime, the main loops are driven
# by seq, and the elapsed time is computed with bc.
for prog in rm mkdir dd reblock maxtime mount umount egrep touch python seq bc
do
	# note that we're using type, because which didn't give a useful exit
	# status
	if type "$prog" > /dev/null 2>&1
	then
		: Good, we have it
	else
		echo Sorry, you will need "$prog" on your '$PATH' for $0 to work 1>&2
		echo properly.... 1>&2
		exit 1
	fi
done
echo Good, you appear to have all the required programs on your '$PATH'. 1>&2

# the file that gets written to, and then read back from, the NFS mount
testfn="$NTTMPDIR/testfile"
export testfn

# If you set $nummeg to a number over 2047, then NFSv2 likely won't work
# due to being limited to 2 gigabyte files!  It appears that NFSv2 will
# allow you to create a file > 2 gigabytes, but then you cannot rm it, >
# it, nor fopen(,"w") it.
#export nummeg=2047 # IE, 2 gigabytes, likely the largest NFSv2 safe value
#export nummeg=1536 # IE, 1.5 gigabytes
#export nummeg=$[1024*64] # IE, 64 gigabytes - should be enough to invalidate the buffer cache...
#export nummeg=$[1024*4] # IE, 4 gigabytes
#export nummeg=768 # with this setting, performance numbers were poorly correlated with NFS transfer size
#export nummeg=256
#export nummeg=16
# currently 16 gigabytes' worth of megabytes
nummeg=$((1024 * 16))
export nummeg

#export datasource=/dev/urandom
datasource=/dev/zero
export datasource

# pertains to $both - number of array elements per "record", since bash
# doesn't do 2D arrays
reclen=4
export reclen
# One record per transfer direction:
#   label, result-line prefix, source file, destination file
# this'll be iterated over twice.  Cannot export bash arrays!
both=(
	Writing 'Write time' "$datasource" "$testfn"
	Reading 'Read time' "$testfn" /dev/null
)

# Both the mount point and the results directory have to exist before any
# test is run; give up right away if either cannot be created.
for needed_dir in "$NTTMPDIR" "$RESULTDIR"
do
	if mkdir -p "$needed_dir"
	then
		continue
	fi
	echo Sorry, mkdir failed 1>&2
	exit 1
done

function umt
{
	# Unmount $NTTMPDIR, allowing umount at most $transaction_maxsecs
	# seconds (enforced through maxtime).
	#
	# Returns 0 when the umount succeeded, 1 when it reported failure, and
	# 254 when maxtime exited with 254 (reported as a timeout) or with any
	# other status.
	#
	# the cd helps ensure the filesystem won't be "busy".  Do -not- use
	# umount -f - it may fail to flush your buffers correctly for the
	# purposes of this test
	cd /
	set -x
	maxtime "$transaction_maxsecs" umount "$NTTMPDIR"
	retval="$?"
	set +x
	if [ "$retval" = 0 ]
	then
		return 0
	fi
	if [ "$retval" = 1 ]
	then
		echo umount of $remote failed 1>&2
		return 1
	fi
	# everything else is reported to the caller as 254
	if [ "$retval" = 254 ]
	then
		echo umount of $remote timed out 1>&2
	else
		echo umount of $remote returned a weird value 1>&2
	fi
	return 254
}

function verify_mount
{
	# Show the NFS mount options in effect for $NTTMPDIR and check that the
	# rsize and wsize reported by "nfsstat -m" are the requested ones.
	#   $1 - expected rsize
	#   $2 - expected wsize
	# Returns 0 when both match (or when checking is switched off with
	# case 2 below), 1 otherwise.
	#
	# in practice/for now, these will be the same value, but it'll be easier to
	# make them independent later...
	rsize="$1"
	wsize="$2"

	# first just show the user what's up in terms of mount options: the
	# matching line plus what follows it, up to the first blank line
	nfsstat -m | /usr/local/bin/grep -A 10 "$NTTMPDIR" | sed '/^$/q'

	# it's not the end of the world if this function does nothing but
	# report, but it can help you catch errors earlier (if at all,
	# actually).  If your nfsstat doesn't behave this way (like on AIX 5.1),
	# don't stress about it, just use case 2
	case 1 in
		1)
			# the format we expect from nfsstat - but yours may be
			# different!:
			# /mnt2/qfs+nfs+test17 from /mnt2/qfs+nfs+test17:esmft1d
			#  Flags:   vers=3,proto=tcp,auth=unix,hard,intr,link,symlink,rsize=16384,wsize=16384,retrans=5
			#  All:     srtt=0 (0ms), dev=0 (0ms), cur=0 (0ms)
			real_rsize="$(nfsstat -m |
				/usr/local/bin/grep -A 10 "$NTTMPDIR" |
				sed '/^$/q' |
				grep rsize |
				sed 's/^.*rsize=\([0-9][0-9]*\),.*$/\1/')"
			real_wsize="$(nfsstat -m |
				/usr/local/bin/grep -A 10 "$NTTMPDIR" |
				sed '/^$/q' |
				grep wsize |
				sed 's/^.*wsize=\([0-9][0-9]*\),.*$/\1/')"
			# start out optimistic; either mismatch flips the verdict
			retval=0
			[ "$real_rsize" = "$rsize" ] || {
				echo Bummer, rsize is "$real_rsize", but should be "$rsize" 1>&2
				retval=1
			}
			[ "$real_wsize" = "$wsize" ] || {
				echo Bummer, wsize is "$real_wsize", but should be "$wsize" 1>&2
				retval=1
			}
			return "$retval"
			;;
		2)
			# just return true
			return 0
			;;
	esac
}

function verify_network_quiesence
{
	# Wait for the network to go (mostly) quiet before a benchmark run.
	#
	# Up to ten probes are made.  Each probe runs tcpdump under maxtime for
	# at most 30 seconds and counts the lines it printed; fewer than 3
	# lines counts as quiet.  A busy probe is followed by a 30 second
	# pause.
	#
	# Returns 0 as soon as a probe finds the network quiet, 1 if all ten
	# probes found it busy.  With case 2 selected below it always returns
	# 0 without probing.
	local i
	case 1 in
		1)
			# actually check the network - but you'll need to specify the
			# right network interface, EG "en2" on AIX 5.1, eth2 on
			# Solaris, etcetera
			for i in 1 2 3 4 5 6 7 8 9 0
			do
				# you could also use tethereal, which should support
				# identical options, I believe
				if [ $(maxtime 30 tcpdump -c 100 -i en2 host esmft1d | wc -l) -lt 3 ]
				then
					# network is quiet, return true
					return 0
				fi
				sleep 30
			done
			# network did not quiet down, return false so that the caller
			# can refuse to benchmark on a busy network
			return 1
			;;
		2)
			# just return true without verifying anything
			return 0
			;;
	esac
}

function mt
{
	# Mount $remote on $NTTMPDIR using the $vers, $proto and $size chosen by
	# the caller ($size is used for both rsize and wsize), once
	# verify_network_quiesence reports a quiet network.
	#
	# Returns 0 when the mount succeeded and verify_mount accepted it, 1
	# when verify_mount rejected it, and 254 when maxtime exited with 254
	# (reported as a timeout) or with any status other than 0 or 1.
	# Exits the whole script with status 1 when the network stays busy or
	# when the mount command itself exits with status 1.
	#if ! maxtime $[60*30] mount -o vers="$vers",proto="$proto",rsize="$size",wsize="$size" $remote "$NTTMPDIR"
	set -x
	# the server part of host:path
	hostname="$(echo $remote | sed 's/:.*$//')"
	if ! verify_network_quiesence
	then
		echo Sorry, the network seems kind of busy, so it will not be that good for benchmarking 1>&2
		echo Exiting prematurely 1>&2
		exit 1
	fi
	echo Good, network is quiet, proceeding 1>&2
	maxtime "$transaction_maxsecs" mount -o vers="$vers",proto="$proto",rsize="$size",wsize="$size" $remote "$NTTMPDIR"
	retval="$?"
	set +x
	if [ "$retval" = 0 ]
	then
		if verify_mount "$size" "$size"
		then
			echo
			echo Mount verified OK 1>&2
			return 0
		fi
		echo Mount failed to verify 1>&2
		return 1
	fi
	if [ "$retval" = 1 ]
	then
		echo mount of $remote failed 1>&2
		exit 1
	fi
	# anything else is handed back to the caller as 254
	if [ "$retval" = 254 ]
	then
		echo mount of $remote timed out 1>&2
	else
		echo mount of $remote returned a weird value 1>&2
	fi
	return 254
}

function xfer
{
	# Copy $nummeg megabytes from one file to another through
	# "dd | reblock", allowing at most $transaction_maxsecs seconds.
	#   $1 - source file
	#   $2 - destination file (emptied first)
	# Returns 0 on success, 1 if the destination cannot be listed or the cd
	# to $NTTMPDIR fails, and 254 when the first pipeline stage exits with
	# 254 (reported as a timeout) or with any status other than 0 or 1.
	# Exits the whole script with status 1 when that stage exits with 1.
	srcfile="$1"
	dstfile="$2"
	echo From $(ls -l "$srcfile")
	[ ! -f "$dstfile" ] || rm -f "$dstfile"
	# dang 32 bit bash I guess
	> "$dstfile"
	df "$dstfile"
	#truncate "$dstfile"
	# label without a newline, so that the ls output continues the line
	printf 'To   '
	if ! ls -l "$dstfile"
	then
		echo ls "$dstfile" failed under implausible circumstances 1>&2
		return 1
	fi
	if ! cd "$NTTMPDIR"
	then
		echo "cd $NTTMPDIR failed" 1>&2
		return 1
	fi
	[ ! -f "$dstfile" ] || rm -f "$dstfile"
	> "$dstfile"
	#truncate "$dstfile"
	# reblock sits between dd and the destination; it gives a running
	# tally of the data transferred as the user watches this script being
	# run.
	# if it takes more than $transaction_maxsecs, give up - that's too long
	# to be worth pursuing.  We have lots of results that are far shorter
	# than that already
	set -x
	maxtime "$transaction_maxsecs" dd if="$srcfile" bs=1024k count=$((nummeg)) 2> /dev/null |
		reblock -e $((1024 * nummeg)) $((1024 * 1024)) 300 > "$dstfile"
	# $? here would be reblock's exit status, not maxtime's, so take the
	# first word of the space-joined PIPESTATUS list instead
	copystatus=${PIPESTATUS[@]}
	echo "${copystatus[@]}"
	retval="${copystatus%% *}"
	set +x
	if [ "$retval" = 0 ]
	then
		return 0
	elif [ "$retval" = 1 ]
	then
		echo transfer of $remote failed 1>&2
		exit 1
	elif [ "$retval" = 254 ]
	then
		echo transfer of $remote timed out 1>&2
		return 254
	else
		echo transfer of $remote returned a weird value 1>&2
		return 254
	fi
}

function gettime
{
	# Print the current time as seconds since the epoch on stdout, with a
	# fractional part whenever a python interpreter is available.
	#
	# print(...) with parentheses is valid in both Python 2 and Python 3;
	# the statement form "print time.time()" is a syntax error under
	# Python 3.  If neither python nor python3 can be run, fall back to
	# whole seconds from date.
	local interp
	for interp in python python3
	do
		if type "$interp" > /dev/null 2>&1
		then
			"$interp" -c 'import time; print(time.time())' && return 0
		fi
	done
	date +%s
}

# just in case - but we won't always need this really.  Intentionally
# ignore the return value
umt

# if we iterate by one, this'll take FOREVER.  So let's try 512 :)
# the process got wedged on 16384 udp 3 from esmf04 to esmft1!
# for size in $(seq 1024 256 65536)
# for size in $(seq 512 512 65536)
# for size in $(seq 4096 512 65536)
# the size 16384 with AIX 5.1 NFS client and Solaris 9 NFS server seems
# to have a high frequency of problems.  nfsmnthelp on the AIX side gets
# stuck on mount's or umount's

# in normal operation, this should not be commented out.  Only comment
# it if an nfs-test -g run exits prematurely or gets stuck, and you want to
# restart nfs-test, adding new results to the old results
#
# ${RESULTDIR:?} aborts rather than expanding to "/*" should the variable
# ever be empty; -f keeps rm quiet when there are no old results to remove
# (the unmatched glob is then passed through literally).
rm -f -- "${RESULTDIR:?}"/*

# make sure the results directory is actually writable before spending
# hours on measurements
if ! touch "$RESULTDIR"/simple-touch
then
	echo "Simple touch test failed" 1>&2
	exit 1
fi

function switch_mount_point
{
	# Allocate a fresh mount point and retarget the test file at it.
	#
	# it appears that if AIX 5.1 (as an NFS client) times out mounting or
	# umounting an NFS mount, then subsequent mounts and/or umounts on the
	# same mount point will all fail...  So get a new mount point!
	#
	# $testfn, and the copies of it stored in $both, embed the old mount
	# point, so they are rewritten as well; otherwise later transfers would
	# keep using the abandoned directory instead of the new mount.
	local old_testfn="$testfn"
	local k=0
	NTTMPDIR="$(get_mount_point)"
	testfn="$NTTMPDIR/testfile"
	while [ "$k" -lt "${#both[@]}" ]
	do
		if [ "${both[$k]}" = "$old_testfn" ]
		then
			both[$k]="$testfn"
		fi
		k=$((k + 1))
	done
}

# Main benchmark: for every transfer size, protocol and NFS version, mount,
# write the test file, umount, then mount, read it back, umount, recording
# each elapsed time in $RESULTDIR/xfer-result-<direction>-<size>-<vers>-<proto>.
#for size in $(seq 4096 1024 65536)
for size in $(seq 4096 1024 65536)
do
	for proto in tcp udp
	do
		# vers==2 likely cannot do files over 2 gigabytes!
		for vers in 3
		do
			#resultfn="$RESULTDIR/xfer-result-$vers-$proto-$size"
			# number of (label, prefix, source, destination) records in $both
			numelem=$((${#both[@]} / reclen))
			#echo "numelem is $numelem"
			echo This remove can take a while...  Please wait...
			rm -f "$testfn"
			echo Remove completed...
			# lucky 7 :)
			for i in $(seq 7)
			do
				echo
			done
			#rm -f "$resultfn"
			# write, then read
			for index in $(seq 0 $((numelem - 1)))
			do
				breakout=0
				# index of this record's first element within $both
				base=$((index * reclen))
				resultfn="$RESULTDIR/xfer-result-${both[${base}]}-$size-$vers-$proto"
				rm -f "$resultfn"
				> "$resultfn"
				#truncate "$resultfn"
				ls -l "$resultfn"
				# NOTE(review): the interval measured from here to $endtime
				# spans mt (including its network quiescence check), the
				# transfer, and umt - not just the transfer.
				#starttime=$(python -c 'import time; print time.time()')
				starttime=$(gettime)
				mt
				retval="$?"
				case "$retval" in
					0)
						mountproblem=""
						;;
					1)
						mountproblem="Sorry, mount failed, skipping this one"
						;;
					254)
						mountproblem="Sorry, mount timed out, skipping this one"
						;;
					*)
						mountproblem="Sorry, mount did not work in a strange way, skipping this one"
						;;
				esac
				if [ "$mountproblem" != "" ]
				then
					echo "$mountproblem" 1>&2
					echo "$mountproblem" > "$resultfn"
					switch_mount_point
					breakout=1
				fi
				if [ "$breakout" = 0 ]
				then
					# transfer $nummeg megabytes
					#echo index is "$index", base is "$base"
					echo
					echo "${both[${base}+0]} $nummeg megabytes: $size $vers $proto"
					xfer "${both[${base}+2]}" "${both[${base}+3]}"
					retval="$?"
					if [ "$retval" = 254 ]
					then
						echo Sorry, ${both[${base}+0]} timed out, skipping this one 1>&2
						echo Sorry, ${both[${base}+0]} timed out, skipping this one > "$resultfn"
						# "breakout" logic is to ensure that an attempt is made
						# to umount a mount with parameters that caused problems, so they aren't
						# left in place forevermore
						breakout=1
					fi
				fi
				umt
				retval="$?"
				if [ "$retval" = 254 ]
				then
					echo Sorry, umount timed out, skipping this one 1>&2
					echo Sorry, umount timed out, skipping this one > "$resultfn"
					switch_mount_point
					break
				fi
				if [ "$breakout" != 0 ]
				then
					break
				fi
				#endtime=$(python -c 'import time; print time.time()')
				endtime=$(gettime)
				echo "Start time was $starttime, End time was $endtime"
				v=$(echo $endtime - $starttime | bc)
				echo Difference is "$v"
				echo Writing "${both[${base}+1]}"': ' "$v" to "$resultfn"
				# keep this exact spacing: the summarize code picks the time
				# out of the line by field position
				echo "${both[${base}+1]}"': ' $v > "$resultfn"
			done
			echo This remove can take a while...  Please wait...
			rm -f "$testfn"
			echo Remove completed...
		done
	done
done