#!/usr/local/bin/bash

# Guard: this control script is only meant to run on esmf04m (it
# re-invokes itself via "$0" and assumes esmf04m as the ssh origin
# for the esmft1 / esmfsn04 hops below).
case "$(uname -n)" in
	esmf04m)
		;;
	*)
		echo Run me only on esmf04m 1>&2
		# Previously this fell through and kept running on the wrong
		# host; actually stop here.
		exit 1
		;;
esac

# delay SECONDS
#
# Visible countdown: prints "N of SECONDS..." once per second, counting
# down from $1 to 1, sleeping one second per line.  A count of 0 (or
# less) prints nothing and returns immediately.
#
# Command tracing is suspended for the duration of the countdown so the
# trace log isn't flooded, and unconditionally re-enabled afterward
# (this script runs under "set -x" globally, so that is the right
# state to restore).
function delay
{
	set +x
	# Arithmetic for-loop replaces the old `seq $1 -1 1` backtick
	# pipeline: same sequence, no external command.
	for (( sec = $1; sec >= 1; sec-- ))
	do
		echo "$sec of $1..."
		sleep 1
	done
	set -x
}

# Trace every command from here on (the delay helper temporarily
# suspends tracing while it counts down).
set -x

# Feature flag: whether to include esmfsn02 in the "assemble" step.
# support for esmfsn02 is broken!  But then, we'll likely never need it
# again
#do_esmfsn02=true
do_esmfsn02=false

# Dispatch on the requested subcommand in $1.  Several branches compose
# operations by re-invoking this same script as: "$0" <subcommand>.
case "$1" in
	test_delay)
		# Manual sanity check of the delay countdown helper.
		delay 10
		;;
	start)
		# Full startup: assemble the GFS cluster pieces, let things
		# settle, then mount the filesystems on esmft1.
		"$0" assemble
		# fsck'ing takes forever!  Normally we don't want to do it
		#"$0" fsck
		#
		# pause a while.  It seems like maybe GFS needs a while to ponder,
		# before doing mounts
		delay 30
		"$0" mount
		;;
	assemble)
		# Start the GFS infrastructure daemons in dependency order:
		# gnbd exports -> gfs kernel modules -> ccsd -> lock daemon(s).
		# esmfsn04 (and optionally esmfsn02) are only reachable by
		# hopping through esmft1, hence the nested ssh commands.
		# Abort on the first failure.
		#
		# this could easily be improved by just using a list of commands
		# with (potentially multiple) hostname prefixes.  This way is very
		# simpleminded, I'm afraid
		#
		#[root@esmfsn04 root]# sh /root/start-gnbd-export.sh
		if ! ssh root@esmft1 'ssh root@esmfsn04 "sh /root/start-gnbd-export.sh"'
		then
			echo Error starting gnbd exports on esmfsn04 1>&2
			exit 1
		fi

		#[root@esmft1 root]# sh /root/GFS/start-gfs-modules.sh
		if ! ssh root@esmft1 'sh /root/GFS/start-gfs-modules.sh'
		then
			echo Error starting gfs modules esmft1 1>&2
			exit 1
		fi

		#[root@esmft1 root]# sh /root/GFS/start_ccs.sh
		if ! ssh root@esmft1 'sh /root/GFS/start_ccs.sh'
		then
			echo Error starting ccsd esmft1 1>&2
			exit 1
		fi

		#[root@esmfsn02 root]# sh /root/start-lock-daemon.sh
		#[root@esmfsn04 root]# sh /root/start-lock-daemon.sh
		if ! ssh root@esmft1 'ssh root@esmfsn04 "sh /root/start-lock-daemon.sh"'
		then
			echo Error starting lock daemon esmfsn04 1>&2
			exit 1
		fi

		# esmfsn02 participation is normally off; see do_esmfsn02 near
		# the top of the script.
		if [ "$do_esmfsn02" = true ]
		then
			# esmfsn02 has been borrowed by promicro for testing at their
			# workplace
			if ! ssh root@esmft1 'ssh root@esmfsn02 "sh /root/start-lock-daemon.sh"'
			then
				echo Error starting gnbd exports on esmfsn02 1>&2
				exit 1
			fi
		fi
		echo GFS assembled
		;;
	fsck)
		# Offline filesystem check of the three GFS pools, run on esmft1.
		# Mails a completion notice because this can run for days.
		echo This can take quite a long time...  Like days, or weeks
		time ssh esmft1 gfs_fsck -v -y /dev/pool/pool041
		time ssh esmft1 gfs_fsck -v -y /dev/pool/pool044
		time ssh esmft1 gfs_fsck -v -y /dev/pool/pool045
		(
		 	echo ESMF gfs_fsck done
			date
		) | mail -s 'ESMF gfs_fsck done' strombrg@dcs.nac.uci.edu
		;;
	mount)
		# Mount the three GFS pools on esmft1.  The NFS re-export and
		# the client-side mounts on esmf04m further below are
		# intentionally disabled (see the comments on each).
		#[root@esmft1 root]# mount -t gfs /dev/pool/pool045 /data/gfs045
		#[root@esmft1 root]# mount -t gfs /dev/pool/pool044 /data/gfs044
		#[root@esmft1 root]# mount -t gfs /dev/pool/pool041 /data/gfs041
		if ! ssh root@esmft1 'mount -t gfs /dev/pool/pool045 /data/gfs045'
		then
			echo Error mounting gfs045 esmft1 1>&2
			exit 1
		fi

		if ! ssh root@esmft1 'mount -t gfs /dev/pool/pool044 /data/gfs044'
		then
			echo Error mounting gfs044 esmft1 1>&2
			exit 1
		fi

		if ! ssh root@esmft1 'mount -t gfs /dev/pool/pool041 /data/gfs041'
		then
			echo Error mounting gfs041 esmft1 1>&2
			exit 1
		fi

#		don't try to fire up NFS; we aren't using it right now
#		# we don't really know if NFS is running, but shut it down anyway.
#		# we don't care about the exit status
#		Also, NFS tends to fail to start up anyway until it's retried
#		ssh root@esmft1 '/etc/init.d/nfs stop'
#
#		# I once saw the nfs start fail: Mon Oct 11 12:52:34 PDT 2004
#		# Hypothesis: NFS needs some settling time before we try to fire
#		# it back up again.
#		delay 10
#
#		# fire up NFS.  I tried this three times over ssh, and failed each
#		# time.  Then I tried it locally, and it succeeded on the first
#		# try. :-S
#		if ! ssh root@esmft1 '/etc/init.d/nfs start'
#		then
#			echo Error mounting gfs041 esmft1 1>&2
#			exit 1
#		fi

		#[root@esmf04m /]# mount esmft1:/data/gfs045 /data_mnt/gfs045
		#[root@esmf04m /]# mount esmft1:/data/gfs044 /data_mnt/gfs044
		#[root@esmf04m /]# mount esmft1:/data/gfs041 /data_mnt/gfs041
		# clusterfs would prefer tcp, but right now, t1 only supports udp
		# we wanted v3, but it was having problems.  Trying v2
		#mountopts="-o rsize=8192,wsize=8192,proto=udp,vers=3"
#		mountopts="-o rsize=8192,wsize=8192,proto=udp,vers=3"
#		if ! mount $mountopts esmft1:/data/gfs045 /data_mnt/gfs045
#		then
#			echo Erroring mounting gfs045 on esmf04m 1>&2
#			exit 1
#		fi
#
#		if ! mount $mountopts esmft1:/data/gfs044 /data_mnt/gfs044
#		then
#			echo Erroring mounting gfs044 on esmf04m 1>&2
#			exit 1
#		fi
#
#		if ! mount $mountopts esmft1:/data/gfs041 /data_mnt/gfs041
#		then
#			echo Erroring mounting gfs041 on esmf04m 1>&2
#			exit 1
#		fi

#		not mounting on esmf04m, because gfs is crashing so much
#		for i in 04{1,4,5}
#		do
#			mount /data_mnt/gfs$i
#		done

		echo GFS mounted
		;;
	umount)
		# Force-unmount the GFS filesystems on esmft1.  Forced umounts
		# of busy filesystems are expected to fail sometimes; a sample
		# failing session is preserved below.
#		-bash-2.05b# umount -f /data/gfs041
#		umount2: Device or resource busy
#		umount: /data/gfs041: device is busy
#		-bash-2.05b# umount -f /data/gfs044
#		umount2: Device or resource busy
#		umount: /data/gfs044: device is busy
#		-bash-2.05b# umount -f /data/gfs045
#		umount2: Device or resource busy
#		umount: /data/gfs045: device is busy
#		-bash-2.05b# 
		echo Do not worry too much if these umount -f\'s fail...
		for i in 1 4 5
		do
			ssh esmft1 "umount -f /data/gfs04${i}"
		done
		;;
	stop)
		# Tear down: unmount, shut down the gulm lock server and ccsd on
		# esmfsn04 (via esmft1), then deactivate the pools on esmft1.
		"$0" umount

		for host in esmfsn04
		do
			ssh esmft1 "ssh $host /sbin/gulm_tool shutdown $host"
		done

		for host in esmfsn04
		do
			# killall is run twice with a pause in between; presumably
			# the first signal doesn't always take -- TODO confirm
			ssh esmft1 "ssh $host killall -v ccsd"
			delay 1
			ssh esmft1 "ssh $host killall -v ccsd"
		done

		# deactivate pools on all nodes (clients)
		ssh esmft1 "/sbin/pool_assemble -r"
		echo
		echo Now you will probably want to reboot esmfsn04 and esmft1
		echo You probably want to use:
		echo $0 reboot
		echo
		echo Alternatively, you can:
		echo /usr/local/bin/safe-reboot
		echo ...on each relevant host.
		;;
	reboot)
		# Reboot esmfsn04 (via esmft1) and esmft1 itself, wait for them
		# to come back, then report uptimes as confirmation.
		ssh esmft1 "ssh -f esmfsn04 /usr/local/bin/safe-reboot"
		ssh -f esmft1 /usr/local/bin/safe-reboot
		# delay 4 minutes, check uptime.  I used to delay 10 minutes, but
		# I noticed that both machines had been up for 8 minutes when the
		# uptime's were run :)
		delay 240
		"$0" uptime
		;;
	uptime)
		# Report uptime of esmft1 and (via esmft1) esmfsn04.
		ssh esmft1 uptime
		ssh esmft1 "ssh esmfsn04 uptime"
		;;
	restart)
		# Full bounce: stop, reboot both hosts, start again, bracketed
		# by uptime reports.
		"$0" uptime
		"$0" stop
		"$0" reboot
		"$0" start
		"$0" uptime
		;;
	# NOTE(review): unrecognized subcommands (including no argument at
	# all) fall through silently with exit status 0; consider adding a
	# *) branch that prints a usage message and exits non-zero.
esac

