#!/usr/bin/env python

import sys
import os
#this was for pdialog - good riddance :)
#sys.path.insert(0,'/usr/local/lib')
#sys.path.insert(0,'/dcslib/allsys/packages/python/local-lib')
#sys.path.insert(0,os.path.expanduser('~/lib'))
import string
import socket
import time
try:
	import pygtk
	import gtk
except:
	have_graphics=0
else:
	have_graphics=1
import pwd
import socket
import signal

def give_unique_id(t):
	"""Return a slash-delimited tag identifying this run.

	The tag is /hostname/current-time/pid/ctime(t)/ and is used to label
	log-file entries.
	"""
	pieces = [os.uname()[1], str(time.time()), str(os.getpid()), time.ctime(t)]
	return '/' + '/'.join(pieces) + '/'

class apa:
	"""A minimal modal "accident proof acknowledge" dialog built on PyGTK.

	The dialog shows a description and a single Acknowledge button, and
	blocks in gtk.main() until the button is clicked or the window is closed.
	"""
	def __init__(self):
		pass

	def accident_proof_acknowledge(self, description, columns, rows):
		"""Show description in a window and block until it is acknowledged.

		columns and rows are accepted for compatibility with callers but are
		not used; the window sizes itself to its contents.
		"""
		self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
		self.window.set_title('Accident proof acknowledge')
		self.window.connect("delete_event", self.delete_event)
		self.window.show()
		self.vbox = gtk.VBox()
		self.vbox.show()
		self.window.add(self.vbox)
		self.label = gtk.Label(description)
		self.label.show()
		self.vbox.pack_start(self.label)
		self.hbox = gtk.HBox()
		self.hbox.show()
		self.vbox.pack_start(self.hbox)
		self.acknowledge_button = gtk.Button('Acknowledge')
		self.acknowledge_button.show()
		self.acknowledge_button.connect("clicked", self.acknowledge_callback)
		self.hbox.pack_start(self.acknowledge_button)
		# blocks here until one of the callbacks below calls gtk.main_quit()
		gtk.main()

	def delete_event(self, widget=None, event=None, data=None):
		"""Handle the window being closed: leave the main loop and exit.

		GTK invokes delete_event handlers with the widget and the event, so
		the handler has to accept them; the defaults keep a bare call working.
		"""
		gtk.main_quit()
		sys.exit(0)

	def acknowledge_callback(self, widget=None):
		"""Handle the Acknowledge button: just leave the main loop."""
		gtk.main_quit()

# Create the shared dialog helper only when the gtk import succeeded;
# send_apa() checks have_graphics before it touches pd.
if have_graphics:
	pd = apa()

def signal_received(signum, frame):
	"""Signal handler: record the termination, then exit with status 0.

	The message goes to the log file (when one was opened) and always to
	standard output.
	"""
	global logfile_glitch
	global logfile
	template = 'Terminating on signal %d %s %s\n'
	if not logfile_glitch:
		logfile.write(template % (signum, give_unique_id(time.time()), ' '.join(argvcopy)))
		logfile.close()
	sys.stdout.write(template % (signum, give_unique_id(time.time()), ' '.join(argvcopy)))
	sys.exit(0)

def send_to_console(email,summary,full_description):
	"""Write the full description, followed by a newline, to standard output.

	email and summary are accepted so that every sender has the same
	signature; they are not used here.
	"""
	sys.stdout.write(''.join((full_description, '\n')))

def send_email(email,summary,full_description):
	"""Mail full_description to email with summary as the subject.

	The first executable of /usr/lib/sendmail and /usr/sbin/sendmail is used.
	os.popen() does not raise when the program is missing (the shell just
	fails later), so the binary is looked for explicitly rather than relying
	on an exception to trigger the fallback.  If neither exists, a warning is
	written to stderr and nothing is sent.
	"""
	sendmail = None
	for candidate in ('/usr/lib/sendmail', '/usr/sbin/sendmail'):
		if os.access(candidate, os.X_OK):
			sendmail = candidate
			break
	if sendmail is None:
		sys.stderr.write('Warning!  Opening a pipe to sendmail failed!\n')
		return
	try:
		pipe = os.popen('%s %s' % (sendmail, email),'w')
	except (OSError, IOError):
		sys.stderr.write('Warning!  Opening a pipe to sendmail failed!\n')
		return
	try:
		pipe.write('Subject: %s\n' % summary)
		pipe.write('\n')
		pipe.write(full_description)
		# the terminating "." must sit on a line of its own
		if not full_description.endswith('\n'):
			pipe.write('\n')
		pipe.write('.\n')
	finally:
		pipe.close()

def send_page(user,host,subject,summary,full_description):
	"""Send a page, by mail, to user@host.

	The message carries subject as its subject line and summary followed by
	full_description as its body.  Like send_email(), the first executable of
	/usr/lib/sendmail and /usr/sbin/sendmail is used; if neither exists a
	warning is written to stderr and nothing is sent.
	"""
	sys.stderr.write('Sending page....\n')
	sendmail = None
	for candidate in ('/usr/lib/sendmail', '/usr/sbin/sendmail'):
		if os.access(candidate, os.X_OK):
			sendmail = candidate
			break
	if sendmail is None:
		sys.stderr.write('Warning!  Opening a pipe to sendmail failed!\n')
		return
	pipe = os.popen('%s %s@%s' % (sendmail, user, host),'w')
	try:
		pipe.write('To: %s@%s\n' % (user, host))
		pipe.write('Subject: %s\n' % subject)
		pipe.write('\n')
		pipe.write(summary)
		pipe.write('\n')
		pipe.write(full_description)
		pipe.write('\n.\n')
	finally:
		pipe.close()

def send_apa(email,summary,full_description):
	"""Show full_description in the blocking acknowledge dialog.

	Does nothing when the gtk import failed.  email and summary are unused.
	"""
	global pd
	if not have_graphics:
		return
	pd.accident_proof_acknowledge(full_description,80,12)

def send_all(do_not_wait,email,summary,full_description):
	"""Close out the log and deliver the notification through every channel.

	Channels, in order: console, e-mail, pager (only when paging was
	requested) and finally the blocking dialog (only when do_not_wait is
	false).

	Note that summary will be ignored by some reporting mechanisms, so all
	info that is in the summary should also be in the full_description.
	"""
	global logfile_glitch
	global should_send_page
	global logfile
	if not logfile_glitch:
		logfile.write('Finish %s %s\n' % (give_unique_id(time.time()), ' '.join(argvcopy)))
		logfile.close()
	send_to_console(email,summary,full_description)
	send_email(email,summary,full_description)
	if should_send_page:
		page_user, page_host, page_subject = page_list[0], page_list[1], page_list[2]
		send_page(page_user,page_host,page_subject,summary,full_description)
	if do_not_wait:
		return
	# this one blocks, so only send it if we're willing to wait
	send_apa(email,summary,full_description)

def less_than_notify(do_not_wait,email,hostname,command,numeric,less_than,secs):
	"""Report that command returned numeric, below less_than, then exit 0.

	numeric and less_than are floats, so they are formatted with %s; the
	former %d silently truncated them (0.3 < 0.5 read as "0 less than 0").
	The elapsed time (secs) is included in the report, as the other
	notifiers do, and the summary no longer ends in a newline, since it is
	used as a mail subject line.
	"""
	ts = tm(secs)
	summary = 'host %s command less than %s' % (hostname,less_than)
	full_description = 'Command %s\nHost %s\nReturned %s\nWhich is less than %s\nAfter %s\nAt %s\n' % \
		(command,hostname,numeric,less_than,ts,time.ctime(time.time()))
	send_all(do_not_wait,email,summary,full_description)
	sys.exit(0)

def greater_than_notify(do_not_wait,email,hostname,command,numeric,greater_than,secs):
	"""Report that command returned numeric, above greater_than, then exit 0.

	numeric and greater_than are floats, so they are formatted with %s; the
	former %d silently truncated them.  The elapsed time (secs) is included
	in the report, as the other notifiers do, and the summary no longer ends
	in a newline, since it is used as a mail subject line.
	"""
	ts = tm(secs)
	summary = 'host %s command greater than %s' % (hostname,greater_than)
	full_description = 'Command %s\nHost %s\nReturned %s\nWhich is greater than %s\nAfter %s\nAt %s\n' % \
		(command,hostname,numeric,greater_than,ts,time.ctime(time.time()))
	send_all(do_not_wait,email,summary,full_description)
	sys.exit(0)
 
def port_notify(do_not_wait,email,host,port,secs):
	"""Report that port on host is accepting connections, then exit 0."""
	elapsed = tm(secs)
	now = time.ctime(time.time())
	summary = 'Port %d on host %s listening' % (port,host)
	report_lines = [
		'Host %s' % host,
		'Port %d' % port,
		'Back up after %s' % elapsed,
		'At %s' % now,
	]
	full_description = '\n'.join(report_lines) + '\n'
	send_all(do_not_wait,email,summary,full_description)
	sys.exit(0)

def command_notify(do_not_wait,email,host,command,truth,secs):
	"""Report that command reached the awaited truth value, then exit 0."""
	elapsed = tm(secs)
	now = time.ctime(time.time())
	summary = 'Host %s, command %s done' % (host,command)
	report_lines = [
		'Host %s' % host,
		'Command %s' % command,
		'Returned %d' % truth,
		'Done after %s' % elapsed,
		'At %s' % now,
	]
	full_description = '\n'.join(report_lines) + '\n'
	send_all(do_not_wait,email,summary,full_description)
	sys.exit(0)

def process_notify(do_not_wait,email,host,pid,command,secs):
	"""Report that process pid (running command) has finished, then exit 0."""
	elapsed = tm(secs)
	now = time.ctime(time.time())
	summary = 'Host %s, pid %d, command %s done' % (host,pid,command)
	report_lines = [
		'Host %s' % host,
		'Pid %d' % pid,
		'Command %s' % command,
		'Done after %s' % elapsed,
		'At %s' % now,
	]
	full_description = '\n'.join(report_lines) + '\n'
	send_all(do_not_wait,email,summary,full_description)
	sys.exit(0)

def usage(retval):
	"""Write the usage examples to stderr and exit with status retval."""
	prog = sys.argv[0]
	examples = (
		"-n host port",
		"-t command that must return true",
		"-f command that must return false",
		"-p pid pid that must no longer be running",
		"-s searchstring find searchstring in ps output, use the corresponding pid",
		"-l number 'command that must return less than number'",
		"-g number 'command that must return more than number'",
		"-F Only use the first field in -l and -g",
		"-e emailaddress (used in combination with other options)",
		"-w do not block (do not wait)",
		"-P send a page (but only if NWUPAGE has a good value)",
		"-m seconds Specify maximum number of seconds to wait (but will be somewhat less precise than this)",
		"-i seconds Specify interval betweens tests",
		"-c command_to_run_and_watch",
	)
	sys.stderr.write("%s: usage examples follow\n" % prog)
	for example in examples:
		sys.stderr.write("\t%s: %s\n" % (prog, example))
	sys.exit(retval)

def check_host(host):
	"""Verify that host resolves; otherwise complain and exit via usage(1).

	Only lookup failures are treated as a bad hostname.  A bare except here
	would also swallow KeyboardInterrupt and SystemExit raised during a slow
	DNS lookup and misreport them as a bad hostname.
	"""
	try:
		socket.gethostbyname(host)
	except (socket.error, UnicodeError):
		sys.stderr.write("%s: bad hostname\n"%sys.argv[0])
		usage(1)

def getportno(s):
	"""Convert a service name or decimal string to a TCP port number.

	s is first looked up as a tcp service name; failing that it is parsed
	as a decimal integer.  Anything that is neither, or that lies outside
	1..65535, is reported as a bad port and the program exits via usage(1).
	"""
	try:
		return socket.getservbyname(s,'tcp')
	except socket.error:
		# not a known service name; fall through and try it as a number
		pass
	try:
		result = int(s)
	except ValueError:
		result = -1
	if result < 1 or result > 65535:
		sys.stderr.write("%s: bad port\n"%sys.argv[0])
		usage(1)
	return result

def tm(deltat):
	"""Format a duration in seconds as 'D days H hours M minutes S seconds '.

	Components that are not greater than zero are left out, so a duration of
	zero yields the empty string.  Each component that is present is
	followed by a single space.
	"""
	total_minutes, seconds = divmod(deltat,60)
	total_hours, minutes = divmod(total_minutes,60)
	days, hours = divmod(total_hours,24)
	pieces = []
	for amount, unit in ((days,'days'), (hours,'hours'), (minutes,'minutes'), (seconds,'seconds')):
		if amount > 0:
			pieces.append("%d %s " % (amount, unit))
	return ''.join(pieces)

# No arguments at all: show the examples and exit successfully.
if len(sys.argv) == 1:
	usage(0)

# Defaults for everything the option loop below may set.
have_less_than = 0
have_greater_than = 0
should_send_page = 0
page = ''
first = 0
pid = -1

# Notification address: $NWUEMAIL if set, otherwise effective-user@hostname.
if 'NWUEMAIL' in os.environ:
	email = os.environ['NWUEMAIL']
else:
	email = '%s@%s' % (pwd.getpwuid(os.geteuid())[0], os.uname()[1])

# Pager destination (user@host@subject), only consulted when -P is given.
page = os.environ.get('NWUPAGE', page)

sys.stderr.write('Using default e-mail address %s\n' % email)
command_to_run = ''
command = ''
search = ''
port = -1
max_duration = 0
do_not_wait = 0
interval = 60.0

# The option loop eats sys.argv, so keep an untouched copy for the log.
argvcopy = list(sys.argv)

# Consume options from the front of sys.argv one at a time; each branch
# deletes the tokens it handled, so sys.argv[1] is always the next option.
# An unknown option, or one missing its operands, ends in usage(1).
while sys.argv[1:]:
	if sys.argv[2:] and sys.argv[1] == '-e':
		email = sys.argv[2]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[2:] and sys.argv[1] == '-m':
		max_duration=string.atoi(sys.argv[2])
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[2:] and sys.argv[1] == '-i':
		interval=string.atoi(sys.argv[2])
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[1:] and sys.argv[1] == '-h':
		usage(0)
	elif sys.argv[1:] and sys.argv[1] == '-w':
		do_not_wait = 1
		del sys.argv[1]
	elif sys.argv[1:] and sys.argv[1] == '-P':
		# paging needs NWUPAGE in the form user@host@subject
		if page == '':
			sys.stderr.write('Paging requested, but not NWUPAGE environment variable\n')
			sys.stderr.write('NWUPAGE must have user@host@subject\n')
			sys.stderr.write('NWUPAGE has no value\n')
			usage(1)
		page_list=string.splitfields(page,'@')
		if len(page_list) != 3:
			sys.stderr.write('Paging requested, but not NWUPAGE environment variable\n')
			sys.stderr.write('NWUPAGE must have user@host@subject\n')
			sys.stderr.write("NWUPAGE does not have two @'s %d\n" % len(page_list))
			usage(1)
		should_send_page = 1
		del sys.argv[1]
	elif sys.argv[2:] and sys.argv[1] == '-t':
		truth=1
		if command != '':
			sys.stderr.write('%s: -t and -f are mutually exclusive, command is already %s\n' % (sys.argv[0], command))
			usage(1)
		command = sys.argv[2]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[2:] and sys.argv[1] == '-f':
		truth=0
		if command != '':
			sys.stderr.write('%s: -t and -f are mutually exclusive, command is already %s\n' % (sys.argv[0], command))
			usage(1)
		command = sys.argv[2]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[2:] and sys.argv[1] == '-p':
		pid = string.atoi(sys.argv[2])
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[1] == '-F':
		# -F takes no operand.  This branch used to test for '-p' a second
		# time, so it could never run and -F was rejected as an unknown option.
		first = 1
		del sys.argv[1]
	elif sys.argv[2:] and sys.argv[1] == '-s':
		search = sys.argv[2]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[1] == '-l' and sys.argv[3:]:
		have_less_than = 1
		less_than = string.atof(sys.argv[2])
		ltcommand = sys.argv[3]
		del sys.argv[1]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[1] == '-g' and sys.argv[3:]:
		have_greater_than = 1
		greater_than = string.atof(sys.argv[2])
		gtcommand = sys.argv[3]
		del sys.argv[1]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[3:] and sys.argv[1] == '-n':
		command = ''
		host = sys.argv[2]
		check_host(host)
		port = getportno(sys.argv[3])
		del sys.argv[1]
		del sys.argv[1]
		del sys.argv[1]
	elif sys.argv[1:] and sys.argv[1] == '-c' and sys.argv[2:]:
		command_to_run=sys.argv[2]
		del sys.argv[1]
		del sys.argv[1]
	else:
		usage(1)

# Exactly one kind of thing may be watched per run; count how many were asked for.
n = sum([
	command != '',
	pid != -1,
	port != -1,
	search != '',
	command_to_run != '',
	have_less_than,
	have_greater_than,
])
if n != 1:
	sys.stderr.write('number of behaviors selected is is %d, but must be 1\n' % n)
	usage(1)

# reference point for every "elapsed time" that gets reported
t0 = time.time()

# -F only has a meaning together with a numeric comparison
if first and not (have_less_than or have_greater_than):
	sys.stderr.write('-F requires either -l or -g\n')
	usage(1)
	
def get_process_description(pid):
	"""Return the command line of process pid, wrapped in double quotes.

	"ps -eo pid,args" is tried first and "process-list2" second.  The result
	is '""' when neither yields a line for pid.

	The emptiness test is made on the bare text.  It used to be made after
	the quotes had been added, when the string could never be empty, so the
	fallback was dead code.  Each pipe is also closed once its first line
	has been read.
	"""
	description = ''
	for lister in ('ps -eo pid,args', 'process-list2'):
		pipe = os.popen("%s | grep '^ *%d  *' | sed 's/^ *[0-9]*  *//'" % (lister, pid),'r')
		try:
			description = pipe.readline().strip()
		finally:
			pipe.close()
		if description:
			break
	return '"'+description+'"'

class ps_entry:
	"""One line of process-list output, ordered by its leading pid.

	Lines whose first field is not a number (the ps header, blank lines) get
	pid -1, so they sort ahead of every real process.
	"""
	def __init__(self,line):
		try:
			pid = int(line.split()[0])
		except (IndexError, ValueError):
			# IndexError: blank line; ValueError: non-numeric first field
			pid = -1
		self.pid = pid
		self.text = line
	def __cmp__(self,other):
		# the pids must be unique, so we don't really need to sort on anything else - but note that some operating systems will
		# intentionally randomize their pids, and all systems I'm aware of will experience "pid wrap" from time to time
		if self.pid < other.pid:
			return -1
		if self.pid > other.pid:
			return 1
		return 0
	def __lt__(self,other):
		# same ordering as __cmp__, for interpreters where sort() uses rich comparisons
		return self.pid < other.pid
	def __str__(self):
		return self.text
	__repr__=__str__

#def get_ps():
#	processes = os.popen("ps -eo pid,args",'r').readlines()
#	if not processes[0:]:
#		processes = os.popen("process-list2",'r').readlines()
#	return processes

def get_ps():
	"""Return the lines of the process list, sorted by pid.

	"ps -eo pid,args" is tried first and "process-list2" second; the first
	one that produces any output wins.  os.popen() does not raise when a
	command is missing, so an empty listing, not an exception, is what
	triggers the fallback.  If neither command produces output, an error is
	written to stderr and the program exits with status 1.
	"""
	for listing_command in ("ps -eo pid,args", "process-list2"):
		try:
			pipe = os.popen(listing_command,'r')
		except (OSError, IOError):
			continue
		try:
			lines = pipe.readlines()
		finally:
			pipe.close()
		if lines:
			processes = [ps_entry(line) for line in lines]
			processes.sort()
			return [str(entry) for entry in processes]
	sys.stderr.write('Sorry, I could not get a process list!\n')
	sys.exit(1)

def do_pid(pid):
	"""Poll until process pid is gone, then notify and exit.

	The process counts as running for as long as /proc/<pid> can be
	stat()ed.  A progress line is printed on every poll, polls are interval
	seconds apart, and once max_duration (when non-zero) is exceeded a
	"Timed out" notification is sent instead.  process_notify() exits the
	program, so this function does not return.
	"""
	process_description = get_process_description(pid)
	iteration=0
	while 1:
		try:
			os.stat('/proc/%d' % pid)
		except OSError:
			# Only a failed stat means the process is gone.  A bare except
			# here would also catch the SystemExit raised by the signal
			# handler and turn a termination into a false "done" report.
			break
		sys.stdout.write('pid %d, command %s not yet done, iteration %d, elapsed time %s\n' % (pid,process_description,iteration,tm(time.time()-t0)))
		if max_duration != 0 and time.time() - t0 > max_duration:
			process_notify(do_not_wait,email,os.uname()[1],pid,'Timed out!!!!!!!!!!!!!!!!!!!! '+process_description,time.time()-t0)
		iteration+=1
		time.sleep(interval)
	process_notify(do_not_wait,email,os.uname()[1],pid,process_description,time.time()-t0)

#bad=0
#try:
#	logfile=open(os.path.expanduser('~/.notify-when-up.log'),'a')
#except:
#	# hmmmm... nested exceptions copacetic?
#	try:
#		# don't bother with the euid - just assume that we're root, and
#		# we don't have write access to the homedir due to NFS
#		fbfn=os.path.expanduser('~root/.notify-when-up.log')
#		logfile=open(fbfn,'a')
#	except:
#		logfile_glitch=1
#		sys.stderr.write('Could not open logfile %s, not logging\n' % os.path.expanduser('~/.notify-when-up.log'))
#		bad=1
#	else:
#		sys.stderr.write('Falling back to %s for logging' % fbfn)
#if bad:
#	
#else:
#	logfile_glitch=0
#	logfile.write('Start %s %s\n' % (unique_id, string.join(argvcopy)))
#	logfile.flush()
#	signal.signal(signal.SIGTERM, signal_received)
#	signal.signal(signal.SIGHUP, signal_received)
#	signal.signal(signal.SIGINT, signal_received)

def ewho():
	"""Return the login name of the effective uid, or '#<uid>' if it has none.

	The fallback used to call pwd.geteuid(), which does not exist, so a uid
	without a passwd entry raised AttributeError instead of returning '#<uid>'.
	"""
	euid = os.geteuid()
	try:
		return pwd.getpwuid(euid)[0]
	except KeyError:
		# no passwd entry for this uid
		return '#'+str(euid)

def who():
	"""Return the login name of the real uid, or '#<uid>' if it has none.

	The fallback used to call pwd.getuid(), which does not exist, so a uid
	without a passwd entry raised AttributeError instead of returning '#<uid>'.
	"""
	uid = os.getuid()
	try:
		return pwd.getpwuid(uid)[0]
	except KeyError:
		# no passwd entry for this uid
		return '#'+str(uid)

# Open the log for appending: first under $HOME, then under the home
# directory of the effective user.  logfile_glitch stays 1 when neither can
# be opened, and the rest of the program then skips logging.
logfile_glitch=1
for logfn in [ os.path.expanduser('~/.notify-when-up.log'), \
	os.path.expanduser('~%s/.notify-when-up.log' % ewho()) ]:
	try:
		logfile=open(logfn,'a')
	except (IOError, OSError):
		# only a failure to open the file means "try the next candidate";
		# anything else (e.g. an interrupt) should propagate
		sys.stderr.write('Logging to %s failed\n' % logfn)
	else:
		logfile_glitch=0
		break

if not logfile_glitch:
	sys.stderr.write('Logging to %s initiated\n' % logfn)
	logfile.write('Start %s %s\n' % (give_unique_id(time.time()), ' '.join(argvcopy)))
	logfile.flush()
	# the handlers write a "Terminating" record, so install them only when logging
	signal.signal(signal.SIGTERM, signal_received)
	signal.signal(signal.SIGHUP, signal_received)
	signal.signal(signal.SIGINT, signal_received)

# Largest number of search hits that will be offered as a menu; taken from
# $LINES when that is set to a number, otherwise 24.
maxlines=24
if 'LINES' in os.environ:
	sys.stderr.write('Found LINES environment variable\n')
	try:
		maxlines = int(os.environ['LINES'])
	except ValueError:
		# an empty or non-numeric LINES used to abort the program with a traceback
		sys.stderr.write('Ignoring non-numeric LINES value, using %d\n' % maxlines)
else:
	sys.stderr.write('Did not find LINES environment variable\n')

# Dispatch on the single behavior selected on the command line.  Every branch
# polls until its condition holds (or, with -m, until max_duration seconds
# have passed) and then calls one of the *_notify() functions, which deliver
# the notifications and exit the program.
if pid != -1:
	# -p pid: wait for an existing process to go away
	do_pid(pid)
elif command_to_run != '':
	# -c command: run the command in a forked child and wait for it here.
	# this one involves a pid alright, but it's very different from the do_pid stuff
	pid = os.fork()
	if pid == 0:
		status = os.system(command_to_run)
		if status == None:
			sys.exit(0)
		else:
			sys.exit(status/256)
	else:
		process_description = command_to_run
		iteration=0
		# check on the child at least once a second, but only print a
		# progress line once per full interval
		subinterval = min(interval, 1.0)
		prior_full_interval_no = -1
		while 1:
			p,v = os.waitpid(pid,os.WNOHANG)
			if p == pid:
				break
			full_interval_no = int((time.time() - t0) / interval)
			if full_interval_no != prior_full_interval_no:
				sys.stdout.write('pid %d, command %s not yet done, iteration %d, elapsed time %s\n' % (pid,process_description,iteration,tm(time.time()-t0)))
				prior_full_interval_no = full_interval_no
				iteration+=1
			if max_duration != 0 and time.time() - t0 > max_duration:
				process_notify(do_not_wait,email,os.uname()[1],pid,'Timed out!!!!!!!!!!!!!!!!!!!! '+process_description,time.time()-t0)
			time.sleep(subinterval)
		# v is a raw wait status (exit code in the high byte); report the
		# actual exit code rather than e.g. 256 for "exit 1"
		if os.WIFEXITED(v):
			exit_value = os.WEXITSTATUS(v)
		else:
			exit_value = v
		process_notify(do_not_wait,email,os.uname()[1],pid,process_description+" exited with value %d" % exit_value,time.time()-t0)
elif command:
	# -t / -f command: rerun the command until its truth value matches
	iteration=0
	while 1:
		exit_status = not (os.system(command) / 256)
		sys.stderr.write('We have %d and %d, expecting to notify when they are not equal\n' % (not exit_status, truth))
		# the not, of course, converts from bourne shell truth values to 0=false and 1=true, as is more customary
		if (not exit_status == truth):
			sys.stdout.write('command %s not yet done, iteration %d, elapsed time %s\n' % (command,iteration,tm(time.time()-t0)))
		else:
			command_notify(do_not_wait,email,os.uname()[1],command,truth,time.time()-t0)
			break
		if max_duration != 0 and time.time() - t0 > max_duration:
			command_notify(do_not_wait,email,os.uname()[1],'Timed out!!!!!!!!!!!!!!!!!!!! '+command,truth,time.time()-t0)
		time.sleep(interval)
		iteration+=1
elif have_less_than:
	# -l number command: rerun the command until its first output line is below number
	iteration=0
	while 1:
		pipe = os.popen(ltcommand,'r')
		line = pipe.readline()
		# close every poll's pipe; otherwise each iteration leaks a file
		# descriptor and an unreaped child
		pipe.close()
		if first:
			# -F: only the first whitespace-separated field is the number
			# (string.fields does not exist; string.split is what was meant)
			line = string.split(line)[0]
		numeric = string.atof(string.strip(line))
		if numeric < less_than:
			less_than_notify(do_not_wait,email,os.uname()[1],ltcommand,numeric,less_than,time.time()-t0)
		sys.stdout.write('command %s returned %f, which is not less than %f, iteration %d, elapsed time %s\n' % (ltcommand,numeric,less_than,iteration,tm(time.time()-t0)))
		if max_duration != 0 and time.time() - t0 > max_duration:
			less_than_notify(do_not_wait,email,os.uname()[1],'Timed out!!!!!!!!!!!!!!!!!!!! '+ltcommand,numeric,less_than,time.time()-t0)
		time.sleep(interval)
		iteration+=1
elif have_greater_than:
	# -g number command: rerun the command until its first output line is above number
	iteration=0
	while 1:
		pipe = os.popen(gtcommand,'r')
		line = pipe.readline()
		# close every poll's pipe; otherwise each iteration leaks a file
		# descriptor and an unreaped child
		pipe.close()
		if first:
			# -F: only the first whitespace-separated field is the number
			# (string.fields does not exist; string.split is what was meant)
			line = string.split(line)[0]
		numeric = string.atof(string.strip(line))
		if numeric > greater_than:
			greater_than_notify(do_not_wait,email,os.uname()[1],gtcommand,numeric,greater_than,time.time()-t0)
		sys.stdout.write('command %s returned %f, which is not greater than %f, iteration %d, elapsed time %s\n' % (gtcommand,numeric,greater_than,iteration,tm(time.time()-t0)))
		if max_duration != 0 and time.time() - t0 > max_duration:
			greater_than_notify(do_not_wait,email,os.uname()[1],'Timed out!!!!!!!!!!!!!!!!!!!! '+gtcommand,numeric,greater_than,time.time()-t0)
		time.sleep(interval)
		iteration+=1
elif search != '':
	# -s searchstring: pick a process out of the ps listing, then watch its pid
	if os.isatty(0):
		processes = get_ps()
		hits=[]
		for line in processes:
			# skip our own entry in the listing
			if line.find(search) != -1 and line.find(sys.argv[0]) == -1:
				hits.append(line)
		length = len(hits)
		if length == 0:
			sys.stderr.write('%s: Sorry, no matches on %s found\n' % (sys.argv[0], search))
			sys.exit(1)
		elif length == 1:
			sys.stderr.write('%s: Single match found: "%s", monitoring\n' % (sys.argv[0], string.strip(hits[0])))
			fields = string.split(string.strip(hits[0]))
			do_pid(string.atoi(fields[0]))
		else:
			if length > maxlines:
				sys.stderr.write('%s: Sorry, too many matches: %d\n' % (sys.argv[0], length))
				sys.exit(1)
			# multiple hits on the search string
			for i in range(length):
				sys.stdout.write("%d) %s" % (i, hits[i]))
			sys.stdout.write('\nEnter number corresponding to process to monitor: ')
			sys.stdout.flush()
			answer = string.strip(sys.stdin.readline())
			try:
				ind = string.atoi(answer)
			except ValueError:
				ind = -1
			# reject anything that is not one of the numbers just listed
			# (a negative index would silently count from the end)
			if ind < 0 or ind >= length:
				sys.stderr.write('%s: Sorry, "%s" is not one of the listed numbers\n' % (sys.argv[0], answer))
				sys.exit(1)
			fields = string.split(hits[ind])
			do_pid(string.atoi(fields[0]))
	else:
		sys.stderr.write('%s: Sorry, use of -s is illegal when not running on a tty\n' % sys.argv[0])
		sys.exit(1)
else:
	# -n host port: keep trying to connect until something is listening
	iteration=0
	while 1:
		# the state of a socket after a failed connect() is unspecified, so
		# every attempt gets a fresh socket instead of reusing one
		sock = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
		try:
			sock.connect((host,port))
		except socket.error:
			sock.close()
			sys.stdout.write('host %s, port %s not yet up, iteration %d, elapsed time %s\n' % (host,port,iteration,tm(time.time()-t0)))
		else:
			sock.close()
			port_notify(do_not_wait,email,host,port,time.time()-t0)
		if max_duration != 0 and time.time() - t0 > max_duration:
			port_notify(do_not_wait,email,'Timed out!!!!!!!!!!!!!!!!!!!! '+host,port,time.time()-t0)
		time.sleep(interval)
		iteration+=1

