#!/usr/bin/python

import os
import cgi
import sys
import time
import gdbm
import string
import socket
import urllib2
import traceback

def do_ct(first_time=True):
	"""Emit the CGI Content-type header (plus the blank line that ends headers).

	first_time: when true (the default), write the header; pass False to skip
	it (for callers that already emitted headers).
	"""
	# NOTE(review): the original also reassigned the *local* first_time to
	# False, which could not affect later calls; that dead store is removed.
	if first_time:
		sys.stdout.write('Content-type: text/html\n')
		sys.stdout.write('\n')

# Send Python error output to the browser instead of the server error log.
sys.stderr = sys.stdout
try:
	# Make the author's private library directory importable.
	sys.path.insert(0,os.path.expanduser('~strombrg/lib'))

	import BeautifulSoup
	import cachedb

	# Field separators for the flat database record format.  Control
	# characters are used because update() strips them out of memo text,
	# so they can never collide with real data.
	first_separator = chr(1)
	second_separator = chr(2)

	class url_class:
		"""One bookmarked URL extracted from a Pilot 'Plucker URLs' memo.

		Instances are persisted via to_database_representation() /
		from_database_representation(), keyed by to_database_key()
		(URL + memo title).
		"""

		def __init__(self, url, filename, memotitle, comment):
			# url and memotitle come straight from memo text: trim whitespace
			self.url = url.strip()
			self.urltitle = ''
			self.filename = filename
			self.memotitle = memotitle.strip()
			self.hidden = 0
			self.comment = comment
			self.category = ''

		def __cmp__(self, other):
			# Python 2 ordering: memo title first, then fetched page title,
			# then the URL itself.  Tuple comparison gives the same cascade
			# as comparing each field in turn.
			return cmp((self.memotitle, self.urltitle, self.url),
				(other.memotitle, other.urltitle, other.url))

		def hide(self):
			# Mark this entry as suppressed in listings.
			self.hidden = 1

		def unhide(self):
			self.hidden = 0

		def to_database_key(self):
			# URL plus memo title uniquely identifies an entry.
			return '%s%s%s' % (self.url, first_separator, self.memotitle)

		def __str__(self):
			return "%s|%s|%s|%s" % (self.filename, self.memotitle, self.url, self.urltitle)

		def to_html(self):
			"""Return this entry as one HTML <tr>, or '' when hidden."""
			if self.hidden:
				return ''
			# An entry with no scheme (no ':') becomes a Google search link.
			if self.url.find(':') == -1:
				url = 'http://www.google.com/search?hl=en&q=%s&btnG=Google+Search' % self.url
			else:
				url = self.url
			s = '<tr>\n'
			for value in [ self.memotitle, '<a href="%s">%s</a>' % (url, self.urltitle[:50]), '<a href="%s">%s</a>' % (url, self.url[:30]), \
				'<a href="plucker-urls.cgi?hide%s%s">hide</a>' % (first_separator, self.to_database_key()), '<a href="plucker-urls?categorize">categorize</a>', '<a href="plucker-urls?comment">comment</a>' ]:
				s += '\t<td>%s</td>\n' % value
			s += '</tr>\n'
			return s

		def to_database_representation(self):
			"""Serialize to the first_separator-delimited record read back by
			from_database_representation()."""
			return first_separator.join([self.filename, self.memotitle,
				self.url, self.urltitle, '%d' % self.hidden,
				self.category, self.comment])

		def get_title(self):
			"""Fetch self.url and set self.urltitle from its <title> tag.

			On any fetch failure, urltitle is set to an HTML error marker
			instead of raising.  A 20 second default socket timeout is applied
			for the fetch and always restored afterward.
			"""
			# assume that urllib2 isn't reusing preexisting sockets somehow...
			socket.setdefaulttimeout(20)
			try:
				try:
					conn = urllib2.urlopen(self.url)
					try:
						html = conn.read()
					finally:
						# close even when read() raises (the original leaked here)
						conn.close()
				except Exception:
					self.urltitle = '<font color=#c00000>(Could not open URL)</font>'
					return
			finally:
				# always unset the timeout so unrelated sockets don't inherit it
				socket.setdefaulttimeout(None)
			soup = BeautifulSoup.BeautifulSoup()
			soup.feed(html)
			title_tag = soup.first('title')
			# guard against pages with no <title> at all (crashed before)
			if title_tag is not None and title_tag.contents:
				self.urltitle = title_tag.contents[0]
			else:
				self.urltitle = '<font color=#c00000>(Could not obtain title)</font>'
			del soup

	def from_database_representation(s):
		"""Inverse of url_class.to_database_representation().

		Field order in the record:
		0 filename, 1 memotitle, 2 url, 3 urltitle, 4 hidden, 5 category,
		6 comment.
		"""
		fields = s.split(first_separator)
		#               url      , filename , memotitle, comment
		url = url_class(fields[2], fields[0], fields[1], fields[6])
		url.urltitle = fields[3]
		# hidden is stored as a decimal string ('0' or '1')
		url.hidden = int(fields[4])
		url.category = fields[5]
		return url

	def delete_chars(s, c):
		"""Return s with every occurrence of character c removed."""
		# str.replace does in one C-level pass what the original did with
		# string.splitfields + string.joinfields
		return s.replace(c, '')

	def br():
		# emit an HTML line break on its own line
		sys.stdout.write('<br>\n')

	def p():
		# emit an HTML paragraph break on its own line
		sys.stdout.write('<p>\n')

	def ul():
		# open an HTML unordered list
		sys.stdout.write('<ul>\n')

	def end_ul():
		# close an HTML unordered list
		sys.stdout.write('</ul>\n')

	def extract(database):
		# placeholder: extraction is not implemented yet; just emit a line
		# break (the br() helper, inlined) so the page shows some progress
		sys.stdout.write('<br>\n')

	def chdir_base():
		# could use some testing in the scenario where ~/.jpilot/plucker-urls does not preexist
		try:
			os.chdir(os.path.expanduser('~/.jpilot'))
		except:
			print 'cd ~/.jpilot failed'
			br()
			sys.exit(1)
		try:
			os.chdir(os.path.expanduser('plucker-urls'))
		except:
			try:
				os.mkdir('plucker-urls', 0755)
			except IOError:
				pass
			except:
				sys.stderr.write('mkdir plucker-urls failed\n')
				br()
				sys.exit(1)
			try:
				os.chdir('plucker-urls')
			except:
				sys.stdout.write('cd plucker-urls failed\n')
				br()

	def update(url_database, filename_database):
		"""Unpack MemoDB.pdb and add URLs from new 'Plucker URLs' memos.

		url_database maps database keys to url_class records;
		filename_database remembers which unpacked memo files were already
		processed.  At most 500 URLs are added per run so the generated page
		stays a manageable size.
		"""
		num_added = 0
		# make sure 'par' (the pilot archive extractor) is on the PATH
		os.putenv('PATH', '%s:%s' % (os.environ['PATH'], os.path.expanduser('~/trees/rhel-3-i686/bin')))
		os.system('par x ../MemoDB.pdb')
		for filename in os.listdir('.'):
			if filename_database.has_key(filename):
				# already processed on an earlier run
				continue
			try:
				memo_file = open(filename, 'r')
			except IOError:
				sys.stdout.write('Opening %s failed - continuing\n' % filename)
				br()
			else:
				try:
					memo_title = memo_file.readline()
					# only memos whose first line starts 'Plucker URLs ' count
					if memo_title[:13] == 'Plucker URLs ':
						memo_title = memo_title[13:]
						while 1:
							line = memo_file.readline()
							if not line:
								break
							line = line.strip()
							# strip out any characters with special values to
							# this program or to C code
							for sep in [ first_separator, second_separator, chr(0) ]:
								line = delete_chars(line, sep)
							if not line:
								continue
							url = url_class(line, filename, memo_title, '')
							if url_database.has_key(url.to_database_key()):
								# already recorded on a previous run
								continue
							# only real http URLs get a title fetch; bare
							# search terms would not resolve
							if line[:7] == 'http://':
								url.get_title()
							sys.stdout.write('%s: Adding: %d %s\n' % (time.ctime(time.time()), num_added, str(url)))
							br()
							url_database[url.to_database_key()] = url
							num_added += 1
				finally:
					# close unconditionally (the original leaked files whose
					# first line didn't match)
					memo_file.close()
				filename_database[filename] = ''
			if num_added >= 500:
				sys.stdout.write("Won't add more URL's on this update.  There've been too many, and your browser is likely to get cranky\n")
				br()
				return
		sys.stdout.write("Added %d URL's\n" % num_added)
		br()

	def display(database):
		"""Write all visible URL entries as an HTML table.

		Entries are sorted (by memo title, then page title, then URL — see
		url_class.__cmp__) and then reversed, with a visible/hidden summary
		line after the table.
		"""
		sys.stdout.write("<title>Plucker URL's</title>\n")
		sys.stdout.write('<body bgcolor="#102030" text="#f0f0f0" link="#f0e0f0" vlink="#e0f0f0">\n')
		sys.stdout.write('<table width="100%" border="1">\n')
		# comprehension instead of map+lambda; also stop shadowing the
		# 'list' builtin as the original did
		entries = [database[key] for key in database.keys()]
		entries.sort()
		entries.reverse()
		hidden_keyno = 0
		visible_keyno = 0
		for entry in entries:
			if entry.hidden:
				hidden_keyno += 1
			else:
				sys.stdout.write('%s\n' % entry.to_html())
				visible_keyno += 1
		sys.stdout.write('</table>\n')
		p()
		sys.stdout.write("%d URL's visible, %d hidden, total of %d\n" % (visible_keyno, hidden_keyno, visible_keyno+hidden_keyno))
		sys.stdout.write("</body>\n")

	def erase_all(database):
		"""Delete every record from database, reporting progress per key.

		NOTE(review): the original only *printed* "Erasing key ..." and never
		actually deleted anything; the del below makes the function match its
		name and the 'erase-all' command that invokes it.
		"""
		keyno = 0
		# materialize the key list before deleting so iteration stays valid
		keys = list(database.keys())
		num_keys = len(keys)
		for key in keys:
			keyno += 1
			sys.stdout.write('Erasing key %d of %d\n' % (keyno, num_keys))
			br()
			del database[key]
		sys.stdout.write('Done\n')

	def main():
		"""CGI entry point: open the databases, parse argv, and dispatch on
		the first argument (update / display / erase-all / hide).

		The query string arrives as sys.argv[1]; a command and its arguments
		are joined with first_separator (see url_class.to_html's hide link).
		"""
		do_ct()

		if os.uname()[1] == 'dcs.nac.uci.edu':
			print 'Disabled on dcs.nac.uci.edu.<br>Please use http://seki.nac.uci.edu/~strombrg/plucker-urls.cgi instead.'
			return 0

		#database = gdbm.open(os.path.expanduser('~/public_html/plucker-urls/database', 'w')
		#def __init__(self, databasefile, databasetype, databasemode, to_string=None, from_string=None, max_elements_in_memory=10000, read_use=True, write_use=True, write_through=True, too_many_percent=95.0, \
		# we're using a write-through cache here, because:
		# 1) Performance isn't paramount for this project
		# 2) It's simpler - no cache flushing to worry about
		url_database = cachedb.database_cache(os.path.expanduser('~/public_html/plucker-urls/url_database'), gdbm, 'w', lambda x: x.to_database_representation(), from_database_representation, write_through=True)

		filename_database = gdbm.open(os.path.expanduser('~/public_html/plucker-urls/filename_database'), 'w')

#		for i in form.keys():
#			print i,form[i]

		#print cgi.parse_qsl(sys.argv[1], 1, 1)
#		import urlparse
		import urllib
#		print '-->',urlparse.urlparse(sys.argv[1]),'<--'
		# undo %xx URL escaping on every argument
		args = map(urllib.unquote, sys.argv[:])

		if args[1:]:
			first_arg_list = string.splitfields(args[1], first_separator)
		else:
			first_arg_list = []

		#sys.stdout.write('first_arg_list is %s\n<p>\n' % str(first_arg_list))
		if first_arg_list[0:] and first_arg_list[0] == 'update':
			extract(url_database)
			chdir_base()
			update(url_database, filename_database)
		elif first_arg_list[0:] and first_arg_list[0] == 'display':
			chdir_base()
			display(url_database)
		elif first_arg_list[0:] and first_arg_list[0] == 'erase-all':
			chdir_base()
			erase_all(url_database)
		elif first_arg_list[0:] and first_arg_list[0] == 'hide' and first_arg_list[1:]:
			chdir_base()
			# NOTE(review): hide() is not defined anywhere in this file, so
			# this branch would raise NameError — confirm where hide() lives
			hide(first_arg_list[1:])
		else:
			p()
			print 'Sorry, legal options to %s are "update" and "display" only, specified with a ? via CGI (for real life), or as an argument on the command line (for testing)' % sys.argv[0]
			print 'You instead gave me --> %s <--\n' % str(sys.argv[1:])

#		if sys.argv[1:] and sys.argv[1] == 'update':
#			extract(database)
#			chdir_base()
#			update(database)
#		elif sys.argv[1:] and sys.argv[1] == 'display':
#			chdir_base()
#			display(database)
#		elif sys.argv[1:] and sys.argv[1] == 'erase-all':
#			chdir_base()
#			erase_all(database)
#		elif sys.argv[1:] and sys.argv[1] == 'hide' and sys.argv[2:]:
#			chdir_base()
#			hide(sys.argv[2])
#		else:
#			p()
#			print 'Sorry, legal options to %s are "update" and "display" only, specified with a ? via CGI (for real life), or as an argument on the command line (for testing)' % sys.argv[0]
#			print 'You instead gave me --> %s <--\n' % str(sys.argv[1:])

	main()

# Top-level catch-all for the whole script: render any uncaught exception as
# an HTML page so it shows up in the browser (sys.stderr was pointed at
# stdout at the top of the file).
except:
	do_ct()
	print
	print "<br>\n\n<b>==> Error!  Debugging information follows:</b><br><pre>"
	traceback.print_exc()
	print "</pre>"