#!/usr/bin/python
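# plucker-urls.cgi: pull URLs out of Palm "Plucker URLs" memos (unpacked from
# MemoDB.pdb with "par"), look up each page's <title>, cache everything in gdbm
# databases under ~/public_html/plucker-urls, and serve the result as an HTML
# page supporting update, display, erase-all and hide actions.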
import os
import cgi
import sys
import time
import gdbm
import string
import socket
import urllib2
import traceback
def do_ct():
    # Emit the CGI header exactly once, even though do_ct() may be called again
    # from the exception handler at the bottom of the script
    if not getattr(do_ct, 'header_sent', False):
        do_ct.header_sent = True
        print 'Content-type: text/html'
        print
        # Send stderr (e.g. tracebacks) to the browser as well
        sys.stderr = sys.stdout
try:
sys.path.insert(0,os.path.expanduser('~strombrg/lib'))
import BeautifulSoup
import cachedb
first_separator = chr(1)
second_separator = chr(2)
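    # One bookmark record: the memo file it came from, the memo's title, the URL,
    # the page's fetched <title>, a hidden flag, a category, and a free-form comment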
class url_class:
def __init__(self, url, filename, memotitle, comment):
self.url = string.strip(url)
self.urltitle = ''
self.filename = filename
self.memotitle = string.strip(memotitle)
self.hidden = 0
self.comment = comment
self.category = ''
def __cmp__(self, other):
if self.memotitle < other.memotitle:
return -1
elif self.memotitle > other.memotitle:
return 1
if self.urltitle < other.urltitle:
return -1
elif self.urltitle > other.urltitle:
return 1
if self.url < other.url:
return -1
elif self.url > other.url:
return 1
return 0
def hide(self):
self.hidden = 1
def unhide(self):
self.hidden = 0
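        # Database keys are "url SEP memotitle", where SEP is first_separator (chr(1))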
def to_database_key(self):
return '%s%s%s' % (self.url, first_separator, self.memotitle)
def __str__(self):
return "%s|%s|%s|%s" % (self.filename, self.memotitle, self.url, self.urltitle)
        def to_html(self):
            # The HTML markup (tr/td/a tags and the ?hide href) is a reconstruction -
            # the original tags were lost when this listing was rendered as a web page
            if self.hidden:
                return ''
            s = '<tr>\n'
            if self.url.find(':') == -1:
                # No scheme, so treat the memo line as a Google search term
                url = 'http://www.google.com/search?hl=en&q=%s&btnG=Google+Search' % self.url
            else:
                url = self.url
            for value in [ self.memotitle,
                           '<a href="%s">%s</a>' % (url, self.urltitle[:50]),
                           '<a href="%s">%s</a>' % (url, self.url[:30]),
                           '<a href="?hide%s%s">hide</a>' % (first_separator, self.to_database_key()),
                           'categorize',
                           'comment' ]:
                s += '\t<td>%s</td> | \n' % value
            s += '</tr>\n'
            return s
def to_database_representation(self):
s = '%s%s%s%s%s%s%s%s%d%s%s%s%s' % (self.filename, first_separator, self.memotitle, first_separator, self.url, first_separator, self.urltitle, first_separator, self.hidden, first_separator, self.category, first_separator, self.comment)
# sys.stdout.write('Converting to database representation %s\n' % s)
return s
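        # Fetch self.url and record the page's <title> as self.urltitle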
def get_title(self):
# assume that urllib2 isn't reusing preexisting sockets somehow...
# be careful to unset this timeout in all exit paths, so we don't accidentally end up with socket timeouts on sockets that shouldn't time out!
socket.setdefaulttimeout(20)
try:
url=urllib2.urlopen(self.url)
html=url.read()
url.close()
#except ValueError:
except:
socket.setdefaulttimeout(None)
self.urltitle = '(Could not open URL)'
else:
socket.setdefaulttimeout(None)
soup=BeautifulSoup.BeautifulSoup()
soup.feed(html)
                titles = soup.first('title')
                if titles and titles.contents:
                    self.urltitle = titles.contents[0]
                else:
                    self.urltitle = '(Could not obtain title)'
del soup
# print 'got urltitle', self.urltitle
def from_database_representation(s):
        #              0             1          2        3            4           5             6
        # s = filename SEP memotitle SEP url SEP urltitle SEP hidden SEP category SEP comment   (SEP == first_separator)
fields = string.splitfields(s, first_separator)
# sys.stdout.write('fields is %s\n' % str(fields))
# br()
# url , filename, memotitle, comment
url = url_class(fields[2], fields[0], fields[1], fields[6])
url.urltitle = fields[3]
url.hidden = string.atoi(fields[4])
url.category = fields[5]
# sys.stdout.write('Converted %s to %s\n' % (str(fields), str(url)))
return url
def delete_chars(s, c):
# delete all occurences of character c from string s
return string.joinfields(string.splitfields(s, c), '')
    def br():
        print '<br>'
    def p():
        print '<p>'
    def ul():
        print '<ul>'
def extract(database):
#sys.stdout.write('extract() is still a NOOP!\n')
br()
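    # cd into ~/.jpilot/plucker-urls, creating the plucker-urls subdirectory if necessary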
def chdir_base():
# could use some testing in the scenario where ~/.jpilot/plucker-urls does not preexist
try:
os.chdir(os.path.expanduser('~/.jpilot'))
except:
print 'cd ~/.jpilot failed'
br()
sys.exit(1)
        try:
            os.chdir('plucker-urls')
        except:
            # plucker-urls didn't exist (or wasn't enterable) - try to create it
            try:
                os.mkdir('plucker-urls', 0755)
            except OSError:
                # Already exists
                pass
            except:
                sys.stderr.write('mkdir plucker-urls failed\n')
                br()
                sys.exit(1)
            try:
                os.chdir('plucker-urls')
            except:
                sys.stdout.write('cd plucker-urls failed\n')
                br()
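    # Unpack the Palm memo database with "par", then scan any memo files we haven't
    # processed before for URLs, fetching page titles and adding new entries to url_database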
def update(url_database, filename_database):
num_added = 0
os.putenv('PATH', '%s:%s' % (os.environ['PATH'], os.path.expanduser('~/trees/rhel-3-i686/bin')))
#print os.environ['PATH']
os.system('par x ../MemoDB.pdb')
for filename in os.listdir('.'):
if filename_database.has_key(filename):
continue
try:
file = open(filename, 'r')
except:
sys.stdout.write('Opening %s failed - continuing\n' % filename)
br()
else:
memo_title = file.readline()
if memo_title[:13] == 'Plucker URLs ':
memo_title = memo_title[13:]
while 1:
line = file.readline()
if not line:
break
line = string.strip(line)
# strip out any characters with special values to this program or to C code
for sep in [ first_separator, second_separator, chr(0) ]:
line = delete_chars(line, sep)
if line != '':
url = url_class(line, filename, memo_title, '')
if url_database.has_key(url.to_database_key()):
# sys.stdout.write('%s: Already present: %s\n' % (time.ctime(time.time()), str(url)))
# br()
pass
else:
if line[:7] == 'http://':
url.get_title()
sys.stdout.write('%s: Adding: %d %s\n' % (time.ctime(time.time()), num_added, str(url)))
br()
url_database[url.to_database_key()] = url
num_added += 1
file.close()
filename_database[filename] = ''
if num_added >= 500:
sys.stdout.write("Won't add more URL's on this update. There've been too many, and your browser is likely to get cranky\n")
br()
return
sys.stdout.write("Added %d URL's\n" % num_added)
br()
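    # Emit one table row per non-hidden URL, followed by a visible/hidden/total summary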
    def display(database):
        sys.stdout.write("Plucker URL's\n")
        sys.stdout.write('\n')
        # The <table> markup is a reconstruction - the original tags were lost
        # when this listing was rendered as a web page
        sys.stdout.write('<table border="1">\n')
        keys = database.keys()
        urls = map(lambda x: database[x], keys)
        urls.sort()
        urls.reverse()
        hidden_keyno = 0
        visible_keyno = 0
        for item in urls:
            if item.hidden:
                hidden_keyno += 1
            else:
                sys.stdout.write('%s\n' % item.to_html())
                visible_keyno += 1
        sys.stdout.write('</table>\n')
        p()
        sys.stdout.write("%d URL's visible, %d hidden, total of %d\n" % (visible_keyno, hidden_keyno, visible_keyno+hidden_keyno))
        sys.stdout.write('\n')
    def erase_all(database):
        keyno = 0
        keys = database.keys()
        # keys.sort()
        num_keys = len(keys)
        for key in keys:
            keyno += 1
            print "Erasing key %d of %d" % (keyno, num_keys)
            br()
            # Actually remove the entry; without this the function only reported progress
            del database[key]
        print 'Done'
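    # Dispatch on the first CGI / command-line argument: update, display, erase-all or hide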
def main():
do_ct()
if os.uname()[1] == 'dcs.nac.uci.edu':
            print 'Disabled on dcs.nac.uci.edu.<br>Please use http://seki.nac.uci.edu/~strombrg/plucker-urls.cgi instead.'
return 0
#database = gdbm.open(os.path.expanduser('~/public_html/plucker-urls/database', 'w')
#def __init__(self, databasefile, databasetype, databasemode, to_string=None, from_string=None, max_elements_in_memory=10000, read_use=True, write_use=True, write_through=True, too_many_percent=95.0, \
# we're using a write-through cache here, because:
# 1) Performance isn't paramount for this project
# 2) It's simpler - no cache flushing to worry about
        url_database = cachedb.database_cache(os.path.expanduser('~/public_html/plucker-urls/url_database'),
                                              gdbm,
                                              'w',
                                              lambda x: x.to_database_representation(),
                                              from_database_representation,
                                              write_through=True)
filename_database = gdbm.open(os.path.expanduser('~/public_html/plucker-urls/filename_database'), 'w')
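        # filename_database remembers which unpacked memo files have already been
        # processed, so update() can skip them on later runs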
# for i in form.keys():
# print i,form[i]
#print cgi.parse_qsl(sys.argv[1], 1, 1)
# import urlparse
import urllib
# print '-->',urlparse.urlparse(sys.argv[1]),'<--'
args = map(urllib.unquote, sys.argv[:])
if args[1:]:
first_arg_list = string.splitfields(args[1], first_separator)
else:
first_arg_list = []
#sys.stdout.write('first_arg_list is %s\n\n' % str(first_arg_list))
if first_arg_list[0:] and first_arg_list[0] == 'update':
extract(url_database)
chdir_base()
update(url_database, filename_database)
elif first_arg_list[0:] and first_arg_list[0] == 'display':
chdir_base()
display(url_database)
elif first_arg_list[0:] and first_arg_list[0] == 'erase-all':
chdir_base()
erase_all(url_database)
elif first_arg_list[0:] and first_arg_list[0] == 'hide' and first_arg_list[1:]:
chdir_base()
hide(first_arg_list[1:])
else:
p()
            print 'Sorry, legal options to %s are "update", "display", "erase-all" and "hide", specified with a ? via CGI (for real life), or as an argument on the command line (for testing)' % sys.argv[0]
print 'You instead gave me --> %s <--\n' % str(sys.argv[1:])
main()
except:
    # Something blew up - make sure an HTTP header has gone out, then dump the
    # traceback to the browser.  The <pre> tags are a reconstruction; the original
    # markup was lost when this listing was rendered as a web page.
    do_ct()
    print
    print "<pre>\n\n==> Error! Debugging information follows:\n"
    traceback.print_exc()
    print "</pre>"