#!/usr/bin/env python3
"""Publish selected Palm memos (synced via jpilot) as HTML pages.

The memo database is unpacked with ``par`` into a scratch directory, the
first line of every unpacked memo is taken as its title, and each memo whose
title is listed in ``~/.jpilot/web-extract/titles`` is converted with
``txt2html`` into ``~/public_html/``.  An index page,
``~/public_html/palm-memos.html``, is rebuilt from ``theme.html`` plus one
link per published memo.
"""

# Rewritten from O(n^2) in bash to python, Thu Mar 24, 2005.  The matching of
# requested titles against extracted memos is now a single merge pass over
# two sorted lists.

# To use this script:
# you'll need par (which is part of the prc.tgz package) and txt2html
# (which is a nice python script for an obvious purpose).  And of
# course, we're assuming you're sync'ing your palm via jpilot from time
# to time at least.

# The following patch to txt2html should make it better suited for
# converting palm memos; after this patch, the first line of the memo
# will be used as the title, rather than the filename:
#--- txt2html.t 2004-10-28 15:58:34.000000000 -0700
#+++ txt2html   2004-10-28 15:59:12.000000000 -0700
#@@ -34,8 +34,9 @@
#      return '...' % (img, img)
#
#  def main(filename):
#-     title = re.sub('(.*)[.].*$', '\\1', os.path.basename(filename))
#+     #title = re.sub('(.*)[.].*$', '\\1', os.path.basename(filename))
#      file = open(filename)
#+     title = file.readline()
#
#      prev_blank = 1
#      img = None

# The script has a special case as a sort of safety check:
# it won't add a memo whose title is less than 5 characters long.
# This should hopefully prevent the mishap where you accidentally
# add a blank line to the titles list, and suddenly find every last one
# of your memos on the internet. :)

import functools
import os
import re
import shutil
import string
import subprocess
import sys
import time

# Titles shorter than this are never published (see the safety note above).
MIN_TITLE_LENGTH = 5

# latin-1 maps every byte to exactly one code point, so memo and title bytes
# pass through text-mode files unchanged, as they did with byte strings.
MEMO_ENCODING = 'latin-1'

# NOTE(review): the HTML tags in the fragments below were missing from the
# copy of the script this revision was made from (only the text and the
# newlines around it survived).  The markup is a reconstruction -- confirm
# it against the deployed pages before relying on it.
RULE_HTML = b'<hr>'
MEMO_FOOTER = b'<hr>' + b'<a href="palm-memos.html">Back to Dan\'s palm memos</a>'
INDEX_ENTRY = '<br><a href="%s">%s</a>\n'
INDEX_TIMESTAMP = '<hr>\nPages last created at %s\n<hr>\n'
INDEX_CLOSE = '</body></html>\n'

# Characters copied verbatim from a title into its file name.
_FILENAME_CHARS = frozenset(string.ascii_letters + string.digits + '.=+-')

# A line made only of four or more dashes (what sed's ^-----*$ matched).
_DASH_RULE = re.compile(br'-{4,}')


def fn_from_title(title):
    """Return the HTML file name used for the memo called *title*.

    ASCII letters, digits and ``.=+-`` are kept, spaces and slashes become
    ``-``, every other character is dropped, and ``.html`` is appended.
    This mirrors the sed expressions of the original shell version:
    ``s#[^ /A-Za-z0-9\\.=\\+-]*##g`` then ``s#[ /]#-#g`` then ``s/$/.html/``.
    """
    kept = []
    for ch in title:
        if ch in _FILENAME_CHARS:
            kept.append(ch)
        elif ch in ' /':
            kept.append('-')
    return ''.join(kept) + '.html'


def _rule_to_html(line):
    """Replace a line consisting solely of dashes with a horizontal rule."""
    newline = b'\n' if line.endswith(b'\n') else b''
    body = line[:len(line) - len(newline)]
    if _DASH_RULE.fullmatch(body):
        return RULE_HTML + newline
    return line


def _reflow(line):
    """Return *line* re-wrapped by the external ``fmt`` program."""
    # stderr is folded into stdout, as the popen4() call this replaces did.
    done = subprocess.run(['fmt'], input=line,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return done.stdout


def write_memo(title, srcfn, dstfn):
    """Convert the memo file *srcfn* to ``~/public_html/<dstfn>``.

    title -- memo title; only used in the progress message.
    srcfn -- path of the extracted plain-text memo.
    dstfn -- file name (no directory) of the HTML page to write.

    The memo is run through ``txt2html``; NUL bytes are removed, lines made
    only of dashes become a horizontal rule, and lines longer than 80 bytes
    are re-wrapped with ``fmt``.  A link back to the index is appended.

    Raises OSError if ``txt2html`` or ``fmt`` cannot be started or the page
    cannot be written.
    """
    sys.stderr.write('Converting %s to %s\n' % (title, dstfn))
    destination = os.path.expanduser('~/public_html/' + dstfn)
    # An argument list (no shell) keeps odd characters in srcfn harmless.
    # The converter is started before the page is opened so that a missing
    # txt2html does not truncate an existing page.
    converter = subprocess.Popen(['txt2html', srcfn], stdout=subprocess.PIPE)
    try:
        with open(destination, 'wb') as page:
            for raw in converter.stdout:
                line = _rule_to_html(raw.replace(b'\0', b''))
                if len(line) > 80:
                    # Only long lines pay for a trip through fmt; spawning
                    # it for every line slows things down a lot.
                    line = _reflow(line)
                page.write(line)
            page.write(MEMO_FOOTER)
    finally:
        converter.stdout.close()
        converter.wait()


@functools.total_ordering
class title_class:
    """An extracted memo: its title (first line) and the file holding it.

    Instances compare and sort by ``title`` alone.  Both values are plain
    attributes: ``obj.title`` and ``obj.filename``.
    """

    def __init__(self, title, filename):
        self.title = title
        self.filename = filename

    def __repr__(self):
        return 'title_class(%r, %r)' % (self.title, self.filename)

    def __eq__(self, other):
        if not isinstance(other, title_class):
            return NotImplemented
        return self.title == other.title

    def __lt__(self, other):
        if not isinstance(other, title_class):
            return NotImplemented
        return self.title < other.title

    # Equality is by title only and instances are mutable, so no hashing.
    __hash__ = None


def _matched_memos(do_titles, have_titles):
    """Yield ``(title, filename)`` for each requested title that exists.

    do_titles   -- sorted list of requested title strings.
    have_titles -- sorted list of title_class objects for extracted memos.

    Both lists are walked once, in step.  Requested titles that are too
    short are skipped, and requested titles with no matching memo produce a
    warning on stderr.  Extracted memos nobody asked for are ignored.
    """
    have_no = 0
    len_have = len(have_titles)
    for wanted in do_titles:
        if len(wanted) < MIN_TITLE_LENGTH:
            # An extremely short (or blank) entry is most likely a typo in
            # the titles list; never publish on the strength of one.
            sys.stderr.write('Skipping very short title: %s\n' % wanted)
            continue
        while have_no < len_have and have_titles[have_no].title < wanted:
            have_no += 1
        if have_no < len_have and have_titles[have_no].title == wanted:
            yield wanted, have_titles[have_no].filename
            have_no += 1
        else:
            sys.stderr.write('Warning: title %s not found!!!!!!!!!!!!!!!!!!!!!!!\n' % wanted)


def main():
    """Extract the memo database and regenerate the published pages."""
    sys.stderr.write('Running web-extract at %s\n' % time.ctime())

    # Make the directory...  It probably already exists though.
    extract_dir = os.path.expanduser('~/.jpilot/web-extract/files')
    os.makedirs(extract_dir, exist_ok=True)
    os.chdir(extract_dir)

    # Remove previously extracted palm memos.
    for stale in os.listdir('.'):
        os.unlink(stale)

    # Extract current palm memos.  This stays a shell command because the
    # path to par is computed with shell backticks.
    status = os.system(os.path.expanduser('~/trees/`/dcs/bin/systype`/bin/par') + ' x ../../Memo32DB.pdb')
    if status != 0:
        sys.stderr.write('Warning: par exited with status %s\n' % status)

    # Copy the top of the top-level document.
    index_new = os.path.expanduser('~/public_html/palm-memos.html.new')
    shutil.copyfile(os.path.expanduser('~/public_html/theme.html'), index_new)

    titles_path = os.path.expanduser('~/.jpilot/web-extract/titles')
    with open(titles_path, 'r', encoding=MEMO_ENCODING) as titles_file:
        do_titles = sorted(line.strip() for line in titles_file)

    have_titles = []
    for filename in os.listdir('.'):
        with open(filename, encoding=MEMO_ENCODING) as memo:
            # The title is always the first line.
            have_titles.append(title_class(memo.readline().strip(), filename))
    have_titles.sort()

    with open(index_new, 'a', encoding=MEMO_ENCODING) as memoshtml:
        for title, memo_file in _matched_memos(do_titles, have_titles):
            # Titles match - output a memo as HTML!
            fn = fn_from_title(title)
            memoshtml.write(INDEX_ENTRY % (fn, title))
            write_memo(title, memo_file, fn)
        memoshtml.write(INDEX_TIMESTAMP % time.ctime())
        memoshtml.write(INDEX_CLOSE)

    # The index is built under a temporary name and renamed at the very end,
    # so a failed run leaves the previous index in place.
    os.rename(index_new, os.path.expanduser('~/public_html/palm-memos.html'))
    sys.stderr.write('Done with web-extract at %s\n' % time.ctime())


if __name__ == '__main__':
    main()