#!/usr/bin/python
# Script to download a paper from a web site,
# creating a permanent link between the original paper
# and the local copy.
# usage: paper.py [-l] URL
# See also gtkpaper.py
#
#
#
# BUGS:
## The setup is a bit arduous
##
import getopt
import os
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import urllib
import urlparse

import dates
import HTMLgen

############ Change these to your suiting ##############
paperbase = "~kris/reading/"
commentbase = "~kris/reading/"
listbase = "/cs/student/kris/public_html/Research/"
serverurl = "http://merrimack.cs.ucsb.edu/"
bibtexfile = "~kris/TeX/language.bib"
# These are the words to remove from titles
nowords = r'\ba\b|\ban\b|\band\b|\bfor\b|\bin\b|\bof\b|\bto\b|\bthe\b'
#
################################################################
########## Below this and you should be careful ###############
# COMMENTTEMPLATE : a valid HTML file for containing comments about the paper.
#commenttemplate= commentbase + "comments/comments.template"
# Standard place to put all comment files (?)
commentdir = commentbase + "comments/"
# Place new papers in PAPERDIR
paperdir = paperbase + "TOREAD/"
readinglist = listbase + "reading.html"
#readingtemplate= listbase + "reading.template"
readingkey = "\12"

# We keep all the Paper data in this dictionary. New variables go here.
# Initialize all the variables
allvars = {
    "url": "", "projecturl": "",
    "title": "", "authors": "",
    "pubtype": "", "bibtexentry": "", "bibref": "",
    "date": "", "month": "", "year": "",
    "publication": "", "publisher": "",
    "address": "", "pages": "", "keywords": "",
    "localurl": "", "localfile": "",
    "commentfile": "", "commenturl": "",
}

# An entry in the reading list
# The reading list is an html file that maintains the list of papers
# you have downloaded. (see the READINGLIST variable defined above).
# WARNING assumes you have a style sheet with the following classes defined:
#   paper, title, author
# Variables from allvars are replaced with their value if surrounded by {}
# NOTE(review): the comments describe HTML, but no markup is visible in the
# two templates below -- confirm the tags were not lost from this copy.
reading_template="""
{authors} {title} {publication}, {address}, {date} [Project] [Local] [Comments]
"""

# A comment file
# Each paper has an associated comment file.  The original
# paper links are also maintained.
comment_template=""" Comments
{title}
{authors}
{booktitle}
[ Project ] [ Original] [ Local Copy]
        {bibtexentry}
            

Original Abstract

{abstract}

Synopsis

Main Contribution

New Ideas

Relation to Current Research

Relation to Future Research

Notes

"""

# pubtype is
# article      -- author, title, journal, year -- pages, month, note
# book         -- author, title, publisher, year --
# inproceeding -- author, title, booktitle, year -- publisher, month, address, pages
# masters      -- author, title, school, year -- month, address, note
# phdthesis    -- author, title, school, year -- month, address, note
# techreport   -- author, title, institution, year -- month, address, note
# manual       -- title -- author, organization, year -- month, address, note

# Templates for BibTeX entries
# This is a table indexed by pubtype
# NOTICE : variables are surrounded by ## because bibtex is sensitive to {}
bibtexplate = {
    "Article" : """@article(#bibref#, author={#authors#}, title={#title#}, journal={#publication#}, year={#year#}, month={#month#}, pages={#pages#}, url={#url#}, keywords={#keywords#})""",
    "Book" : """@book(#bibref#, author={#authors#}, title={#title#}, publisher={#publisher#}, year={#year#}, url={#url#}, keywords={#keywords#})""",
    "Proceedings" : """@inproceedings(#bibref#, author={#authors#}, title={#title#}, booktitle={#publication#}, publisher={#publisher#}, address={#address#}, year={#year#}, month={#month#}, pages={#pages#}, url={#url#}, keywords={#keywords#})""",
    "Masters" : """@mastersthesis(#bibref#, author={#authors#}, title={#title#}, school={#address#}, year={#year#}, url={#url#}, keywords={#keywords#})""",
    "PhD Thesis" : """@phdthesis(#bibref#, author={#authors#}, title={#title#}, school={#address#}, year={#year#}, url={#url#}, keywords={#keywords#})""",
    "Tech report" : """@techreport(#bibref#, author={#authors#}, title={#title#}, institution={#address#}, year={#year#}, url={#url#}, keywords={#keywords#})""",
    "Manual" : """@manual(#bibref#, title={#title#}, author={#authors#}, institution={#address#}, year={#year#}, pages={#pages#}, url={#url#}, keywords={#keywords#})""",
}

# NOTE: every statement below uses syntax accepted by both Python 2.6+ and
# Python 3 (print(...) with a single argument, "except X as e", 0o644).
# The library calls themselves (urllib.urlretrieve, urlparse) are still the
# Python 2 ones this script was written against.


def getbasename(title):
    """Change the title string into a reasonable filename.

    The title is lower-cased, the words matched by ``nowords`` are dropped,
    whitespace becomes '_', every remaining non-word character is removed
    and at most the first four '_'-separated words are kept.
    """
    t = title.lower()
    # Remove the list of non-significant words
    t = re.sub(nowords, "_", t)
    # The next three convert whitespace, remove duplicate '_' and strip a
    # '_' at either end
    t = re.sub(r'\s+', "_", t)
    t = re.sub(r'_+', "_", t)
    t = re.sub(r'^_|_$', "", t)
    # Turn all non-alphanumerics to ''
    t = re.sub(r'\W', "", t)
    # Take only the first four words
    t = re.sub(r'([^_]+(_[^_]+){0,3}).*', '\\1', t)
    return t


# Make a bibref.  Currently we use the first author,
# removing initials, and a 2 digit year.
#
def getbibref(title, author, year):
    """Make a bibtex ref based on paper title, author and year.

    Returns the lower-cased string ``<name>:<first title word><yy>``.
    """
    # Only the text before the first comma (the first author) is used.
    comma = author.find(',')
    if comma > 0:
        author = author[:comma]
    # Drop single-character initials such as "J." or "J".
    author = re.sub(r'(\b.\.?\s+)+', '', author)
    author = re.sub(r'\s+', ' ', author)
    names = author.split(' ')
    # When more than one word is left, the second word is the one used.
    if len(names) > 1:
        names[0] = names[1]
    words = getbasename(title).split('_')
    # Keep only what follows the first two characters ("1999" -> "99").
    if len(year) > 2:
        year = year[2:]
    return (names[0] + ":" + words[0] + year).lower()


def getbibentry(subs):
    """Return the BibTeX entry text for the paper described by *subs*.

    subs -- mapping of paper variables; ``subs["pubtype"]`` selects the
            template from ``bibtexplate`` (KeyError for an unknown type).

    The comma-separated author list is rewritten to BibTeX's
    "A and B and C" form.  *subs* itself is left untouched.
    """
    template = bibtexplate[subs["pubtype"]]
    authors = re.sub(r',', ' and ', subs["authors"])
    # "A, B, and C" now reads "A and  B and  and C": collapse the repeated
    # "and"s and the doubled blanks.
    authors = re.sub(r'\band\b(\s+and\b)+', 'and', authors)
    authors = re.sub(r'\s+', ' ', authors).strip()
    # Substitute from a copy so the caller's author string is never changed,
    # even if the template raises.
    fields = dict(subs)
    fields["authors"] = authors
    t = HTMLgen.StringTemplate(template, fields, delimiters=["#", "#"])
    return str(t)


def normalize_path(name):
    """Expand environment variables and '~' in *name*; return an absolute path."""
    return os.path.abspath(os.path.expanduser(os.path.expandvars(name)))


class EditableFile:
    """A text file held in memory as a list of lines that can be edited."""

    def __init__(self, path):
        """Remember *path* and, when it is non-empty, load its lines."""
        self.lines = []
        self.path = path
        if path:
            self.open(path)

    def open(self, path):
        """Read all lines of *path* into ``self.lines``.

        A file that cannot be read is reported on stdout and leaves the
        current lines unchanged.
        """
        path = normalize_path(path)
        try:
            with open(path) as tfile:
                self.lines = tfile.readlines()
        except IOError:
            print("Unable to open file " + path)

    def save(self, path=""):
        """Write the lines to *path* (default: the path given at creation).

        An existing file is first renamed to ``<name-without-ext>.bak``.
        Nothing is written when there are no lines.  Returns the normalised
        path; raises ValueError when no path is known at all.
        """
        # Pick the default BEFORE normalising: normalising "" yields the
        # current directory, which must never be renamed or written to.
        target = path or self.path
        if not target:
            raise ValueError("EditableFile.save: no path to save to")
        target = normalize_path(target)
        if self.lines:
            if os.path.exists(target):
                readname, readext = os.path.splitext(target)
                os.rename(target, readname + ".bak")
            with open(target, "w") as tfile:
                tfile.writelines(self.lines)
        return target

    # Edit operations
    def findline(self, key):
        """Return the index of the first line equal to *key* (ValueError if none)."""
        return self.lines.index(key)

    def insertline(self, pos, line):
        """Insert *line* before index *pos*."""
        return self.lines.insert(pos, line)

    def appendline(self, line):
        """Add *line* after the last line."""
        return self.lines.append(line)

    def deleteline(self, pos):
        """Remove and return the line at index *pos*."""
        return self.lines.pop(pos)

    def countlines(self):
        """Return the number of lines held."""
        return len(self.lines)


class ReadingList(EditableFile):
    """A class representing the changeable reading list."""

    def __init__(self):
        EditableFile.__init__(self, readinglist)

    def addpaper(self, subs):
        """Insert an entry for the paper *subs* just after the key line."""
        t = HTMLgen.StringTemplate(reading_template, subs)
        try:
            print("adding paper to reading list")
            keyline = self.findline(readingkey)
            self.insertline(keyline + 1, str(t))
        except ValueError:
            print("Could not find key line" + readingkey + " in " + readinglist)

    def save(self):
        """Save the list, giving the new file the old file's permissions."""
        path = normalize_path(readinglist)
        # Only the permission bits are handed back to chmod.
        mode = os.stat(path)[0] & 0o7777
        EditableFile.save(self, path)
        os.chmod(path, mode)


class BibTeXList(EditableFile):
    """The BibTeX database that new entries are appended to."""

    def __init__(self):
        EditableFile.__init__(self, bibtexfile)

    def addpaper(self, subs):
        """Append the prepared ``subs["bibtexentry"]`` and a newline."""
        print("adding paper to " + bibtexfile)
        self.appendline(subs["bibtexentry"])
        self.appendline("\n")

    def save(self):
        """Save the database, then run it through bibtool if available.

        The freshly written file is moved to ``<file>.bak`` and bibtool
        writes its output back to the original name; when bibtool fails or
        is missing the backup is copied back unchanged.
        """
        EditableFile.save(self, bibtexfile)
        bib = normalize_path(bibtexfile)
        if not os.path.exists(bib):
            # Nothing was written (no lines), so there is nothing to process.
            return
        backup = bib + ".bak"
        os.rename(bib, backup)
        # Argument list + explicit stdout instead of a shell command line, so
        # unusual characters in the path cannot be interpreted by a shell.
        try:
            with open(bib, "w") as out:
                status = subprocess.call(
                    ["bibtool", "-q", "-s", "-r", "iso2tex", backup],
                    stdout=out)
        except OSError:
            status = 1  # bibtool could not be started
        if status:
            print("Problem while running bibtool")
            shutil.copy(backup, bib)


class Paper:
    """One paper: its variables plus the actions that can be run on it."""

    def __init__(self, table):
        """Start from the ``allvars`` defaults, overridden by *table*."""
        # Work on a copy: the module-level defaults must not be modified,
        # otherwise values leak from one Paper into the next.
        self.vars = dict(allvars)
        self.vars.update(table)
        self.initialize_vars()

    def initialize_vars(self):
        """Derive month/year, file names, URLs, bibref and the BibTeX entry."""
        # Kill all extra spaces in all variables.
        for k in list(self.vars):
            self.vars[k] = re.sub(r'\s+', ' ', self.vars[k])

        title = self.vars["title"]
        authors = self.vars["authors"]
        url = self.vars["url"]

        # Convert the date.  A year supplied without a date is kept as
        # given; every other case goes through dates.parse_date.
        # NOTE(review): dates.parse_date is a project helper not shown here;
        # its first result is passed to time.gmtime -- confirm what it
        # returns for an empty date.
        if self.vars['date'] or not self.vars['year']:
            start, end = dates.parse_date(self.vars['date'])
            self.vars['month'] = time.strftime("%b", time.gmtime(start))
            self.vars['year'] = time.strftime("%Y", time.gmtime(start))

        scheme, location, path, params, query, frag = urlparse.urlparse(url)
        filename = self.vars["filename"] = os.path.basename(path)
        # The extension is everything from the FIRST dot, so "x.ps.gz"
        # keeps ".ps.gz".
        pos = filename.find('.')
        if pos > 0:
            extension = filename[pos:]
        else:
            extension = os.path.splitext(filename)[1]

        if self.vars["projecturl"] == "":
            self.vars["projecturl"] = urlparse.urljoin(url, os.path.dirname(path))

        # Local names are built from the title, not from the remote name.
        basename = getbasename(title)
        self.vars["localfile"] = normalize_path(paperdir + basename + extension)
        self.vars["commentfile"] = normalize_path(commentdir + basename + "-comments.html")
        self.vars["localurl"] = serverurl + paperdir + basename + extension
        self.vars["commenturl"] = serverurl + commentdir + basename + "-comments.html"
        self.vars["bibref"] = getbibref(title, authors, self.vars["year"])
        if self.vars["pubtype"] == "":
            self.vars["pubtype"] = "Article"
        self.vars["bibtexentry"] = getbibentry(self.vars)

    def compress_ps(self):
        """gzip the local file; return the resulting file name.

        ``localfile`` and ``localurl`` get ".gz" appended only when gzip
        succeeded, so they always name a file that exists.
        """
        newfile = self.vars["localfile"]
        print("compressing file " + newfile)
        try:
            status = subprocess.call(["gzip", newfile])
        except OSError:
            status = 1  # gzip could not be started
        if status:
            print("Problem while running gzip")
            return newfile
        newfile = newfile + ".gz"
        self.vars["localfile"] = newfile
        self.vars["localurl"] = self.vars["localurl"] + ".gz"
        return newfile

    def convert_pdf2ps(self):
        """Convert the local PDF to PostScript with acroread, then gzip it.

        Returns the final file name; on a failed conversion the variables
        are left alone and the original file name is returned.
        """
        newfile = self.vars["localfile"]
        base, extension = os.path.splitext(newfile)
        print("converting pdf to postscript")
        try:
            status = subprocess.call(
                ["acroread", "-toPostScript", "-pairs", newfile, base + ".ps"])
        except OSError:
            status = 1  # acroread could not be started
        if status:
            print("Problem while running acroread")
            return newfile
        self.vars["localfile"] = base + ".ps"
        urlbase, extension = os.path.splitext(self.vars["localurl"])
        self.vars["localurl"] = urlbase + ".ps"
        return self.compress_ps()

    def make_local_copy(self):
        """Download the paper to ``localfile``; return 1 on success, else 0."""
        newfile = self.vars["localfile"]
        url = self.vars["url"]
        try:
            print("getting " + url)
            print("placing in " + newfile)
            newfile, headers = urllib.urlretrieve(url, newfile)
            base, extension = os.path.splitext(newfile)
            # if extension == ".pdf":
            #     newfile = self.convert_pdf2ps()
            if extension == ".ps":
                newfile = self.compress_ps()
            os.chmod(newfile, 0o644)
            return 1
        except IOError as e:
            print("An error occured while retrieving %s : %s" % (url, e))
            return 0
        except KeyboardInterrupt:
            print("The user interrupted the transfer")
            return 0

    def generate_html_entry(self):
        """Add this paper to the reading list and save the list."""
        R = ReadingList()
        R.addpaper(self.vars)
        R.save()

    def generate_bibtex_entry(self):
        """Add this paper to the BibTeX database and save it."""
        R = BibTeXList()
        R.addpaper(self.vars)
        R.save()

    def create_commentfile(self):
        """Write the comment file from its template; return 1 on success, else 0."""
        try:
            commentfile = self.vars["commentfile"]
            doc = HTMLgen.StringTemplate(comment_template)
            doc.substitutions = self.vars
            doc.write(commentfile)
            os.chmod(commentfile, 0o644)
            return 1
        except Exception as e:
            # Still best-effort, but Ctrl-C is no longer swallowed and the
            # cause is reported.
            print("Problem while building comments")
            print(str(e))
            return 0


def newpaper(subs, p_copy=None, p_comment=None, p_html = None, p_bibtex = None):
    """Create a Paper from *subs* and run the requested actions on it.

    p_copy    -- download a local copy first; when the download fails none
                 of the other actions are run
    p_comment -- create the comment file
    p_html    -- add an entry to the reading list
    p_bibtex  -- add an entry to the BibTeX database
    """
    p = Paper(subs)
    print(p.vars)
    copy = 1
    if p_copy:
        copy = p.make_local_copy()
    if copy:
        if p_comment:
            p.create_commentfile()
        if p_html:
            p.generate_html_entry()
        if p_bibtex:
            p.generate_bibtex_entry()


# M A I N   P R O G R A M
# Retrieve the file and place
# a, author
# t, title,
#
def process_args(arglist):
    """Parse the command-line words in *arglist* into a paper-variable dict.

    Options: -t title, -a authors, -b booktitle, -y year, -p project URL,
    -h help; the first positional argument is the paper's URL.  Prints the
    usage line and exits when -h is given or title, authors or URL is
    missing.
    """
    usage = ("usage: paper.py -t <title> -a <authors> [-b <booktitle>] "
             "[-y <year>] [-p <projecturl>] <url>")
    try:
        # Parse the list we were handed, not sys.argv.
        opts, pargs = getopt.getopt(arglist, 'ht:a:b:y:p:')
    except getopt.GetoptError as e:
        print(str(e))
        print(usage)
        sys.exit(2)
    options = dict(opts)

    v = {}
    if "-t" in options:
        v["title"] = re.sub(r'\s+', ' ', options["-t"])
    if "-a" in options:
        v["authors"] = options["-a"]
    if "-b" in options:
        v["booktitle"] = options["-b"]
    if "-y" in options:
        v["year"] = options["-y"]
    if "-p" in options:
        v["projecturl"] = options["-p"]

    # .get(): a missing -t / -a must produce the usage text, not a KeyError.
    if ("-h" in options
            or not v.get("title")
            or not v.get("authors")
            or len(pargs) < 1):
        print(usage)
        sys.exit(0)
    v["url"] = pargs[0]
    return v


if __name__ == '__main__':
    paper_vars = process_args(sys.argv[1:])
    # NOTE(review): no p_* flag is passed, so from the command line this only
    # builds the Paper and prints its variables -- confirm that is intended.
    newpaper(paper_vars)
    #print varsubst