#!/usr/bin/python
# Script to download a paper from a web site,
# creating a permanent link between the original paper
# and the local copy.
# usage: paper.py [-h] -t TITLE -a AUTHORS [-b BOOKTITLE] [-y YEAR] [-p PROJECTURL] URL
# See also gtkpaper.py
#
#
#
# BUGS:
## The setup is a bit arduous
##
import sys
import os
import urllib
import urlparse
import getopt
import re
import string
import shutil
import tempfile
import time
import dates
import HTMLgen
############ Change these to suit your setup ##############
# Base directory for downloaded papers; PAPERDIR is built from it below.
paperbase = "~kris/reading/"
# Base directory for per-paper comment files; COMMENTDIR is built from it below.
commentbase = "~kris/reading/"
# Directory that holds the HTML reading list (READINGLIST below).
listbase = "/cs/student/kris/public_html/Research/"
# URL prefix prepended to PAPERDIR / COMMENTDIR to form a paper's
# "localurl" and "commenturl" values.
serverurl = "http://merrimack.cs.ucsb.edu/"
# BibTeX database that new entries are appended to.
bibtexfile = "~kris/TeX/language.bib"
# Regular expression matching the words to drop from a title when a file
# name is derived from it (see getbasename).
nowords = r'\ba\b|\ban\b|\band\b|\bfor\b|\bin\b|\bof\b|\bto\b|\bthe\b'
#
################################################################
########## Below this line you should be careful ###############
# COMMENTTEMPLATE : a valid HTML file for containing comments about the paper.
# (Disabled; the in-file comment_template string is used instead.)
#commenttemplate= commentbase + "comments/comments.template"
# Standard place to put all comment files (?)
commentdir = commentbase + "comments/"
# Place new papers in PAPERDIR
paperdir = paperbase + "TOREAD/"
# The HTML reading list that ReadingList edits.
readinglist = listbase + "reading.html"
# (Disabled; the in-file reading_template string is used instead.)
#readingtemplate= listbase + "reading.template"
# Line of the reading list after which new entries are inserted
# (see ReadingList.addpaper).  "\12" is the octal escape for a newline, so as
# written the key is a line consisting only of a newline.
# NOTE(review): this looks like the remains of a longer marker line -- confirm.
readingkey = "\12"
# We keep all the paper data in this dictionary.  New variables go here.
# Every variable is initialized to the empty string; Paper merges the
# caller-supplied values over these defaults and then fills in the derived
# ones (bibref, localfile, localurl, commentfile, commenturl, bibtexentry).
# NOTE(review): comment_template refers to {abstract}, which has no default
# here -- confirm callers always supply it.
allvars = { "url":"" , "projecturl": "",
"title" : "", "authors":"",
"pubtype" : "","bibtexentry" : "",
"bibref" : "" , "date":"","month":"", "year" : "",
"publication":"",
"publisher": "", "address":"" , "pages" : "", "keywords" : "",
"localurl":"", "localfile":"",
"commentfile":"" , "commenturl":""
}
# An entry in the reading list.
# The reading list is an HTML file that maintains the list of papers
# you have downloaded (see the READINGLIST variable defined above).
# WARNING: assumes you have a style sheet with the following classes defined:
# paper, title, author
# Variables from allvars are replaced with their value if surrounded by {}.
# NOTE(review): the template below contains nothing but a newline, so the
# entry markup appears to be missing -- confirm against the original file.
reading_template="""
"""
# A comment file.
# Each paper has an associated comment file, written by
# Paper.create_commentfile from this template.  The original
# paper links are also maintained.
# Variables are replaced with their value if surrounded by {}.
# NOTE(review): the template holds only plain headings and no HTML markup or
# link placeholders -- it looks as if the tags were lost; confirm.
comment_template="""
Comments
Original Abstract
{abstract}
Synopsis
Main Contribution
New Ideas
Relation to Current Research
Relation to Future Research
Notes
"""
# pubtype is
# article -- author, title, journal, year -- pages, month, note
# book -- author, title, publisher, year --
# inproceeding -- author, title, booktitle, year -- publisher, month, address, pages
# masters -- author, title, school, year -- month, address, note
# phdthesis -- author, title, school, year -- month, address, note
# techreport -- author, title, institution, year -- month, address, note
# manual -- title -- author, organization, year -- month, address, note
# Templates for BibTeX entries.
# This is a table indexed by pubtype.
# NOTICE: variables are surrounded by ## because BibTeX is sensitive to {}.
# The "Manual" template uses the `organization` field (as listed above);
# BibTeX's @manual entry type has no `institution` field.
bibtexplate = {
"Article" :
"""@article(#bibref#,
author={#authors#},
title={#title#},
journal={#publication#},
year={#year#},
month={#month#},
pages={#pages#},
url={#url#},
keywords={#keywords#})""",
"Book" :
"""@book(#bibref#,
author={#authors#},
title={#title#},
publisher={#publisher#},
year={#year#},
url={#url#},
keywords={#keywords#})""",
"Proceedings" :
"""@inproceedings(#bibref#,
author={#authors#},
title={#title#},
booktitle={#publication#},
publisher={#publisher#},
address={#address#},
year={#year#},
month={#month#},
pages={#pages#},
url={#url#},
keywords={#keywords#})""",
"Masters" :
"""@mastersthesis(#bibref#,
author={#authors#},
title={#title#},
school={#address#},
year={#year#},
url={#url#},
keywords={#keywords#})""",
"PhD Thesis" :
"""@phdthesis(#bibref#,
author={#authors#},
title={#title#},
school={#address#},
year={#year#},
url={#url#},
keywords={#keywords#})""",
"Tech report" :
"""@techreport(#bibref#,
author={#authors#},
title={#title#},
institution={#address#},
year={#year#},
url={#url#},
keywords={#keywords#})""",
"Manual" :
"""@manual(#bibref#,
title={#title#},
author={#authors#},
organization={#address#},
year={#year#},
pages={#pages#},
url={#url#},
keywords={#keywords#})"""
}
def getbasename(title, stopwords=None):
    """Change the title string into a reasonable filename.

    The title is lower-cased, the words matched by STOPWORDS are dropped,
    whitespace becomes '_', every other non-alphanumeric character is
    removed, and at most the first four remaining words are kept.

    title     -- the paper title.
    stopwords -- regular expression matching the words to drop; defaults
                 to the module-level `nowords` setting.

    Returns the '_'-separated name ('' if nothing usable is left).
    """
    if stopwords is None:
        stopwords = nowords
    name = title.lower()
    # Drop the non-significant words.
    name = re.sub(stopwords, "_", name)
    name = re.sub(r'\s+', "_", name)
    # Strip punctuation BEFORE collapsing underscores: a free-standing
    # punctuation mark ("types - a survey") would otherwise leave a double
    # underscore behind and cut the name short at that point.
    name = re.sub(r'\W', "", name)
    name = re.sub(r'_+', "_", name)
    name = name.strip("_")
    # Take only the first four words.
    return "_".join(name.split("_")[:4])
# Make a bibref.  Currently we use the first author (with initials
# removed), the first word of the title and a 2 digit year.
#
def getbibref(title, author, year):
    """Make a bibtex ref based on paper title, author and year.

    title  -- paper title; its first word (as produced by getbasename)
              is used.
    author -- comma-separated author list; only the part before the first
              comma is used.
    year   -- year as a string; shortened to its last two characters when
              longer than two.

    Returns the lower-cased string "<name>:<titleword><yy>", where <name>
    is the second word of the first author once initials are removed, or
    the only word if there is just one.
    """
    comma = author.find(',')
    if comma > 0:
        author = author[:comma]
    # Collapse whitespace first: the initials pattern below lets '.' match a
    # blank, so a double space between two names would glue them together.
    author = " ".join(author.split())
    author = re.sub(r'(\b.\.?\s+)+', '', author)
    names = author.split()
    if len(names) > 1:
        name = names[1]
    elif names:
        name = names[0]
    else:
        # Empty author string: build the ref without a name part.
        name = ""
    firstword = getbasename(title).split('_')[0]
    if len(year) > 2:
        year = year[-2:]
    return (name + ":" + firstword + year).lower()
def getbibentry(subs):
    """Return the BibTeX entry text for the paper described by SUBS.

    subs -- dictionary of paper variables; subs["pubtype"] selects the
            template from `bibtexplate` and the other values fill in the
            #name# placeholders.

    The comma-separated author list is rewritten into BibTeX's
    "A and B and C" form for the entry only; SUBS itself is not modified.

    Raises KeyError if subs["pubtype"] has no template.
    """
    template = bibtexplate[subs["pubtype"]]
    authors = re.sub(r'^[\s,]+|[\s,]+$', '', subs["authors"])
    authors = re.sub(r'\s*,\s*', ' and ', authors)
    # "A, and B" has just become "A and and B"; fold the repeats.
    authors = re.sub(r'\band(\s+and)+\b', 'and', authors)
    # Substitute from a copy so the caller's dictionary keeps its original
    # author string even if the template engine raises.
    fields = dict(subs)
    fields["authors"] = authors
    return str(HTMLgen.StringTemplate(template, fields, delimiters=["#", "#"]))
def normalize_path(name):
    """Return NAME as an absolute path with $VARIABLES and ~user expanded.

    name -- a path that may be relative and may contain environment
            variable references or a leading "~".
    """
    # Qualified os.path calls replace the function-level
    # "from os.path import *", which newer interpreters reject.
    expanded = os.path.expanduser(os.path.expandvars(name))
    return os.path.abspath(expanded)
class EditableFile:
    """A text file held in memory as a list of lines that can be edited
    and written back.

    Attributes:
      lines -- the file's lines, each including its line terminator.
      path  -- the path given to the constructor.
    """

    def __init__(self, path):
        """Remember PATH and, if it is non-empty, load its lines."""
        self.lines = []
        self.path = path
        if path:
            self.open(path)

    def open(self, path):
        """Load the lines of PATH into self.lines.

        A file that cannot be read is reported on stdout and leaves
        self.lines unchanged.
        """
        path = normalize_path(path)
        try:
            tfile = open(path)
            try:
                self.lines = tfile.readlines()
            finally:
                tfile.close()
        except IOError:
            print("Unable to open file" + path)

    def save(self, path=""):
        """Write the lines to PATH (default: the constructor's path).

        An existing file is first renamed to the same name with a ".bak"
        extension.  Nothing is written when there are no lines.

        Returns the normalized path.
        """
        # Choose the default BEFORE normalizing: normalizing "" yields the
        # current directory, which would hide the fact that no path was given.
        if not path:
            path = self.path
        path = normalize_path(path)
        if self.lines:
            if os.path.exists(path):
                root, ext = os.path.splitext(path)
                os.rename(path, root + ".bak")
            tfile = open(path, "w")
            try:
                tfile.writelines(self.lines)
            finally:
                tfile.close()
        return path

    # Edit operations

    def findline(self, key):
        """Return the index of the first line equal to KEY.

        Raises ValueError if there is no such line.
        """
        return self.lines.index(key)

    def insertline(self, pos, line):
        """Insert LINE before index POS."""
        return self.lines.insert(pos, line)

    def appendline(self, line):
        """Add LINE after the last line."""
        return self.lines.append(line)

    def deleteline(self, pos):
        """Remove and return the line at index POS."""
        return self.lines.pop(pos)

    def countlines(self):
        """Return the number of lines."""
        return len(self.lines)
class ReadingList(EditableFile):
    "A class representing the changeable reading list"

    def __init__(self):
        """Load the file named by the module-level `readinglist` setting."""
        EditableFile.__init__(self, readinglist)

    def addpaper(self, subs):
        """Insert an entry for the paper described by SUBS.

        The entry is `reading_template` filled in from SUBS and is placed
        directly after the first line equal to `readingkey`.  If no such
        line exists the problem is reported and nothing is inserted.
        """
        entry = str(HTMLgen.StringTemplate(reading_template, subs))
        print("adding paper to reading list")
        try:
            keyline = self.findline(readingkey)
        except ValueError:
            print("Could not find key line" + readingkey + " in " + readinglist)
            return
        self.insertline(keyline + 1, entry)

    def save(self):
        """Write the list back, keeping the file's permission bits.

        EditableFile.save renames the old file and creates a new one, so
        the old mode is read first and re-applied afterwards.
        """
        # Use the same normalized path that EditableFile.save writes to.
        path = normalize_path(readinglist)
        mode = None
        if os.path.exists(path):
            mode = os.stat(path)[0]
        EditableFile.save(self, readinglist)
        if mode is not None and os.path.exists(path):
            os.chmod(path, mode)
class BibTeXList(EditableFile):
    "A class representing the BibTeX database new entries are added to"

    def __init__(self):
        """Load the file named by the module-level `bibtexfile` setting."""
        EditableFile.__init__(self, bibtexfile)

    def addpaper(self, subs):
        """Append subs["bibtexentry"], followed by a newline, to the list."""
        print("adding paper to " + bibtexfile)
        self.appendline(subs["bibtexentry"])
        self.appendline("\n")

    def save(self):
        """Write the database, then pass it through bibtool.

        The saved file is renamed to "<file>.bak" and bibtool writes its
        output from that copy back to the original name.  If bibtool
        reports failure, the copy is restored.
        """
        EditableFile.save(self, bibtexfile)
        bib = normalize_path(bibtexfile)
        # EditableFile.save writes nothing when there are no lines, so the
        # file may not exist; there is then nothing to post-process.
        if not os.path.exists(bib):
            return
        backup = bib + ".bak"
        os.rename(bib, backup)
        # Run the bibtool if available
        if os.system("bibtool -q -s -r iso2tex " + backup + " >" + bib):
            print("Problem while running bibtool")
            shutil.copy(backup, bib)
class Paper:
    """One paper: its descriptive variables plus the operations that
    download it and record it in the reading list, the BibTeX database
    and a comment file.

    Attributes:
      vars -- dictionary of paper variables: a private copy of the
              `allvars` defaults, overlaid with the caller's values and
              the derived values set by initialize_vars.
    """

    # Permission bits rw-r--r-- for created files; spelled through int() so
    # the value parses the same on every Python version.
    _FILE_MODE = int("644", 8)

    def __init__(self, table):
        """Build the paper from TABLE, a dictionary of variable values."""
        # Copy the defaults: updating `allvars` itself would leak one
        # paper's values into every later Paper.
        self.vars = allvars.copy()
        self.vars.update(table)
        self.initialize_vars()

    def initialize_vars(self):
        """Normalize the supplied variables and compute the derived ones
        (month, year, filename, projecturl, localfile, localurl,
        commentfile, commenturl, bibref, pubtype, bibtexentry)."""
        # Kill all extra spaces in all variables.
        for key in list(self.vars.keys()):
            self.vars[key] = re.sub(r'\s+', ' ', self.vars[key])
        title = self.vars["title"]
        authors = self.vars["authors"]
        url = self.vars["url"]

        # Convert the date.  A given date always determines month and year.
        # Without a date, an explicitly supplied year is kept; the parser is
        # consulted only when no year was supplied either.
        # NOTE(review): dates.parse_date is assumed to return (start, end)
        # in seconds since the epoch -- confirm against the dates module.
        date = self.vars.get("date", "")
        if date.strip() or not self.vars["year"]:
            start, end = dates.parse_date(date)
            when = time.gmtime(start)
            self.vars["month"] = time.strftime("%b", when)
            self.vars["year"] = time.strftime("%Y", when)

        path = urlparse.urlparse(url)[2]
        filename = self.vars["filename"] = os.path.basename(path)
        # The extension runs from the FIRST dot so that compound suffixes
        # such as ".ps.gz" are kept whole.
        dot = filename.find('.')
        if dot > 0:
            extension = filename[dot:]
        else:
            extension = os.path.splitext(filename)[1]

        if self.vars["projecturl"] == "":
            self.vars["projecturl"] = urlparse.urljoin(url, os.path.dirname(path))

        # Local names are derived from the title, not from the remote name.
        basename = getbasename(title)
        self.vars["localfile"] = normalize_path(paperdir + basename + extension)
        self.vars["commentfile"] = normalize_path(commentdir + basename + "-comments.html")
        self.vars["localurl"] = serverurl + paperdir + basename + extension
        self.vars["commenturl"] = serverurl + commentdir + basename + "-comments.html"
        self.vars["bibref"] = getbibref(title, authors, self.vars["year"])
        if self.vars["pubtype"] == "":
            self.vars["pubtype"] = "Article"
        self.vars["bibtexentry"] = getbibentry(self.vars)

    def _run_command(self, argv):
        """Run the external command ARGV (a list) without a shell.

        File names derived from a URL are passed as plain arguments, so
        shell metacharacters in them are never interpreted.

        Returns the exit status, or -1 if the program could not be started.
        """
        import subprocess
        try:
            return subprocess.call(argv)
        except OSError:
            print("Unable to run " + argv[0])
            return -1

    def compress_ps(self):
        """gzip the local file and point localfile/localurl at the ".gz".

        Returns the resulting file name; if gzip fails the file is left
        as it was and its unchanged name is returned.
        """
        newfile = self.vars["localfile"]
        print("compressing file " + newfile)
        if self._run_command(["gzip", newfile]) != 0:
            return newfile
        newfile = newfile + ".gz"
        self.vars["localfile"] = newfile
        self.vars["localurl"] = self.vars["localurl"] + ".gz"
        return newfile

    def convert_pdf2ps(self):
        """Convert the local PDF to PostScript with acroread, then
        compress it.  Returns the resulting file name."""
        pdffile = self.vars["localfile"]
        base, extension = os.path.splitext(pdffile)
        print("converting pdf to postscript")
        self._run_command(["acroread", "-toPostScript", "-pairs",
                           pdffile, base + ".ps"])
        self.vars["localfile"] = base + ".ps"
        urlbase, extension = os.path.splitext(self.vars["localurl"])
        self.vars["localurl"] = urlbase + ".ps"
        return self.compress_ps()

    def make_local_copy(self):
        """Download vars["url"] to vars["localfile"].

        A ".ps" file is compressed afterwards, and the result is made
        world-readable.  Returns 1 on success, 0 if the transfer failed
        or was interrupted by the user.
        """
        newfile = self.vars["localfile"]
        url = self.vars["url"]
        try:
            print("getting " + url)
            print("placing in " + newfile)
            newfile, headers = urllib.urlretrieve(url, newfile)
            base, extension = os.path.splitext(newfile)
            # PDF conversion through convert_pdf2ps() is not enabled here
            # (it was commented out).
            if extension == ".ps":
                newfile = self.compress_ps()
            os.chmod(newfile, self._FILE_MODE)
            return 1
        except IOError:
            err = sys.exc_info()[1]
            print(" ".join(["An error occured while retrieving ",
                            url, " : ", str(err)]))
            return 0
        except KeyboardInterrupt:
            print("The user interrupted the transfer")
            return 0

    def generate_html_entry(self):
        """Add this paper to the HTML reading list and save it."""
        listing = ReadingList()
        listing.addpaper(self.vars)
        listing.save()

    def generate_bibtex_entry(self):
        """Add this paper to the BibTeX database and save it."""
        database = BibTeXList()
        database.addpaper(self.vars)
        database.save()

    def create_commentfile(self):
        """Write the comment file for this paper from comment_template.

        Returns 1 on success, 0 (after reporting it) on any error.
        """
        try:
            commentfile = self.vars["commentfile"]
            doc = HTMLgen.StringTemplate(comment_template)
            doc.substitutions = self.vars
            doc.write(commentfile)
            os.chmod(commentfile, self._FILE_MODE)
            return 1
        except Exception:
            # Best effort, but let interpreter-exit requests through.
            print("Problem while building comments")
            return 0
def newpaper(subs, p_copy=None, p_comment=None, p_html = None, p_bibtex = None):
    """Create a Paper from SUBS and run the requested processing steps.

    subs      -- dictionary of paper variables handed to Paper.
    p_copy    -- if true, download a local copy first.
    p_comment -- if true, create the comment file.
    p_html    -- if true, add the paper to the HTML reading list.
    p_bibtex  -- if true, add the paper to the BibTeX database.

    The comment/HTML/BibTeX steps are skipped when a requested download
    fails.
    """
    paper = Paper(subs)
    print(paper.vars)
    # Guard clause: a requested copy that fails stops everything else.
    if p_copy and not paper.make_local_copy():
        return
    if p_comment:
        paper.create_commentfile()
    if p_html:
        paper.generate_html_entry()
    if p_bibtex:
        paper.generate_bibtex_entry()
# M A I N P R O G R A M
# Retrieve the file and place it locally.
# -a, authors
# -t, title
#
def process_args(arglist):
    """Parse the command line into a dictionary of paper variables.

    arglist -- the argument strings, without the program name.

    Options: -t title, -a authors, -b book title, -y year,
    -p project URL, -h help; the first positional argument is the URL of
    the paper.

    Returns a dictionary with "title", "authors" and "url" plus a key for
    each optional value that was given.  Prints the usage line and exits
    when -h is given or the title, the authors or the URL is missing.
    """
    usage = "usage: paper.py -t -a "
    try:
        # Parse the list we were handed rather than re-reading sys.argv.
        opts, pargs = getopt.getopt(arglist, 'ht:a:b:y:p:')
    except getopt.GetoptError:
        print(str(sys.exc_info()[1]))
        print(usage)
        sys.exit(2)
    options = {}
    for opt, arg in opts:
        options[opt] = arg

    v = {}
    if "-t" in options:
        v["title"] = re.sub(r'\s+', ' ', options["-t"])
    if "-a" in options:
        v["authors"] = options["-a"]
    if "-b" in options:
        v["booktitle"] = options["-b"]
        # The BibTeX templates take the book title from "publication";
        # "booktitle" is kept for backward compatibility.
        v["publication"] = options["-b"]
    if "-y" in options:
        v["year"] = options["-y"]
    if "-p" in options:
        v["projecturl"] = options["-p"]

    # .get(): an option that was never given must lead to the usage
    # message, not to a KeyError.
    if "-h" in options \
       or not v.get("title") \
       or not v.get("authors") \
       or len(pargs) < 1:
        print(usage)
        sys.exit(0)
    v["url"] = pargs[0]
    return v
if __name__ == '__main__':
    # Script entry point: parse the command line and process the paper.
    # NOTE(review): newpaper is called with all of its p_* switches left at
    # their defaults, so it only builds and prints the paper variables --
    # confirm whether the download/comment/list steps should be enabled here.
    cli_vars = process_args(sys.argv[1:])
    newpaper(cli_vars)