http://www.ubc.uevora.pt/index.php?option=com_jombib&Itemid=61
We publish a large number of references (currently 20000+; references
only, with no PDF downloads, but many of them link to the reference page
on the publisher's site) for the conservation biology field, and I have
been asked to make them Zotero-compatible and to make the translator
public.
As I have control over the output of the page that shows each reference,
and also over the reference db, I adapted the HTML code to match the
Scaffold tutorial that I found on the Zotero site. This also helped me
because I have very limited JavaScript skills.
The translator has been tested fairly thoroughly and it seems to work
without flaws.
Here is the code:
// Detect Code
/**
 * Zotero detection hook: reports whether the current page is a single UBC
 * reference page, judged only by the document title.
 *
 * NOTE(review): Zotero normally expects an item type (e.g. "journalArticle")
 * as the return value; "Reference found" is kept as-is here - confirm.
 *
 * @param {Document} doc - page being inspected
 * @param {string} url - page address (not used)
 * @returns {string|undefined} "Reference found" on a reference page,
 *   otherwise undefined
 */
function detectWeb(doc, url) {
	var onReferencePage = (doc.title == "UBC: referencia bibliografica");
	if (!onReferencePage) {
		return undefined;
	}
	return "Reference found";
}
// End Detect Code
// Code
/**
 * Zotero translation hook: hands the page straight to scrape(), which
 * builds and completes the item.
 *
 * @param {Document} doc - page to translate
 * @param {string} url - page address, forwarded unchanged
 */
function doWeb(doc, url) {
	scrape(doc, url);
}
/**
 * Entry types shown on the UBC reference page, mapped to the Zotero item
 * type used when creating the item. Types not listed here fall back to
 * "document".
 */
var UBC_ITEM_TYPES = {
	article: "journalArticle",
	book: "book",
	booklet: "document",
	conference: "conferencePaper",
	inbook: "bookSection",
	incollection: "bookSection",
	inproceedings: "conferencePaper",
	manual: "document",
	mastersthesis: "thesis",
	misc: "document",
	other: "journalArticle",
	phdthesis: "thesis",
	proceedings: "conferencePaper",
	techreport: "document",
	unpublished: "document"
};

/**
 * Scrapes the single reference shown on a UBC reference page and saves it
 * as a Zotero item.
 *
 * The whole reference lives in one table cell as "Label: value" text, so
 * each field is cut out as the text between its label and the next label.
 * Fields whose labels are not found are stored as empty strings instead of
 * aborting the whole translation.
 *
 * @param {Document} doc - page to scrape; must support doc.evaluate()
 * @param {string} url - page address (not used)
 * @throws {Error} when the table cell holding the reference is not found
 */
function scrape(doc, url) {
	var cellXPath = "/html/body/div/div/div[3]/div[3]/table/tbody/tr/td";
	var linkXPath = "/html/body/div/div/div[3]/div[3]/table/tbody/tr/td/a[1]";

	// Whitespace-normalised text of the first node matching `xpath`, or null
	// when nothing matches (iterateNext() returns null in that case).
	function textAt(xpath) {
		var node = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
		return node ? Zotero.Utilities.cleanString(node.textContent) : null;
	}

	// Trimmed first capture group of `re` in `reftext`, or "" when `re` does
	// not match, so one missing label cannot crash the scrape.
	function getItem(reftext, re) {
		var found = reftext.match(re);
		return found ? found[1].trim() : "";
	}

	var allRefText = textAt(cellXPath);
	if (allRefText === null) {
		throw new Error("UBC translator: reference table cell not found");
	}
	// The first link's text is what ends up in the item's URL field.
	var linkText = textAt(linkXPath);

	// bib data scraper code here
	var entryType = getItem(allRefText, "Type:(.*?)Title").toLowerCase();
	var title = getItem(allRefText, "Title:(.*?)Author");
	var authors = getItem(allRefText, "Author.*?: (.*?)Journal");
	var journal = getItem(allRefText, "Journal:(.*?)Year");
	var year = getItem(allRefText, "Year:(.*?)Volume");
	var vol = getItem(allRefText, "Volume:(.*?)Number");
	var num = getItem(allRefText, "Number:(.*?)Pages");
	var page = getItem(allRefText, "Pages:(.*?)Abstract");
	var abstractText = getItem(allRefText, "Abstract:(.*?)Keywords");
	var keywords = getItem(allRefText, "Keywords:(.*?)Link");

	// zotero entry creation code here
	// hasOwnProperty keeps inherited keys such as "constructor" from matching.
	var itemType = Object.prototype.hasOwnProperty.call(UBC_ITEM_TYPES, entryType)
		? UBC_ITEM_TYPES[entryType]
		: "document";
	var newArticle = new Zotero.Item(itemType);

	var names = authors.split(",");
	for (var i = 0; i < names.length; i++) {
		var name = names[i].trim();
		// "".split(",") yields [""]; skip blanks so no empty creator is added.
		if (name) {
			newArticle.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
		}
	}

	newArticle.title = title;
	newArticle.publicationTitle = journal;
	newArticle.date = year;
	newArticle.volume = vol;
	newArticle.issue = num;
	newArticle.pages = page;
	newArticle.abstractNote = abstractText;
	// NOTE(review): Zotero items usually carry keywords in `tags`; confirm
	// whether this `keywords` property is picked up at all.
	newArticle.keywords = keywords;

	// The page prints "no link" when the reference has no publisher page;
	// leave the URL unset in that case rather than storing a blank string.
	if (linkText && linkText != "no link") {
		newArticle.url = linkText;
	}

	Zotero.debug(newArticle);
	newArticle.complete();
}
// End Code
As I said I have very limited JS skills, so probably the code needs to
be cleaned.
Take care
--
Giovanni Manghi
Conservation Biology Unit
University of Évora
Portugal
http://www.ubc.uevora.pt
giovann...@gmail.com
PGP Key available
Skype: toirao
Tel. + 351 96 7058216
--