New translator for Science Fiction + Fantasy Research Database

2 views
Skip to first unread message

Inigo

unread,
Jan 12, 2008, 12:30:12 PM (1/12/08)
to zotero-dev
Hi,

I've written a new translator for the Science Fiction and Fantasy
Research Database at http://library.tamu.edu/cushing/sffrd/. This is
one of the major research resources for literary criticism of SF.

The translator is attached below. It does a fairly good job on
journals and web pages, but isn't yet so good on edited book sections.
I'd like it to be in the Zotero main distribution - how does this
happen? I'm happy to put the code in the public domain.

I'm very impressed with the work that the Zotero team has done to make
writing translators easy - particularly Scaffold and the accompanying
Wiki tutorial. It took me very little time to get a basic translator
working, and all of the tricky bits were to do with parsing
unstructured site text, rather than any complexity inside Zotero. Well
done!

Cheers

Inigo
--
Inigo Surguy
http://surguy.net/



REPLACE INTO translators VALUES ('31902d5e-75f1-44c0-b1b0-fd108ff891e2',
'1.0.2', '', '2008-01-12 17:07:26', '0', '100', '4',
'SF and Fantasy Research Database', 'Inigo Surguy',
'http://library.tamu.edu/cushing/sffrd/',
'function detectWeb(doc,url) {
var detailTitle = "Search Results - Detailed View";
if (doc.title.substring(0,detailTitle.length)==detailTitle) {
return "journalArticle";
}
return false;
}
',
'function doWeb(doc,url) {
Zotero.debug("doWeb for SF and F research database has been called
with "+doc.title);
scrape(doc,url);
}

// Returns the text of the first node selected by xpath within doc, after
// passing it through Zotero.Utilities.cleanString.
// There is no guard for an empty result: if the XPath selects nothing, the
// attempt to read textContent from the missing node throws.
function getText(doc, xpath) {
  var matches = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
  var firstNode = matches.iterateNext();
  return Zotero.Utilities.cleanString(firstNode.textContent);
}

// Matches reftext against re and returns entry 1 of the match result
// (the first capture group for a non-global pattern).
// Returns the empty string when the pattern does not match at all.
function applyRe(reftext, re) {
  var found = reftext.match(re);
  if (found == null) {
    return "";
  }
  return found[1];
}

// Reads the second cell of the first four table rows (author, title, imprint
// and subject, in that order) and passes them to the item builder selected by
// the imprint text:
//   - contains "online resource" (any case) -> createOnlineResource
//   - starts with "in:" (any case)          -> createBookSection
//   - anything else                         -> createArticle
// The url argument is accepted but not used.
function scrape(doc, url) {
  var record = {
    author: getText(doc, "//p/table//tr[1]/td[2]"),
    title: getText(doc, "//p/table//tr[2]/td[2]"),
    imprint: getText(doc, "//p/table//tr[3]/td[2]"),
    subject: getText(doc, "//p/table//tr[4]/td[2]")
  };

  var build = createArticle;
  if (record.imprint.search(/online resource/i) !== -1) {
    build = createOnlineResource;
  } else if (record.imprint.search(/^in:/i) !== -1) {
    build = createBookSection;
  }

  build(record.author, record.title, record.imprint, record.subject);
}

// Builds and saves a journalArticle item from the four detail-view fields.
//
//   author  - author names separated by "/"
//   title   - article title
//   imprint - publication details; the typical format is
//             Publication name volume(issue): pp-pp. Season Year.
//             optionally with a URL somewhere in the text
//   subject - keywords for the article
//
// The raw subject and imprint are always kept in the extra field, and the
// whole imprint is used as the publication title unless a better one can be
// parsed out. Every parsing step is optional: a part of the imprint that does
// not fit the typical format simply leaves its field unset.
function createArticle(author, title, imprint, subject) {
  var newArticle = new Zotero.Item("journalArticle");
  var aus = author.split("/");
  for (var i = 0; i < aus.length; i++) {
    newArticle.creators.push(Zotero.Utilities.cleanAuthor(aus[i], "author", true));
  }
  newArticle.title = title;
  newArticle.extra = subject + "\n" + imprint;
  newArticle.publicationTitle = imprint;

  // The hyphen comes first in the class so that it is a literal character and
  // not a range operator; otherwise URLs are cut short at their first hyphen.
  var urlPattern = /http[-.:\/\w?=&%#~+;@]*/;
  var u = imprint.match(urlPattern);
  if (u != null) { newArticle.url = u[0]; }
  // URLs contain colons, so remove the URL before splitting on the colon that
  // separates "name volume(issue)" from "pages. date".
  var imprintWithoutUrl = imprint.replace(urlPattern, "");

  var splitPublication = imprintWithoutUrl.match(/^(.*):(.*)/);
  if (splitPublication != null) {
    var titleVolumeIssue = splitPublication[1];
    // Publication titles are taken to be the leading run of letters,
    // whitespace, colon, apostrophe (written as \x27 to keep quote characters
    // out of this source), parentheses, underscore and hyphen. The hyphen is
    // placed last so that it is a literal and not a range.
    var m = titleVolumeIssue.match(/^([A-Za-z\s:\x27()_-]*)(.*)/);
    if (m != null) {
      var publication = m[1].replace(/^\s+|\s+$/g, "");
      // Keep the full imprint as the title if nothing usable was found.
      if (publication != "") { newArticle.publicationTitle = publication; }

      var volumeAndIssue = m[2];
      Zotero.debug("Volume and issue is " + volumeAndIssue);
      var vi = volumeAndIssue.match(/(\d+)\((\d+)\)/);
      if (vi != null) {
        newArticle.volume = vi[1];
        newArticle.issue = vi[2];
      }
    }

    var pagesDate = splitPublication[2];
    // Several page ranges may be listed; keep them all, comma separated.
    var pages = pagesDate.match(/(\d+\s*-\s*\d+)/g);
    if (pages != null) { newArticle.pages = pages.join(","); }
    // An optional season or month word followed by a four digit year.
    var date = pagesDate.match(/([\w\/]*\s+(19|20)\d{2})/);
    if (date != null) { newArticle.date = date[1].replace(/^\s+/, ""); }
  }

  newArticle.complete();
}

// Builds and saves a webpage item from the four detail-view fields.
//
//   author  - author names separated by "/"
//   title   - page title
//   imprint - publication details; used as the publication title and searched
//             for the URL of the resource
//   subject - keywords for the resource
//
// The raw imprint and subject are kept in the extra field.
function createOnlineResource(author, title, imprint, subject) {
  var newArticle = new Zotero.Item("webpage");
  newArticle.title = title;
  newArticle.publicationTitle = imprint;
  newArticle.extra = imprint + "\n" + subject;

  // The hyphen comes first in the class so that it is a literal character and
  // not a range operator; otherwise URLs are cut short at their first hyphen.
  var u = imprint.match(/http[-.:\/\w?=&%#~+;@]*/);
  if (u != null) { newArticle.url = u[0]; }

  var aus = author.split("/");
  for (var i = 0; i < aus.length; i++) {
    newArticle.creators.push(Zotero.Utilities.cleanAuthor(aus[i], "author", true));
  }
  newArticle.complete();
}

// Builds and saves a bookSection item from the four detail-view fields.
// This is deliberately minimal: the imprint is stored unparsed as the
// publication title, and again (followed by the subject) in the extra field.
// Book sections need more detailed parsing than this; it is only a start.
//
//   author  - author names separated by "/"
//   title   - section title
//   imprint - publication details as shown on the page
//   subject - keywords for the section
function createBookSection(author,title,imprint,subject) {
  var section = new Zotero.Item("bookSection");
  section.title = title;
  section.publicationTitle = imprint;
  section.extra = imprint + "\n" + subject;

  var names = author.split("/");
  var idx = 0;
  while (idx < names.length) {
    var creator = Zotero.Utilities.cleanAuthor(names[idx], "author", true);
    section.creators.push(creator);
    idx += 1;
  }

  section.complete();
}
');
Reply all
Reply to author
Forward
0 new messages