IEEE Computer Society Translator

5 views
Skip to first unread message

reyven

unread,
Apr 3, 2008, 1:08:05 AM (4/3/08)
to zotero-dev
Hello there,

I've written a translator for the IEEE Computer Society repository and
would like to share it with the Zotero community. I'm not sure what the
process is for contributing a translator to the official Zotero release,
but I'm happy to keep maintaining this translator in the future.

Anyway, here's the clipboard copy of the translator:


-- Positional column values, presumably: translatorID, minVersion, maxVersion,
-- lastUpdated, inRepository, priority, translatorType, label, creator,
-- target (URL regex), detectCode, code.
-- TODO confirm against the translators table schema.
-- The target regex accepts http and https and escapes the literal dots.
-- NOTE(review): assumes backslashes are kept verbatim inside SQL string
-- literals (true for SQLite) - confirm.
REPLACE INTO translators VALUES ('e444a659-c764-4a89-9484-03103adcd604', '0.1b', '', '2008-04-03 14:03:07', '1',
'100', '4', 'IEEE computer society', 'Rey Abe',
'^https?://(csdl[0-9]?|search[0-9]?)\.computer\.org/(persagen/DLAbsToc\.jsp|search/results)',
'function detectWeb(doc, url) {

//supports table of contents, seach results and single document pages

if (url.indexOf("DOI=") == -1) {
return "multiple";
} else if (url.indexOf("resourcePath=/dl/mag") != -1) {
return "magazineArticle";
} else if (url.indexOf("resourcePath=/dl/trans") != -1) {
return "journalArticle";
} else if (url.indexOf("resourcePath=/dl/proceedings") != -1) {
return "conferencePaper";
} else if (url.indexOf("resourcePath=/dl/letters") != -1) {
return "letter";
} else {
return false;
}

}',
'function doWeb(doc, url) {
if (url.indexOf("DOI=") == -1) {
// handle listing pages containing document links (either a table of
contents or search results page)

var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;

// look for documents in the page
var items = new Array();

if (url.indexOf("search/results") != -1) {
//search results page

var entries = doc.evaluate(''//div[@class="searchresult"]'', doc,
nsResolver, XPathResult.ANY_TYPE, null);
var entry;
while(entry = entries.iterateNext()) {

//add text of first bold text in "searchresult" as label for item
selection list
var title = "";
titleNode = doc.evaluate(''./b'', entry, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext();
if (titleNode) title += titleNode.textContent;

//add link url of the abstract icon
var link = doc.evaluate(''.//img[@src="images/abstract_icon.gif"]/
ancestor::a'', entry, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext().href;

items[link] = Zotero.Utilities.cleanString(title);
}
} else {
//table of contents page

var entries = doc.evaluate(''//td[@class="tocEntry"]'', doc,
nsResolver, XPathResult.ANY_TYPE, null);
var entry;
while(entry = entries.iterateNext()) {

//add text of first link in "tocEntry" as label for item selection
list
var title = "";
titleNode = doc.evaluate(''./a'', entry, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext();
if (titleNode) title += titleNode.textContent;

//var link = doc.evaluate(''.//p[@class="fulltext_icons"]/a'',
entry, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().href;
//add link url of the abstract icon
var links = doc.evaluate(''.//img[@src="/common/images/
abstract_icon.gif"]/ancestor::a'', entry, nsResolver,
XPathResult.ANY_TYPE, null);
var link;
while(link = links.iterateNext()) {
items[link.href] = Zotero.Utilities.cleanString(title);
}
}
}

// let user select documents to scrape
items = Zotero.selectItems(items);
if(!items) return true;

var urls = new Array();
for(var url in items) {
urls.push(url);
}

// scrape selected documents
Zotero.Utilities.processDocuments(urls, scrape, function()
{ Zotero.done(); });
Zotero.wait();
} else {
scrape(doc);
}
}

/**
 * Scrapes a single document page.
 *
 * Reads the abstract, the index terms (stored as tags) and the full PDF link
 * from the page, then hands the BibTeX record embedded in the BibTex link to
 * the BibTeX import translator and merges the scraped data into the item that
 * translator produces.
 *
 * NOTE: this code lives inside a single-quoted SQL string literal, so it is
 * written without apostrophe characters (double-quoted strings, \u0027 escapes).
 * NOTE(review): the backslash escapes rely on the SQL dialect leaving
 * backslashes untouched inside string literals (true for SQLite) - confirm.
 *
 * @param {Document} doc  the document page to scrape
 * @param {string} [url]  unused; the page address is read from doc.location.href
 * @throws {Error} when the page has no BibTex link or the BibTeX payload
 *                 cannot be located inside that link
 */
function scrape(doc,url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return (prefix === "x") ? namespace : null;
	} : null;

	// Returns the first node matching xpath in the document, or null.
	function firstMatch(xpath) {
		return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}

	// The item type is derived from the resourcePath segment of the page URL;
	// it stays undefined (BibTeX type is kept) when no segment is recognised.
	var pageUrl = doc.location.href;
	var itemType;
	if (pageUrl.indexOf("resourcePath=/dl/mag") !== -1) {
		itemType = "magazineArticle";
	} else if (pageUrl.indexOf("resourcePath=/dl/trans") !== -1) {
		itemType = "journalArticle";
	} else if (pageUrl.indexOf("resourcePath=/dl/proceedings") !== -1) {
		itemType = "conferencePaper";
	} else if (pageUrl.indexOf("resourcePath=/dl/letters") !== -1) {
		itemType = "letter";
	}

	// get abstract if available
	// NOTE(review): the element name "abstractl" is kept exactly as posted;
	// verify against the live page markup.
	var abstractNode = firstMatch("//abstractl");
	var abstractText = abstractNode ? Zotero.Utilities.cleanString(abstractNode.textContent) : null;

	// get keywords if available (will be added as tags); each keyword is
	// trimmed and empty fragments are dropped
	var keywords = [];
	var keywordNode = firstMatch("//td[b/text()=\"Index Terms-\"]/text()");
	if (keywordNode) {
		var fragments = Zotero.Utilities.cleanString(keywordNode.textContent.toLowerCase()).split(",");
		for (var i = 0; i < fragments.length; i++) {
			var keyword = fragments[i].replace(/^\s+|\s+$/g, "");
			if (keyword) keywords.push(keyword);
		}
	}

	// get full pdf if available and accessible (user must be logged in);
	// a link without an href attribute counts as not available
	var attachments = [];
	var notes = [];
	var pdfLink = firstMatch("//img[@src=\"/common/images/pdf_icon_green.gif\"]/ancestor::a");
	var pdfUrl = pdfLink ? pdfLink.getAttribute("href") : null;
	if (pdfUrl) {
		attachments.push({url: pdfUrl, mimeType: "application/pdf", title: "Complete PDF document"});
	} else {
		notes.push({note: "Complete PDF document was either not available or accessible. Please make sure you\u0027re logged in to the digital library to retrieve the complete PDF document."});
	}

	// get bibtex entry and delegate to bibtex translator, including the data
	// scraped so far
	var bibtexLink = firstMatch("//div[@class=\"hilite\"]/a[text()=\"BibTex\"]");
	if (!bibtexLink) {
		throw new Error("No BibTeX found!");
	}

	// The BibTeX record is embedded, URL-encoded, in the javascript of the link
	// target, between a document.write( call and the closing of the popup.
	var script = decodeURI(bibtexLink.href);
	var startMarker = "document.write(\u0027";
	var endMarker = "\u0027);Popup.document.close();";
	var start = script.indexOf(startMarker);
	var end = script.indexOf(endMarker);
	if (start === -1 || end < start + startMarker.length) {
		throw new Error("BibTeX link found, but no BibTeX record could be located in it");
	}
	// NOTE(review): the posted code carried a comment about a workaround for
	// the bibtex translator needing whitespace after the first curly brace,
	// but no such code was present; none is added here.
	var bibtex = script.substring(start + startMarker.length, end);
	bibtex = Zotero.Utilities.cleanTags(bibtex);
	bibtex = Zotero.Utilities.cleanString(bibtex);

	var translator = Zotero.loadTranslator("import");
	translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
	translator.setString(bibtex);
	translator.setHandler("itemDone", function(obj, item) {
		// add http to the url, unless it already carries a scheme
		if (item.url && !/^[a-z][a-z0-9+.\-]*:\/\//i.test(item.url)) {
			item.url = "http://" + item.url;
		}
		if (itemType) item.itemType = itemType;
		item.attachments = attachments;
		if (abstractText) item.abstractNote = abstractText;
		// only replace imported tags and notes when something was scraped
		if (keywords.length) item.tags = keywords;
		if (notes.length) item.notes = notes;

		item.complete();
	});
	translator.translate();
}

');



best regards,
-Rey Abe
Reply all
Reply to author
Forward
0 new messages