Question on Wiley InterScience' translator

8 views
Skip to first unread message

Helvine

unread,
Jan 2, 2009, 3:39:18 AM1/2/09
to zotero-dev
I wrote a new translator for ACS (http://pubs.acs.org/) based on wiley
translator, but I can't fetch the multiple results. It always fetches
the last result for several times. When I checked the wiley ones, I
found It also has the same error with the multiple resulsts. I knew
the problem exists in the part of "for each (id in ids) ",but I don't
know how to correct it. Can anybody tell me the reason and the
solutions?

Wiley Translator:
REPLACE INTO translators VALUES
('fe728bc9-595a-4f03-98fc-766f1d8d0936', '1.0.0b4.r5', '', '2009-01-02
17:34:27', '1', '100', '4', 'Wiley InterScience', 'Sean Takats and
Michael Berkowitz', 'https?:\/\/(?:www3\.|www\.)?interscience\.wiley
\.com[^\/]*\/(?:search\/|(cgi-bin|journal)\/[0-9]+\/abstract|
journal)',
'function detectWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;

var xpath = ''//input[@name="ID"][@type="checkbox"]'';
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext()) {
return "multiple";
}
if (url.match(/journal\/\d+\/(issue|home)$/)) {
return "multiple";
}
var m = url.match(/https?:\/\/[^\/]*\/(cgi-bin|journal)(\/(abstract|
summary))?\/[0-9]+\/abstract/);
if (m){
return "journalArticle";
}
}',
'function doWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var host = ''http://'' + doc.location.host + "/";
Zotero.debug(host);
var m = url.match(/https?:\/\/[^\/]*\/(journal|cgi-bin\/summary)\/
([0-9]+)\/(abstract)?/);
Zotero.debug(m);
var ids = new Array();
if(detectWeb(doc, url) == "multiple") { //search
var id;
var title;
var availableItems = new Array();
var xpath = ''//tr[td/input[@name="ID"][@type="checkbox"]]'';
if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext()) {
elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null);
var elmt = elmts.iterateNext();
do {
title = doc.evaluate(''./td/strong'', elmt, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext().textContent;
id = doc.evaluate(''./td/input[@name="ID"][@type="checkbox"]'',
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().value;
availableItems[id] = title;
} while (elmt = elmts.iterateNext())
} else {
var xpath = ''//div[@id="contentCell"]/div[*/a]'';
var elmts = doc.evaluate(xpath, doc, nsResolver,
XPathResult.ANY_TYPE, null);
var elmt = elmts.iterateNext();
do {
title = Zotero.Utilities.trimInternal(doc.evaluate(''.//strong'',
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext
().textContent);
id = doc.evaluate(''.//a[1]'', elmt, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext().href.match(/abstract\/([\d]+)
\//)[1];
availableItems[id] = title;
} while (elmt = elmts.iterateNext())
}
var items = Zotero.selectItems(availableItems);
if(!items) {
return true;
}
for(var id in items) {
ids.push(id);
}

} else if (m){ //single article
ids.push(m[2]);
Zotero.debug(m[2]);
}
for each (id in ids) {
var uri = host + ''tools/citex'';
var poststring = "clienttype=1&subtype=1&mode=1&version=1&id=" + id;
Zotero.Utilities.HTTP.doPost(uri, poststring, function(text) {
uri = host+"tools/CitEx";
poststring =
"mode=2&format=3&type=2&file=3&exportCitation.x=16&exportCitation.y=10&exportCitation=submit";
Zotero.Utilities.HTTP.doPost(uri, poststring, function(text) {
var m = text.match(/%A\s(.*)/); //following lines fix Wiley''s
incorrect %A tag (should be separate tags for each author)
if (m){
var newauthors ="";
var authors = m[1].split(",")
for each (var author in authors){
if (author != ""){
newauthors = newauthors + "%A "+Zotero.Utilities.unescapeHTML
(Zotero.Utilities.trimInternal(author))+"\n";
}
}
text = text.replace(/%A\s.*\n/, newauthors);
}
var translator = Zotero.loadTranslator("import");
translator.setTranslator("881f60f2-0802-411a-9228-
ce5f47b64c7d"); //EndNote/Refer/BibIX
translator.setString(text);
translator.setHandler("itemDone", function(obj, item) {
var pdfurl = ''http://download.interscience.wiley.com/cgi-bin/
fulltext?ID='' + id + ''&PLACEBO=IE.pdf&mode=pdf'';
item.attachments.push({url:pdfurl, title:"Wiley Interscience
PDF", mimeType:"application/pdf"});
item.DOI = item.url.match(/\.org\/(.*)$/)[1];
item.complete();
});
translator.translate();
Zotero.done();
});
});
};
Zotero.wait();
}');



The ACS Translator:

REPLACE INTO translators VALUES ('938ebe32-2b2e-4349-a5b3-
b3a05d3de627', '1.0.0b3.r1', '', '2009-01-02 17:37:58', '1', '100',
'4', 'ACS Publications', 'Sean Takats and Michael Berkowitz', 'http://
[^/]*pubs3?.acs.org[^/]*/(?:wls/journals/query/(?:subscriberResults|
query)\.html|acs/journals/toc.page|cgi-bin/(?:article|abstract|sample|
asap).cgi)?',
'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;

if(doc.evaluate(''//input[@id="articleListHeader_selectAllToc"]'',
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "multiple";
} else if (doc.evaluate(''//div[@id="articleHead"]'', doc,
nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "journalArticle";
}
return false;
}
',
'function doWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var host = ''http://'' + doc.location.host + "/";
Zotero.debug(host);
var m = url.match(/https?:\/\/[^\/]*\/doi\/(abs|full)\/(.+)/);
Zotero.debug(m);
var dois = new Array();
if(detectWeb(doc, url) == "multiple") { //search
var doi;
var title;
var availableItems = new Array();
var xpath = ''//div[@class="articleBox"]'';
if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext()) {
elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null);
var elmt = elmts.iterateNext();
do {
title = doc.evaluate(''./div[@class="articleBoxMeta"]/h2'', elmt,
nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
doi = doc.evaluate(''./div[@class="articleBoxMeta"]/h2/a/@href'',
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext
().textContent.replace("/doi/abs/","");
//Zotero.debug(doi);
if (doi.indexOf("prevSearch") != -1){
doi = doi.substring(0,doi.indexOf("?"))
}
//Zotero.debug(doi);
availableItems[doi] = title;
} while (elmt = elmts.iterateNext())
} else {
var xpath = ''//div[@id="contentCell"]/div[*/a]'';
var elmts = doc.evaluate(xpath, doc, nsResolver,
XPathResult.ANY_TYPE, null);
var elmt = elmts.iterateNext();
do {
title = Zotero.Utilities.trimInternal(doc.evaluate(''.//strong'',
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext
().textContent);
id = doc.evaluate(''.//a[1]'', elmt, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext().href.match(/abstract\/([\d]+)
\//)[1];
availableItems[id] = title;
} while (elmt = elmts.iterateNext())
}
var items = Zotero.selectItems(availableItems);
if(!items) {
return true;
}
for(var doi in items) {
dois.push(doi);
}
} else if (m){ //single article
dois.push(m[2]);
Zotero.debug(doi);
}

for each (doi in dois) {
var defaultUrl = host + ''doi/abs/'' + doi;
var absUrl = defaultUrl;
var citUrl = defaultUrl.replace("doi/abs/","action/showCitFormats?
doi=");
Zotero.debug(doi);
//get citation export page''s source code;
Zotero.Utilities.HTTP.doGet(citUrl, function(text){
Zotero.debug(citUrl);
//Zotero.debug(text);
//get the exported RIS file name;
var downloadFileName = text.match(/<input type="hidden"
name="downloadFileName" value="([^>]+)" \/>/)[1];
Zotero.debug(downloadFileName);
var baseurl = "http://pubs.acs.org/action/downloadCitation";
//post url,it could be found by Httplook
var post = "doi=" + doi + "&downloadFileName=" + downloadFileName +
"&include=abs&format=refman&direct=on&submit=Download+article+citation
+data";
//export RIS file
Zotero.Utilities.HTTP.doPost(baseurl, post,function(text){
Zotero.debug(text);
var translator = Zotero.loadTranslator("import");
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
translator.setString(text);
translator.setHandler("itemDone", function(obj, item) {
var pdfUrl = defaultUrl.replace("abs","pdf");
Zotero.debug(pdfUrl);
var fullTextUrl = defaultUrl.replace("abs","full");
Zotero.debug(fullTextUrl);
item.attachments.push(
{title:"ACS Full Text PDF",url:pdfUrl, mimeType:"application/
pdf"},
{url:fullTextUrl, title:"ACS Snapshot", mimeType:"text/html"}
);
item.complete();
});
translator.translate();
Zotero.done();
});
});
}
Zotero.wait();
}');

mcburton

unread,
Jan 2, 2009, 4:20:50 PM1/2/09
to muzy...@gmail.com, zoter...@googlegroups.com
Helvine,
Thanks for helping out with the translators! Can you send my the URLs
of the search results that you are having trouble with (both Wiley &
ACS). Also could you attach the code for detectWeb and doWeb in
javascript files instead of inline SQL updates? Email messes with the
linebreaks and I don't need them escaped for SQL, I just need the
javascript.

Thanks,
Matt Burton
Message has been deleted

Helvine

unread,
Jan 3, 2009, 4:29:33 AM1/3/09
to zotero-dev
Matt Burton,
Thanks for your kind reply. I modified the two translators for Wiley
and ACS according to the ScienceDirect ones. Both work well but the
Wiley translator can't get the PDF files.

ACS:

function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;

if(doc.evaluate('//input[@id="articleListHeader_selectAllToc"]', doc,
nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
return "multiple";
} else if (doc.evaluate('//div[@id="articleHead"]', doc, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext()) {
return "journalArticle";
}
return false;
}
function doWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
var host = 'http://' + doc.location.host + "/";
//Zotero.debug(host);
var m = url.match(/https?:\/\/[^\/]*\/doi\/(abs|full)\/(.+)/);
//Zotero.debug(m);
var dois = new Array();
if(detectWeb(doc, url) == "multiple") { //search
var doi;
var title;
var availableItems = new Array();
var xpath = '//div[@class="articleBox"]';
if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext()) {
elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null);
var elmt = elmts.iterateNext();
do {
title = doc.evaluate('./div[@class="articleBoxMeta"]/h2', elmt,
nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
doi = doc.evaluate('./div[@class="articleBoxMeta"]/h2/a/@href',
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext
().textContent.replace("/doi/abs/","");
//Zotero.debug(doi);
if (doi.indexOf("prevSearch") != -1){
doi = doi.substring(0,doi.indexOf("?"))
}
doi = host + 'doi/abs/' + doi;
//Zotero.debug(doi);
availableItems[doi] = title;
} while (elmt = elmts.iterateNext())
}
var items = Zotero.selectItems(availableItems);
if(!items) {
return true;
}
for(var i in items) {
dois.push(i);
}
} else if (m){ //single article
dois.push(host + 'doi/abs/' + m[2]);
}

Zotero.Utilities.processDocuments(dois, function(newDoc) {
var defaultUrl = newDoc.location.href;
var doi = defaultUrl.replace("http://pubs.acs.org/doi/abs/","");
var absUrl = defaultUrl;
var citUrl = defaultUrl.replace("doi/abs/","action/showCitFormats?
doi=");
//get citation export page's source code;
Zotero.Utilities.HTTP.doGet(citUrl, function(text){
//Zotero.debug(citUrl);
//Zotero.debug(text);
//get the exported RIS file name;
var downloadFileName = text.match(/<input type="hidden"
name="downloadFileName" value="([^>]+)" \/>/)[1];
//Zotero.debug(downloadFileName);
var baseurl = "http://pubs.acs.org/action/downloadCitation";
//post url,it could be found by Httplook
var post = "doi=" + doi + "&downloadFileName=" + downloadFileName +
"&include=abs&format=refman&direct=on&submit=Download+article+citation
+data";
//export RIS file
Zotero.Utilities.HTTP.doPost(baseurl, post,function(text){
//Zotero.debug(text);
text = text.replace("N1 - doi:","M3 - doi:")
//Zotero.debug(text);
var translator = Zotero.loadTranslator("import");
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
translator.setString(text);
translator.setHandler("itemDone", function(obj, item) {
var pdfUrl = defaultUrl.replace("abs","pdf");
//Zotero.debug(pdfUrl);
var fullTextUrl = defaultUrl.replace("abs","full");
//Zotero.debug(fullTextUrl);
item.attachments.push(
{title:"ACS Full Text PDF",url:pdfUrl, mimeType:"application/
pdf"},
{url:fullTextUrl, title:"ACS Snapshot", mimeType:"text/html"}
);
item.complete();
});
translator.translate();
Zotero.done();
});
});
}, function() {Zotero.done;});
Zotero.wait();
}


Wiley:


function detectWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;

var xpath = '//input[@name="ID"][@type="checkbox"]';
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext()) {
return "multiple";
}
if (url.match(/journal\/\d+\/(issue|home)$/)) {
return "multiple";
}
var m = url.match(/https?:\/\/[^\/]*\/(cgi-bin|journal)(\/(abstract|
summary))?\/[0-9]+\/abstract/);
if (m){
return "journalArticle";
}
}

function doWeb(doc, url){
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == 'x') return namespace; else return null;
} : null;
var host = 'http://' + doc.location.host + "/";
Zotero.debug(host);
var m = url.match(/https?:\/\/[^\/]*\/(journal|cgi-bin\/summary)\/
([0-9]+)\/(abstract)?/);
Zotero.debug(m);
var ids = new Array();
if(detectWeb(doc, url) == "multiple") { //search
var id;
var title;
var availableItems = new Array();
var xpath = '//tr[td/input[@name="ID"][@type="checkbox"]]';
if (doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null).iterateNext()) {
elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE,
null);
var elmt = elmts.iterateNext();
do {
title = doc.evaluate('./td/strong', elmt, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext().textContent;
id = doc.evaluate('./td/a[1]', elmt, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext().href;
availableItems[id] = title;
} while (elmt = elmts.iterateNext())
} else {
var xpath = '//div[@id="contentCell"]/div[*/a]';
var elmts = doc.evaluate(xpath, doc, nsResolver,
XPathResult.ANY_TYPE, null);
var elmt = elmts.iterateNext();
do {
title = Zotero.Utilities.trimInternal(doc.evaluate('.//strong',
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext
().textContent);
id = doc.evaluate('.//a[1]', elmt, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext().href;
availableItems[id] = title;
} while (elmt = elmts.iterateNext())
}
var items = Zotero.selectItems(availableItems);
if(!items) {
return true;
}
for(var i in items) {
ids.push(i);
}

} else if (m){ //single article
ids.push(m[2]);
Zotero.debug(m[2]);
}
Zotero.Utilities.processDocuments(ids, function(newDoc) {
var id = newDoc.location.href;
id = id.replace("http://www3.interscience.wiley.com/
journal/","").replace("/abstract","")
}, function() {Zotero.done;});
Zotero.wait();
}

On Jan 3, 6:20 am, mcburton <mcbur...@gmail.com> wrote:
> Helvine,
> Thanks for helping out with the translators! Can you send my the URLs
> of the search results that you are having trouble with (both Wiley &
> ACS). Also could you attach the code for detectWeb and doWeb in
> javascript files instead of inline SQL updates? Email messes with the
> linebreaks and I don't need them escaped for SQL, I just need the
> javascript.
>
> Thanks,
> Matt Burton
>
> ...
>
> read more »

Dan Stillman

unread,
Jan 9, 2009, 5:31:42 PM1/9/09
to zoter...@googlegroups.com
On 1/2/09 3:39 AM, Helvine wrote:
> I wrote a new translator for ACS (http://pubs.acs.org/) based on wiley
> translator, but I can't fetch the multiple results. It always fetches
> the last result for several times. When I checked the wiley ones, I
> found It also has the same error with the multiple resulsts. I knew
> the problem exists in the part of "for each (id in ids) ",but I don't
> know how to correct it. Can anybody tell me the reason and the
> solutions?

Thanks for the updated translators. Unfortunately, while your new
versions may appear to work most of the time, they aren't quite correct.
The problem is a little hard to explain, but take a look at the current
Wiley translator, which I've updated:

https://www.zotero.org/trac/browser/extension/trunk/translators/Wiley%20InterScience.js

Wiley requires a two-phase save, with one request to set up a download
and the other to grab the file. Since web requests in translators are
asynchronous, you can't just loop through and nest XMLHTTPRequest calls
in order to save multiple items, since the outer request will return
immediately, causing the loop to iterate, and there's no guarantee (with
network delays, etc.) that the nested request will be called before
another setup request is made. This is why the original translator often
resulted in duplicate items. You used processDocuments() in an attempt
to solve it, but 1) that's adding additional unnecessary (and
slower-than-XHR) requests and 2) it's not really solving the problem. I
didn't test your translator, but it's probably effective only insofar as
it's slowing the whole process down. You were also, for example, calling
Zotero.done() many times, when it should only be called once. Other
existing translators probably also handle this incorrectly.

A proper solution is to chain Request 1 of each pair to the end of the
callback of the preceding Request 2 so that the pairs of requests will
always occur in order, and to call Zotero.done() only when no more
requests are left. The updated Wiley translator that does this has been
pushed to clients. Your copy of Zotero should auto-update within 24
hours, or you can update manually by clicking Update Now in the General
pane of the Zotero prefs.

Note that there is one glitch with the popup progress window in 1.0,
which may show up with this new translator, in which the popup will
close before all the requests have been made. We've fixed this in the
latest trunk (1.5) build, but it probably won't be fixed in 1.0, since
there are many translators that weren't calling Zotero.done() properly,
and there may be other issues as well.

- Dan

kosa

unread,
Jan 13, 2009, 5:39:57 AM1/13/09
to zotero-dev
Hi,

Sorry for repeating my question everywhere on the zotero forums but
is the issue of that automatic PDF attachment is not working for Wiley
closed? That is I am the only one for whom it does not work?

kosa

unread,
Jan 16, 2009, 3:40:31 AM1/16/09
to zotero-dev
Ok, I learned that it has been solved now.
Reply all
Reply to author
Forward
0 new messages