Hi all,
I posted this on the forum but got no reply.
The translator has trouble grabbing articles from Reuters sites other than the main site, e.g.
http://in.reuters.com. That can be fixed by changing the target to "http://(.*\.)?reuters.com/".
The
second issue has to do with bylines. The translator breaks when it meets a
non-standard byline, and Reuters tends to have a fair number of non-standard
bylines. The else statement in the translator doesn't seem to grab
those non-standard bylines. I'm not sure if it's even possible to capture all
the varieties. I would recommend removing the else statement in the byline
match.
Thanks
Patrick
I have pasted below the 2.0 translator
code that I've been using. I tend to grab about a dozen Reuters
articles daily and so far have not experienced any problems with the
code below.
{
"translatorID":"83979786-44af-494a-9ddb-46654e0486ef",
"translatorType":4,
"label":"Reuters",
"creator":"Michael Berkowitz",
"target":"http://(.*\.)?reuters.com/", "minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2008-07-07 14:50:00"
}
/**
 * Detection hook: decides whether the current page can be saved.
 *
 * @param {Document} doc - The loaded page (not inspected here).
 * @param {string} url - The address of the page.
 * @returns {string|undefined} "newspaperArticle" when the URL contains
 *     the text "article"; otherwise undefined.
 */
function detectWeb(doc, url) {
	var looksLikeArticle = url.match(/article/) !== null;
	return looksLikeArticle ? "newspaperArticle" : undefined;
}
/**
 * Scrape hook: builds a "newspaperArticle" item from the page and saves it.
 *
 * Fields are filled on a best-effort basis. When an expected node is missing,
 * or its text does not have the expected shape (e.g. a non-standard byline or
 * a lead paragraph without a "- " separator), that field is left unset rather
 * than throwing, so the item is still completed with whatever was found.
 *
 * @param {Document} doc - The loaded article page; queried via doc.evaluate.
 * @param {string} url - The address of the page; stored as the item's URL.
 */
function doWeb(doc, url) {
	// Text content of the first node matching the XPath, or null if none.
	function firstText(xpath) {
		var node = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
		return node ? node.textContent : null;
	}

	var item = new Zotero.Item("newspaperArticle");

	var title = firstText('//div[@class="article primaryContent"]/h1');
	if (title !== null) {
		item.title = Zotero.Utilities.trimInternal(title);
	}

	// Keep everything up to and including the last four-digit run (the year),
	// dropping whatever follows it in the timestamp header.
	var timestamp = firstText('//div[@class="timestampHeader"]');
	var dateMatch = timestamp ? timestamp.match(/^.*\d{4}/) : null;
	if (dateMatch) {
		item.date = dateMatch[0];
	}

	// Only a first paragraph starting with "By" is treated as a byline;
	// anything else is ignored so non-standard bylines cannot break the save.
	var byline = firstText('//div[@id="resizeableText"]/p[1]');
	if (byline && byline.match(/^By/)) {
		// Skip the leading "By " and treat the rest as a comma-separated list.
		var authors = byline.substr(3).split(',');
		for (var i = 0; i < authors.length; i++) {
			item.creators.push(Zotero.Utilities.cleanAuthor(authors[i], "author"));
		}

		// The abstract is the text after the first "- " in the second paragraph.
		var lead = firstText('//div[@id="resizeableText"]/p[2]');
		var abstractMatch = lead ? lead.match(/\-\s+(.*)$/) : null;
		if (abstractMatch) {
			item.abstractNote = abstractMatch[1];
		}
	}

	item.url = url;
	item.complete();
}