I made a JabRef importer (kludge)

oct4

unread,

Apr 2, 2008, 1:14:03 PM4/2/08

to zotero-dev

reposted from the Zotero forums (thanks for the link Tjowens)

====================================================================================

Hi all - I needed a JabRef importer that would keep my existing links
to pdfs, and some custom fields, so I kludged something together based
on Simon Kornblith's BibTeX importer. It is ugly but it works OK. Does
anyone want the code? I think I've seen a few requests here. It
doesn't automagically recognize the home directory, you have to edit
that in Scaffold. I have some ideas as to what I would do to finish it
up nicely given the time, but it will be a while until I get to it.
Anyways, let me know if you're interested.

BTW what is etiquette regarding re-using other people's code here? My
instincts as a scientist are to put it as dual-authored with Simon as
1st author as 90% of the code is his, but maybe he doesn't want to be
associated with my ugly code...

oct4

unread,

Apr 3, 2008, 1:05:17 PM4/3/08

to zotero-dev

OK, per request here is the code:

(NB: my changes relative to the stock BibTeX importer are the "re" definition
in "detectImport" and several branches in "processField": one for pdf at the
top and a few more at the bottom.)
===========================================
-- Translator row header: id, minimum Zotero version, and metadata columns.
-- The id literal must stay on a single line; a line break inside it becomes part of the id.
REPLACE INTO translators VALUES ('9d4a2d7f-bb37-49cc-ab1a-5398023edf36',
'1.0.0b4.r1', '', '2008-04-03 13:00:27', '1', '100', '3', 'JabRef',
'Simon Kornblith, modified by Mark Ungrin', 'bib',
'Zotero.configure("dataMode", "block");

function detectImport() {
var block = "";
var read;
// read 20 chars out of the file
while(read = Zotero.read(1)) {
if(read == "%") {
// read until next newline
block = "";
while(Zotero.read(1) != "\n") {}
} else if(read == "\n" && block) {
break;
} else if(" \n\r\t".indexOf(read) == -1) {
block += read;
}
}

var re = /JabRef/; // just looks for "JabRef" in the first line
if(re.test(block)) {
return true;
}
}',
'var fieldMap = {
// BibTeX field name -> Zotero item property. processField() checks this
// table first on import; doExport() walks it to write fields on export.
address:"place",
chapter:"section",
edition:"edition",
// number is not mapped here; processField() has a dedicated branch for it
// number:"issue",
type:"type",
series:"series",
title:"title",
volume:"volume",
copyright:"rights",
isbn:"ISBN",
issn:"ISSN",
location:"archiveLocation",
url:"url",
doi:"DOI",
"abstract":"abstractNote"
};

// Import-only field mappings: processField() consults this after fieldMap.
// doExport() does not iterate this table.
var inputFieldMap = {
booktitle :"publicationTitle",
school:"publisher",
publisher:"publisher"
};

// Zotero item type -> BibTeX entry type. doExport() looks types up by key.
// beginRecord() scans this table and takes the first key whose value equals
// the BibTeX type, so the order of entries decides which Zotero type an
// ambiguous BibTeX type (article, misc) becomes on import.
var typeMap = {
book:"book",
bookSection:"inbook",
journalArticle:"article",
magazineArticle:"article",
newspaperArticle:"article",
thesis:"phdthesis",
letter:"misc",
manuscript:"unpublished",
interview:"misc",
film:"misc",
artwork:"misc",
webpage:"misc",
conferencePaper:"inproceedings"
};

// Import-only supplement to typeMap: BibTeX entry type -> Zotero item type.
// beginRecord() passes the value straight to new Zotero.Item(), so every
// value here must be a Zotero item type, not a BibTeX type.
var inputTypeMap = {
    conference:"conferencePaper",
    techreport:"report",
    booklet:"book",
    incollection:"bookSection",
    manual:"book",
    mastersthesis:"thesis",
    misc:"book",
    proceedings:"book"
};

/*
 * Three-letter month abbreviations, indexed from 0 (jan) to 11 (dec).
 * These are assumed to match the month macros described in the LaTeX book.
 */
var months = "jan feb mar apr may jun jul aug sep oct nov dec".split(" ");

/*
 * Character table for converting between TeX accent sequences and Unicode.
 * Keys are character codes, values are the TeX sequence. doImport() turns
 * this into character -> RegExp(TeX sequence); doExport() turns it into
 * "{TeX sequence}" -> RegExp(character). Characters outside this set
 * (Czech, for example) are not covered; add entries as needed.
 */
var accentedCharacters = {
    // grave accents
    192:"\\`A", 224:"\\`a",
    200:"\\`E", 232:"\\`e",
    204:"\\`I", 236:"\\`i",
    210:"\\`O", 242:"\\`o",
    217:"\\`U", 249:"\\`u",
    // acute accents (the doubled quote is the SQL escape for a single quote)
    193:"\\''A", 225:"\\''a",
    201:"\\''E", 233:"\\''e",
    205:"\\''I", 237:"\\''i",
    211:"\\''O", 243:"\\''o",
    218:"\\''U", 250:"\\''u",
    // circumflexes
    194:"\\^A", 226:"\\^a",
    202:"\\^E", 234:"\\^e",
    206:"\\^I", 238:"\\^i",
    212:"\\^O", 244:"\\^o",
    219:"\\^U", 251:"\\^u",
    // tildes
    195:"\\~A", 227:"\\~a",
    213:"\\~O", 245:"\\~o",
    209:"\\~N", 241:"\\~n",
    // umlauts (double-quoted so no SQL quote escaping is needed)
    196:"\\\"A", 228:"\\\"a",
    203:"\\\"E", 235:"\\\"e",
    207:"\\\"I", 239:"\\\"i",
    214:"\\\"O", 246:"\\\"o",
    220:"\\\"U", 252:"\\\"u",
    // cedillas: 199 and 231 are the C-cedilla code points
    // (191 is the inverted question mark, not a cedilla)
    199:"\\c{C}", 231:"\\c{c}",
    // AE ligature
    198:"{\\AE}", 230:"{\\ae}",
    // o with stroke: 216 is the upper-case letter, 248 the lower-case one
    216:"{\\O}", 248:"{\\o}",
    // a with ring
    197:"{\\AA}", 229:"{\\aa}"
};

/**
 * Store one parsed BibTeX/JabRef field on the Zotero item being built.
 *
 * Plain fields go through fieldMap / inputFieldMap; the remaining branches
 * handle fields that need merging or conversion (dates, creators, tags,
 * attachments). Unknown fields are kept as notes prefixed with "@".
 *
 * @param {Object} item   item under construction (mutated in place)
 * @param {string} field  lower-cased field name
 * @param {string} value  cleaned field value from getFieldValue()
 */
function processField(item, field, value) {
    if(fieldMap[field]) {
        item[fieldMap[field]] = value;
    } else if(inputFieldMap[field]) {
        item[inputFieldMap[field]] = value;
    } else if(field == "pdf") {
        // Link to the PDF that JabRef already tracks. The JabRef PDF home
        // directory is NOT detected automatically: CHANGE THIS LINE TO POINT
        // TO YOUR PDF HOME DIRECTORY IN JabRef (JabRef records the value at
        // the end of its file, so it could be read from there in future).
        var attachmentDefaultDir = "C:\\Documents and Settings\\Mark\\My Documents\\organized PDFs\\";
        var pdfLocation = value;
        if(pdfLocation.indexOf("C:/") == -1) {
            // not an absolute path: resolve against the home directory
            pdfLocation = attachmentDefaultDir + pdfLocation;
        }
        pdfLocation = "file://" + pdfLocation.replace(/\\/g, "/");
        // append instead of assigning, so a PubMed link added by an earlier
        // pmid field is not thrown away
        if(!item.attachments) {
            item.attachments = [];
        }
        item.attachments.push({url:pdfLocation, title:value, mimeType:"application/pdf"});
    } else if(field == "journal") {
        if(item.publicationTitle) {
            // we already had an fjournal
            item.journalAbbreviation = value;
        } else {
            item.publicationTitle = value;
        }
    } else if(field == "fjournal") {
        if(item.publicationTitle) {
            // the title seen so far came from "journal": keep it as the abbreviation
            item.journalAbbreviation = item.publicationTitle;
        }
        item.publicationTitle = value;
    } else if(field == "author" || field == "editor") {
        // parse authors/editors; a comma means "Last, First" order
        var names = value.split(" and ");
        for(var n = 0; n < names.length; n++) {
            var name = names[n];
            item.creators.push(Zotero.Utilities.cleanAuthor(name, field, (name.indexOf(",") != -1)));
        }
    } else if(field == "institution" || field == "organization") {
        item.backupPublisher = value;
    } else if(field == "number") { // fix for techreport
        if(item.itemType == "report") {
            item.reportNumber = value;
        } else {
            item.issue = value;
        }
    } else if(field == "month") {
        var monthIndex = months.indexOf(value.toLowerCase());
        if(monthIndex != -1) {
            value = Zotero.Utilities.formatDate({month:monthIndex});
        } else {
            value += " ";
        }

        if(item.date) {
            if(value.indexOf(item.date) != -1) {
                // value contains year and more
                item.date = value;
            } else {
                item.date = value+item.date;
            }
        } else {
            item.date = value;
        }
    } else if(field == "year") {
        if(item.date) {
            if(item.date.indexOf(value) == -1) {
                // date does not already contain year
                item.date += value;
            }
        } else {
            item.date = value;
        }
    } else if(field == "pages") {
        item.pages = value.replace(/--/g, "-");
    } else if(field == "note" || field == "annote") {
        item.extra += "\n"+value;
    } else if(field == "howpublished") {
        item.extra += "\nPublished: "+value;
    } else if(field == "keywords") {
        // keywords/tags: semicolon-separated if any semicolon is present,
        // otherwise space-separated
        if(value.indexOf(";") == -1) {
            item.tags = value.split(" ");
        } else {
            item.tags = value.split(/; ?/g);
        }
    } else if(field == "projects") { // a custom field I use in JabRef
        var projectsList;
        if(value.indexOf(";") == -1) {
            projectsList = value.split(" ");
        } else {
            projectsList = value.split(/; ?/g);
        }
        for(var p = 0; p < projectsList.length; p++) {
            item.tags.push("@Project: "+projectsList[p]);
        }
    } else if(field == "review") { // a custom field I use in JabRef
        item.notes.push({note:value});
    } else if(field == "pmid") {
        // link to PubMed where the PMID is available
        item.attachments.push({url:"http://www.ncbi.nlm.nih.gov/pubmed/"+value, title:"PMID "+value, mimeType:"text/html"});
    } else if(field == "timestamp") {
        item.notes.push({note:"@Timestamp: "+value});
    } else {
        // unknown fields are inserted as notes, preceded by the @ so they can
        // be separated out again later, e.g. for export, or when Zotero
        // implements custom fields
        item.notes.push({note:"@"+field+": "+value});
    }
}

/**
 * Read the value that follows "=" in a BibTeX field and return it cleaned up.
 *
 * Three value forms are recognised: a bare number, a {braced} value and a
 * "quoted" value. Any other first character yields an empty string. For
 * values longer than one character, TeX accent sequences are replaced via
 * accentedCharacters (which doImport() must already have converted to
 * regular expressions), grouping braces are removed, backslash escapes of
 * special characters are undone and whitespace runs are collapsed.
 *
 * @returns {string} the cleaned field value
 */
function getFieldValue() {
    // skip leading whitespace (a falsy read means there is no more input)
    var read = Zotero.read(1);
    while(read && " \n\r\t".indexOf(read) != -1) {
        read = Zotero.read(1);
    }

    var value = "";
    var openBraces;
    // now, we have the first character of the field
    if("0123456789".indexOf(read) != -1) {
        // character is a number: take the whole digit run
        value += read;
        while((read = Zotero.read(1)) && ("0123456789".indexOf(read) != -1)) {
            value += read;
        }
    } else if(read == "{") {
        // braced value: runs to the matching unescaped close brace
        openBraces = 1;
        while(read = Zotero.read(1)) {
            if(read == "{" && value[value.length-1] != "\\") {
                openBraces++;
                value += "{";
            } else if(read == "}" && value[value.length-1] != "\\") {
                openBraces--;
                if(openBraces == 0) {
                    break;
                } else {
                    value += "}";
                }
            } else {
                value += read;
            }
        }
    } else if(read == "\"") {
        // quoted value: runs to the next quote that is outside all braces
        openBraces = 0;
        while(read = Zotero.read(1)) {
            if(read == "{" && value[value.length-1] != "\\") {
                openBraces++;
                value += "{";
            } else if(read == "}" && value[value.length-1] != "\\") {
                openBraces--;
                value += "}";
            } else if(read == "\"" && openBraces == 0) {
                break;
            } else {
                value += read;
            }
        }
    }

    if(value.length > 1) {
        // replace accented characters (yucky slow)
        for(var i in accentedCharacters) {
            value = value.replace(accentedCharacters[i], i);
        }

        // kill braces
        value = value.replace(/([^\\])[{}]+/g, "$1");
        if(value[0] == "{") {
            value = value.substr(1);
        }

        // chop off backslashes; run twice because adjacent escapes overlap
        // and a single pass skips every second one
        value = value.replace(/([^\\])\\([#$%&~_^\\{}])/g, "$1$2");
        value = value.replace(/([^\\])\\([#$%&~_^\\{}])/g, "$1$2");
        if(value[0] == "\\" && "#$%&~_^\\{}".indexOf(value[1]) != -1) {
            value = value.substr(1);
        }
        if(value[value.length-1] == "\\" && "#$%&~_^\\{}".indexOf(value[value.length-2]) != -1) {
            value = value.substr(0, value.length-1);
        }
        value = value.replace(/\\\\/g, "\\");
        value = value.replace(/\s+/g, " ");
    }

    return value;
}

/**
 * Parse one BibTeX entry body (everything after "@type{") and save the item.
 *
 * The Zotero item type comes from inputTypeMap, or else from the first
 * typeMap key whose value equals the BibTeX type. Entries of unknown type
 * are still read through to their closing character so parsing stays in
 * step, but nothing is saved for them.
 *
 * @param {string} type       BibTeX entry type as read after "@"
 * @param {string} closeChar  character that ends the entry ("}" or ")")
 */
function beginRecord(type, closeChar) {
    type = Zotero.Utilities.cleanString(type.toLowerCase());

    var item;
    if(inputTypeMap[type]) {
        item = new Zotero.Item(inputTypeMap[type]);
    } else {
        for(var i in typeMap) {
            if(typeMap[i] == type) {
                item = new Zotero.Item(i);
                break;
            }
        }
        if(!item) {
            Zotero.debug("discarded item from BibTeX; type was "+type);
        }
    }

    // only touch the item when one was created; an unknown type leaves it unset
    if(item) {
        item.extra = "";
    }

    var field = "";
    var read;
    while(read = Zotero.read(1)) {
        if(read == "=") {
            // equals begin a field
            var value = getFieldValue();
            if(item) {
                processField(item, field.toLowerCase(), value);
            }
            field = "";
        } else if(read == ",") {
            // commas reset
            field = "";
        } else if(read == closeChar) {
            if(item) {
                // chop the leading \n that processField() puts before each extra line
                if(item.extra) item.extra = item.extra.substr(1);
                item.complete();
            }
            return;
        } else if(" \n\r\t".indexOf(read) == -1) {
            // skip whitespace, collect everything else as the field name
            field += read;
        }
    }
}

/**
 * Import entry point: convert the accent table for import, then scan the
 * input for "@type{" or "@type(" and hand each entry to beginRecord().
 */
function doImport() {
    // Turn the table into character -> RegExp(TeX sequence). Every regex
    // metacharacter is escaped, not just the backslash; otherwise the "^" in
    // the circumflex sequences acts as an anchor and never matches.
    var table = {};
    for(var code in accentedCharacters) {
        var pattern = accentedCharacters[code].replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
        table[String.fromCharCode(code)] = new RegExp(pattern, "g");
    }
    accentedCharacters = table;

    var read = "";
    // false while outside an "@type" header, otherwise the type read so far
    var type = false;

    while(read = Zotero.read(1)) {
        if(read == "@") {
            type = "";
        } else if(type !== false) {
            if(read == "{") {
                // possible open character
                beginRecord(type, "}");
                type = false;
            } else if(read == "(") {
                // possible open character
                beginRecord(type, ")");
                type = false;
            } else {
                type += read;
            }
        }
    }
}

/**
 * Write a field whose value is a BibTeX macro (for example a month name).
 *
 * Some fields are, in fact, macros. In that case the data must not be put
 * in braces, as that would stop the macro from expanding properly.
 * Expects accentedCharacters to be the export form built by doExport()
 * ("{TeX sequence}" -> RegExp).
 *
 * @param {string} field  BibTeX field name
 * @param {*}      value  value to write; falsy values are skipped
 */
function writeMacroField(field, value) {
    if(!value) {
        return;
    }

    value = value.toString();
    // replace naughty chars
    value = value.replace(/([#$%&~_^\\{}])/g, "\\$1");

    // replace accented characters
    for(var i in accentedCharacters) {
        value = value.replace(accentedCharacters[i], i);
    }
    // replace other accented characters
    value = value.replace(/[\u0080-\uFFFF]/g, "?");

    // write, without surrounding braces
    Zotero.write(",\n\t"+field+" = "+value);
}

/**
 * Write one brace-delimited BibTeX field: ",\n\tfield = {value}".
 *
 * TeX special characters are backslash-escaped, characters listed in
 * accentedCharacters (export form built by doExport()) become their TeX
 * sequence, and any remaining non-ASCII character is written as "?".
 *
 * @param {string} field  BibTeX field name
 * @param {*}      value  value to write; falsy values are skipped
 */
function writeField(field, value) {
    if(!value) return;

    value = value.toString();
    // replace naughty chars
    value = value.replace(/([#$%&~_^\\{}])/g, "\\$1");
    // we assume people who use braces in their title probably did so
    // intentionally, so undo the escaping of braces there
    if(field == "title") {
        value = value.replace(/\\([{}])/g, "$1");
    }
    // replace accented characters
    for(var i in accentedCharacters) {
        value = value.replace(accentedCharacters[i], i);
    }
    // replace other accented characters
    value = value.replace(/[\u0080-\uFFFF]/g, "?");

    // write
    Zotero.write(",\n\t"+field+" = {"+value+"}");
}

var numberRe = /^[0-9]+/;
function doExport() {
// switch keys and values of accented characters
var newArray = new Array();
for(var i in accentedCharacters) {
newArray["{"+accentedCharacters[i]+"}"] = new
RegExp(String.fromCharCode(i), "g");
}
accentedCharacters = newArray;

//Zotero.write("% BibTeX export generated by Zotero
"+Zotero.Utilities.getVersion());

var first = true;
var citekeys = new Object();
var item;
while(item = Zotero.nextItem()) {
// determine type
var type = typeMap[item.itemType];
if(!type) type = "misc";

// create a unique citation key
var basekey = "";
if(item.creators && item.creators[0] &&
item.creators[0].lastName) {
basekey =
item.creators[0].lastName.toLowerCase().replace(/ /g,"_").replace(/,/
g,"");
}

// include the item title as part of the citation key
if (item["title"]) {
// this is a list of words that should not appear as part
of the citation key
var bannedTitleKeys = {"a" : 1, "an" : 1, "does": 1,
"how": 1, "it''s": 1, "on" : 1, "some": 1, "the" : 1, "this" : 1,
"why" : 1 };
var titleElements = item["title"].split(" ");
var appendKey = "";
for (te in titleElements) {
if (!bannedTitleKeys[titleElements[te].toLowerCase()])
{
appendKey = "_" + titleElements[te].toLowerCase()
+ "_";
break;
}
}
basekey = basekey + appendKey;
}

if(item.date) {
var date = Zotero.Utilities.strToDate(item.date);
if(date.year && numberRe.test(date.year)) {
basekey += date.year;
}
}

// make sure we do not have any other funny characters
basekey = basekey.replace(/[\. ,'':\"!&]/g,"");
var citekey = basekey;
var i = 0;
while(citekeys[citekey]) {
i++;
citekey = basekey+"-"+i;
}
citekeys[citekey] = true;

// write citation key
Zotero.write((first ? "" : ",\n\n") + "@"+type+"{"+citekey);
first = false;

for(var field in fieldMap) {
if(item[fieldMap[field]]) {
writeField(field, item[fieldMap[field]]);
}
}

if(item.conferenceName) {
writeField("booktitle", item.conferenceName);
}

if(item.publicationTitle) {
if(item.itemType == "chapter") {
writeField("booktitle", item.publicationTitle);
} else {
writeField("journal", item.publicationTitle);
}
}

if(item.publisher) {
if(item.itemType == "thesis") {
writeField("school", item.publisher);
} else {
writeField("publisher", item.publisher);
}
}

if(item.creators && item.creators.length) {
// split creators into subcategories
var author = "";
var editor = "";
for each(var creator in item.creators) {
var creatorString = creator.lastName;

if (creator.firstName) {
creatorString = creator.firstName + " " +
creator.lastName;
}

if (creator.creatorType == "editor") {
editor += " and "+creatorString;
} else {
author += " and "+creatorString;
}
}

if(author) {
writeField("author", author.substr(5));
}
if(editor) {
writeField("editor", editor.substr(5));
}
}

if(item.date) {
// need to use non-localized abbreviation
if(date.month) {
writeMacroField("month", months[date.month]);
}
if(date.year) {
writeField("year", date.year);
}
}

if(item.extra) {
writeField("note", item.extra);
}

if(item.tags && item.tags.length) {
var tagString = "";
for each(var tag in item.tags) {
tagString += ","+tag.tag;
}
writeField("keywords", tagString.substr(1));
}
if(item.pages) {
writeField("pages", item.pages);
}

Zotero.write("\n}");
}
}');

ffrank

unread,

Apr 14, 2008, 12:43:18 PM4/14/08

to zotero-dev

Hey,

it looks like a nice script, but I have no idea what to do with it.
Could you write a short manual so I can get my JabRef library imported
as well?

Help is much appreciated.

> pubmed/"+value, title:"PMID "+value, mimeType:"text/html"}); //http://www.ncbi.nlm.nih.gov/pubmed/

oct4

unread,

Apr 15, 2008, 8:46:23 PM4/15/08

to zotero-dev

Anyone have a way to import Zotero translators?

You can do it using the Scaffold plugin & cutting & pasting, but that
will be a multi-step process - can anyone tell me a better way?

If not I will explain how to do it using Scaffold, but there's gotta
be a better way...

(if there's no response to the above question & you don't hear from me
again, feel free to email me to remind me, work is busy these days...)

> ...
>
> read more »

Reply all

Reply to author

Forward