[solrmarc] r1803 committed - Changes to support translating records from Unicode to MARC8 with ones...

1 view
Skip to first unread message

solr...@googlecode.com

unread,
Jul 16, 2015, 5:38:15 PM7/16/15
to solrmarc...@googlegroups.com
Revision: 1803
Author: rh...@virginia.edu
Date: Thu Jul 16 21:37:58 2015 UTC
Log: Changes to support translating records from Unicode to MARC8; records
whose MARC8 output contains a vertical bar are instead written out using
Numeric Character References (NCR)
https://code.google.com/p/solrmarc/source/detail?r=1803

Modified:
/trunk/lib/solrmarc/src/org/solrmarc/marc/MarcPrinter.java

=======================================
--- /trunk/lib/solrmarc/src/org/solrmarc/marc/MarcPrinter.java Mon Jan 27
16:39:32 2014 UTC
+++ /trunk/lib/solrmarc/src/org/solrmarc/marc/MarcPrinter.java Thu Jul 16
21:37:58 2015 UTC
@@ -17,7 +17,9 @@
*/


+import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
+import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
@@ -122,12 +124,36 @@
}

}
+
+ /**
+  * Reports whether {@code seq} occurs as a contiguous run of bytes anywhere
+  * in {@code bytes}.
+  *
+  * @param bytes the array to search
+  * @param seq the byte sequence to look for
+  * @return {@code true} if {@code seq} is found in {@code bytes}, including a
+  *         match that ends on the last byte; {@code false} otherwise, or when
+  *         {@code seq} is empty
+  */
+ private static boolean byteArrayContains(byte[] bytes, byte[] seq)
+ {
+     // An empty pattern previously indexed seq[0] and threw; report "not found".
+     if (seq.length == 0)
+     {
+         return(false);
+     }
+     // "<=" so the last possible start position is tested too; with "<" a
+     // match ending on the final byte of the array was never detected.
+     for (int i = 0; i <= bytes.length - seq.length; i++)
+     {
+         boolean matched = true;
+         for (int j = 0; j < seq.length; j++)
+         {
+             if (bytes[i + j] != seq[j])
+             {
+                 matched = false;
+                 break;
+             }
+         }
+         if (matched)
+         {
+             return(true);
+         }
+     }
+     return(false);
+ }
+
@Override
public int handleAll()
{
// keep track of record count
int recordCounter = 0;
java.util.Set<String> contentMap = new
java.util.LinkedHashSet<String>();
+ UnicodeToAnsel conv = null;
+ UnicodeToAnsel convNCR = null;
+ ByteArrayOutputStream baos = null;
+
while(reader != null && reader.hasNext())
{
recordCounter++;
@@ -198,6 +224,31 @@
record.getLeader().setCharCodingScheme(' ');
writer.write(record);
}
+ else if (mode.equals("untranslateNCRifneeded"))
+ {
+ if (writer == null)
+ {
+ baos = new ByteArrayOutputStream();
+ conv = new UnicodeToAnsel();
+ convNCR = new UnicodeToAnsel(true);
+ writer = new MarcStreamWriter(baos, "ISO8859_1",
true);
+ writer.setConverter(conv);
+ }
+ baos.reset();
+ record.getLeader().setCharCodingScheme(' ');
+ writer.setConverter(conv);
+ writer.write(record);
+ baos.flush();
+ byte[] bytes = baos.toByteArray();
+ if (byteArrayContains(bytes, "|".getBytes()))
+ {
+ baos.reset();
+ writer.setConverter(convNCR);
+ writer.write(record);
+ }
+ baos.flush();
+ System.out.write(baos.toByteArray());
+ }
else if (mode.equals("index"))
{
String recStr = record.toString();
@@ -282,6 +333,12 @@
System.err.println("Error reading Marc Record: "+
me.getMessage());
logger.error("Error reading Marc Record: "+
me.getMessage());
return(1);
+ }
+ catch (IOException e)
+ {
+ System.err.println("Error writing to
ByteArrayOutputStream: "+ e.getMessage());
+ logger.error("Error writing to ByteArrayOutputStream: "+
e.getMessage());
+ return(1);
}
}
if (writer != null) { writer.close(); }
Reply all
Reply to author
Forward
0 new messages