[solrmarc] r1782 committed - Added VuFindIndexer convenience methods for call numbers, includes tes...

1 view
Skip to first unread message

solr...@googlecode.com

unread,
Mar 5, 2015, 4:33:10 PM3/5/15
to solrmarc...@googlegroups.com
Revision: 1782
Author: olso...@gmail.com
Date: Thu Mar 5 21:32:51 2015 UTC
Log: Added VuFindIndexer convenience methods for call numbers,
includes tests.

https://code.google.com/p/solrmarc/source/detail?r=1782

Added:
/trunk/examples/GenericVuFind/test/src
/trunk/examples/GenericVuFind/test/src/org
/trunk/examples/GenericVuFind/test/src/org/solrmarc
/trunk/examples/GenericVuFind/test/src/org/solrmarc/index

/trunk/examples/GenericVuFind/test/src/org/solrmarc/index/VuFindIndexerTest.java
Modified:
/trunk/build.properties
/trunk/examples/GenericVuFind/src/org/solrmarc/index/VuFindIndexer.java
/trunk/lib/solrmarc/src/org/solrmarc/index/SolrIndexer.java

=======================================
--- /dev/null
+++
/trunk/examples/GenericVuFind/test/src/org/solrmarc/index/VuFindIndexerTest.java
Thu Mar 5 21:32:51 2015 UTC
@@ -0,0 +1,131 @@
+package org.solrmarc.index;
+
+import static org.junit.Assert.*;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.marc4j.marc.ControlField;
+import org.marc4j.marc.DataField;
+import org.marc4j.marc.Leader;
+import org.marc4j.marc.Record;
+import org.marc4j.marc.Subfield;
+import org.marc4j.marc.VariableField;
+import org.marc4j.marc.impl.DataFieldImpl;
+import org.marc4j.marc.impl.LeaderImpl;
+import org.marc4j.marc.impl.RecordImpl;
+import org.marc4j.marc.impl.SubfieldImpl;
+import org.marc4j.marc.impl.VariableFieldImpl;
+import org.solrmarc.callnum.DeweyCallNumber;
+import org.solrmarc.callnum.LCCallNumber;
+import org.solrmarc.index.VuFindIndexer;
+
+
+public class VuFindIndexerTest {
+
+    /** LC call number stored in the first 090 field of the fixture record. */
+    private static final String CALL_NUM_LC = "PS 1234.5 .G78";
+
+    /** Dewey call number stored in the second 090 field of the fixture record. */
+    private static final String CALL_NUM_DDC = "324.987 B34";
+
+    /** Leader shared by the fixture record; recreated before every test. */
+    Leader genericLeader = null;
+
+    /** Fixture record; rebuilt before every test by {@link #initCallNumRec()}. */
+    Record callNumRec = null;
+
+    /**
+     * Builds the fixture record: two 090 fields, one carrying an LC call number
+     * typed "LC" and one carrying a Dewey call number typed "DDC". The call
+     * number sits in subfield a and the type code in subfield t.
+     */
+    @Before
+    public void initCallNumRec() {
+        // Assign the field itself. Declaring a local of the same name here
+        // would shadow the field and leave the fixture null for every test.
+        callNumRec = new RecordImpl();
+        genericLeader = new LeaderImpl(" ");
+        callNumRec.setLeader(genericLeader);
+
+        callNumRec.addVariableField(newCallNumField(CALL_NUM_LC, "LC"));
+        callNumRec.addVariableField(newCallNumField(CALL_NUM_DDC, "DDC"));
+    }
+
+    /**
+     * Creates an 090 field with blank indicators.
+     *
+     * @param callNum  value for subfield a
+     * @param callType value for subfield t
+     * @return the populated field
+     */
+    private static DataField newCallNumField(String callNum, String callType) {
+        DataField field = new DataFieldImpl("090", ' ', ' ');
+        field.addSubfield(new SubfieldImpl('a', callNum));
+        field.addSubfield(new SubfieldImpl('t', callType));
+        return field;
+    }
+
+    @Test
+    public void testGetCallNumberByType() {
+        // A type code that appears in no field yields an empty result.
+        assertTrue(VuFindIndexer.getCallNumberByType(callNumRec, "090a", "t", "XXX").isEmpty());
+
+        Set<String> lcCallStrings = new HashSet<>();
+        lcCallStrings.add(CALL_NUM_LC);
+        Set<String> lcCallFound = VuFindIndexer.getCallNumberByType(callNumRec, "090a", "t", "LC");
+        assertEquals(1, lcCallFound.size());
+        assertTrue(lcCallFound.containsAll(lcCallStrings));
+
+        Set<String> deweyCallSet = new HashSet<>();
+        deweyCallSet.add(CALL_NUM_DDC);
+        assertTrue(VuFindIndexer.getCallNumberByType(callNumRec, "090a", "t", "DDC").containsAll(deweyCallSet));
+    }
+
+    @Test
+    public void testGetDeweySortableByType() {
+        assertEquals(new DeweyCallNumber(CALL_NUM_DDC).getShelfKey(),
+                VuFindIndexer.getDeweySortableByType(callNumRec, "090a", "t", "DDC"));
+    }
+
+    @Test
+    public void testGetLCSortableByType() {
+        assertEquals(new LCCallNumber(CALL_NUM_LC).getShelfKey(),
+                VuFindIndexer.getLCSortableByType(callNumRec, "090a", "t", "LC"));
+    }
+}
=======================================
--- /trunk/build.properties Fri Jan 24 14:42:27 2014 UTC
+++ /trunk/build.properties Thu Mar 5 21:32:51 2015 UTC
@@ -1,5 +1,5 @@
# TOP LEVEL solrmarc properties file
-java.compat.version = 1.6
+java.compat.version = 1.7

name = solrmarc

=======================================
--- /trunk/examples/GenericVuFind/src/org/solrmarc/index/VuFindIndexer.java
Thu Feb 5 17:09:42 2015 UTC
+++ /trunk/examples/GenericVuFind/src/org/solrmarc/index/VuFindIndexer.java
Thu Mar 5 21:32:51 2015 UTC
@@ -26,6 +26,8 @@
import java.io.OutputStreamWriter;
import java.lang.StringBuilder;
import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
@@ -798,11 +800,97 @@
return result;
}

+ /**
+  * Collect call numbers of a specific type into a caller-supplied collection.
+  *
+  * <p>{@code fieldSpec} is of form {@literal 098abc:099ab}, does not accept subfield ranges.
+  * Entries shorter than three characters are reported on {@code System.err} and skipped.
+  * Fields that are not {@link DataField}s are skipped, since they carry no subfields.
+  *
+  * @param record current MARC record
+  * @param fieldSpec which MARC fields / subfields need to be analyzed
+  * @param callTypeSf subfield containing call number type, single character only
+  * @param callType literal call number code
+  * @param result a collection to gather the call numbers
+  * @return collection of call numbers, same object as {@code result}
+  */
+ public static Collection<String> getCallNumberByTypeCollector(
+         Record record, String fieldSpec, String callTypeSf, String callType, Collection<String> result) {
+     for (String tag : fieldSpec.split(":")) {
+         // Check to ensure tag length is at least 3 characters
+         if (tag.length() < 3) {
+             //TODO: Should this go to a log? Better message for a bad tag in a field spec?
+             System.err.println("Invalid tag specified: " + tag);
+             continue;
+         }
+         String dfTag = tag.substring(0, 3);
+         // NOTE(review): sfSpec stays null when the entry is a bare tag; confirm
+         // that getSubfieldsAsString tolerates a null spec.
+         String sfSpec = null;
+         if (tag.length() > 3) {
+             sfSpec = tag.substring(3);
+         }
+
+         // do all fields for this tag
+         for (VariableField vf : record.getVariableFields(dfTag)) {
+             // Only data fields have subfields; skip anything else instead of
+             // failing on the cast.
+             if (!(vf instanceof DataField)) {
+                 continue;
+             }
+             DataField df = (DataField) vf;
+             boolean callTypeMatch = false;
+
+             // Assume call type subfield could repeat; one match is enough
+             for (Subfield typeSf : df.getSubfields(callTypeSf)) {
+                 if (callTypeSf.indexOf(typeSf.getCode()) != -1 && typeSf.getData().equals(callType)) {
+                     callTypeMatch = true;
+                     break;
+                 }
+             }
+             if (callTypeMatch) {
+                 result.add(df.getSubfieldsAsString(sfSpec));
+             }
+         } // end loop over variable fields
+     } // end loop over fieldSpec
+     return result;
+ }
+
+
+ /**
+  * Get the call numbers of a specific type as a set, so each distinct call
+  * number appears once, in the order it was first encountered.
+  *
+  * <p>{@code fieldSpec} is of form {@literal 098abc:099ab}, does not accept subfield ranges.
+  *
+  * @param record current MARC record
+  * @param fieldSpec which MARC fields / subfields need to be analyzed
+  * @param callTypeSf subfield containing call number type, single character only
+  * @param callType literal call number code
+  * @return set of call numbers
+  */
+ public static Set<String> getCallNumberByType(Record record, String fieldSpec, String callTypeSf, String callType) {
+     Set<String> callNumbers = new LinkedHashSet<String>();
+     // The collector fills and returns the collection it is handed, so the
+     // local reference can be returned without a down-cast.
+     getCallNumberByTypeCollector(record, fieldSpec, callTypeSf, callType, callNumbers);
+     return callNumbers;
+ }
+
+ /**
+  * Get the call numbers of a specific type as a list, so repeated call
+  * numbers are kept.
+  *
+  * <p>{@code fieldSpec} is of form {@literal 098abc:099ab}, does not accept subfield ranges.
+  *
+  * @param record current MARC record
+  * @param fieldSpec which MARC fields / subfields need to be analyzed
+  * @param callTypeSf subfield containing call number type, single character only
+  * @param callType literal call number code
+  * @return list of call numbers
+  */
+ public static List<String> getCallNumberByTypeAsList(Record record, String fieldSpec, String callTypeSf, String callType) {
+     List<String> callNumbers = new ArrayList<String>();
+     // The collector fills and returns the collection it is handed, so the
+     // local reference can be returned without a down-cast.
+     getCallNumberByTypeCollector(record, fieldSpec, callTypeSf, callType, callNumbers);
+     return callNumbers;
+ }
+
/**
* Extract the full call number from a record, stripped of spaces
* @param record MARC record
* @return Call number label
+ * @deprecated Obsolete as of VuFind 2.4.
+ * This method exists only to support the VuFind call number
search, version <= 2.3.
+ * As of VuFind 2.4, the munging for call number search is
handled entirely in Solr.
*/
+ @Deprecated
public String getFullCallNumber(final Record record) {

return(getFullCallNumber(record, "099ab:090ab:050ab"));
@@ -813,7 +901,11 @@
* @param record MARC record
* @param fieldSpec taglist for call number fields
* @return Call number label
+ * @deprecated Obsolete as of VuFind 2.4.
+ * This method exists only to support the VuFind call number
search, version <= 2.3.
+ * As of VuFind 2.4, the munging for call number search is
handled entirely in Solr.
*/
+ @Deprecated
public String getFullCallNumber(final Record record, String fieldSpec)
{

String val = getFirstFieldVal(record, fieldSpec);
@@ -988,6 +1080,86 @@
// If we made it this far, we found no sign of illustrations:
return "Not Illustrated";
}
+
+
+ /**
+  * Normalize LC numbers for sorting purposes (use only the first valid number!)
+  *
+  * Can return null
+  *
+  * @param record current MARC record
+  * @param fieldSpec which MARC fields / subfields need to be analyzed
+  * @return String containing the shelf key of the first valid LC number
+  * encountered, or null when none of the values is a valid LC number.
+  */
+ public String getLCSortable(Record record, String fieldSpec) {
+     // Walk the values extracted for the specified MARC fields
+     for (String candidate : getFieldList(record, fieldSpec)) {
+         LCCallNumber parsed = new LCCallNumber(candidate);
+         // The first value that parses as a valid LC number wins
+         if (parsed.isValid()) {
+             return parsed.getShelfKey();
+         }
+     }
+
+     // No valid sortable LC number was found
+     return null;
+ }
+
+ /**
+  * Get sort key for first LC call number, identified by call type.
+  *
+  * <p>{@code fieldSpec} is of form {@literal 098abc:099ab}, does not accept subfield ranges.
+  * Entries are examined in the order given, and the first field whose call type
+  * subfield equals {@code callType} supplies the key. Entries shorter than three
+  * characters are reported on {@code System.err} and skipped. Fields that are
+  * not {@link DataField}s are skipped.
+  *
+  * @param record current MARC record
+  * @param fieldSpec which MARC fields / subfields need to be analyzed
+  * @param callTypeSf subfield containing call number type, single character only
+  * @param callType literal call number code
+  * @return sort key for first identified LC call number, or null if no field matches
+  */
+ public static String getLCSortableByType(
+         Record record, String fieldSpec, String callTypeSf, String callType) {
+     for (String tag : fieldSpec.split(":")) {
+         // Check to ensure tag length is at least 3 characters
+         if (tag.length() < 3) {
+             //TODO: Should this go to a log? Better message for a bad tag in a field spec?
+             System.err.println("Invalid tag specified: " + tag);
+             continue;
+         }
+         String dfTag = tag.substring(0, 3);
+         // NOTE(review): sfSpec stays null when the entry is a bare tag; confirm
+         // that getSubfieldsAsString tolerates a null spec.
+         String sfSpec = null;
+         if (tag.length() > 3) {
+             sfSpec = tag.substring(3);
+         }
+
+         // do all fields for this tag
+         for (VariableField vf : record.getVariableFields(dfTag)) {
+             // Only data fields have subfields; skip anything else instead of
+             // failing on the cast.
+             if (!(vf instanceof DataField)) {
+                 continue;
+             }
+             DataField df = (DataField) vf;
+
+             // Assume call type subfield could repeat
+             for (Subfield typeSf : df.getSubfields(callTypeSf)) {
+                 if (callTypeSf.indexOf(typeSf.getCode()) != -1 && typeSf.getData().equals(callType)) {
+                     // Return straight away: a break here would only leave the
+                     // field loop, and a match under a later fieldSpec entry
+                     // would then replace this first key.
+                     return new LCCallNumber(df.getSubfieldsAsString(sfSpec)).getShelfKey();
+                 }
+             }
+         } // end loop over variable fields
+     } // end loop over fieldSpec
+     return null;
+ }

/**
* Extract a numeric portion of the Dewey decimal call number
@@ -1094,6 +1266,57 @@
return null;
}

+ /**
+  * Get sort key for first Dewey call number, identified by call type.
+  *
+  * <p>{@code fieldSpec} is of form {@literal 098abc:099ab}, does not accept subfield ranges.
+  * Entries are examined in the order given, and the first field whose call type
+  * subfield equals {@code callType} supplies the key. Entries shorter than three
+  * characters are reported on {@code System.err} and skipped. Fields that are
+  * not {@link DataField}s are skipped.
+  *
+  * @param record current MARC record
+  * @param fieldSpec which MARC fields / subfields need to be analyzed
+  * @param callTypeSf subfield containing call number type, single character only
+  * @param callType literal call number code
+  * @return sort key for first identified Dewey call number, or null if no field matches
+  */
+ public static String getDeweySortableByType(
+         Record record, String fieldSpec, String callTypeSf, String callType) {
+     for (String tag : fieldSpec.split(":")) {
+         // Check to ensure tag length is at least 3 characters
+         if (tag.length() < 3) {
+             //TODO: Should this go to a log? Better message for a bad tag in a field spec?
+             System.err.println("Invalid tag specified: " + tag);
+             continue;
+         }
+         String dfTag = tag.substring(0, 3);
+         // NOTE(review): sfSpec stays null when the entry is a bare tag; confirm
+         // that getSubfieldsAsString tolerates a null spec.
+         String sfSpec = null;
+         if (tag.length() > 3) {
+             sfSpec = tag.substring(3);
+         }
+
+         // do all fields for this tag
+         for (VariableField vf : record.getVariableFields(dfTag)) {
+             // Only data fields have subfields; skip anything else instead of
+             // failing on the cast.
+             if (!(vf instanceof DataField)) {
+                 continue;
+             }
+             DataField df = (DataField) vf;
+
+             // Assume call type subfield could repeat
+             for (Subfield typeSf : df.getSubfields(callTypeSf)) {
+                 if (callTypeSf.indexOf(typeSf.getCode()) != -1 && typeSf.getData().equals(callType)) {
+                     // Return straight away: a break here would only leave the
+                     // field loop, and a match under a later fieldSpec entry
+                     // would then replace this first key.
+                     return new DeweyCallNumber(df.getSubfieldsAsString(sfSpec)).getShelfKey();
+                 }
+             }
+         } // end loop over variable fields
+     } // end loop over fieldSpec
+     return null;
+ }
+
+
/**
* Normalize Dewey numbers for AlphaBrowse sorting purposes (use all
numbers!)
*
=======================================
--- /trunk/lib/solrmarc/src/org/solrmarc/index/SolrIndexer.java Wed Jan 7
19:00:06 2015 UTC
+++ /trunk/lib/solrmarc/src/org/solrmarc/index/SolrIndexer.java Thu Mar 5
21:32:51 2015 UTC
@@ -1514,7 +1514,7 @@
* values, a <code>List</code> will allow values to repeat.
*/
public static void getFieldListCollector(Record record,
- String tagStr,
+ String tagStr,
Collection<String> collector)
{
String[] tags = tagStr.split(":");
@@ -2525,8 +2525,8 @@
* values, a <code>List</code> will allow values to repeat.
*/
public static void getAllSubfieldsCollector(final Record record,
- String fieldSpec, String separator,
- Collection<String> collector)
+ String fieldSpec, String
separator,
+ Collection<String>
collector)
{
String[] fldTags = fieldSpec.split(":");
for (int i = 0; i < fldTags.length; i++)
Reply all
Reply to author
Forward
0 new messages