[solrmarc] r1798 committed - updated GenericVuFind scripts to mimic new callnumber and dewey method...

3 views
Skip to first unread message

solr...@googlecode.com

unread,
Jun 4, 2015, 4:55:47 PM6/4/15
to solrmarc...@googlegroups.com
Revision: 1798
Author: olso...@gmail.com
Date: Thu Jun 4 20:55:26 2015 UTC
Log: updated GenericVuFind scripts to mimic new callnumber and dewey
methods in VuFindIndexer
https://code.google.com/p/solrmarc/source/detail?r=1798

Modified:
/trunk/examples/GenericVuFind/index_scripts/callnumber.bsh
/trunk/examples/GenericVuFind/index_scripts/dewey.bsh
/trunk/examples/GenericVuFind/marc_local.properties

=======================================
--- /trunk/examples/GenericVuFind/index_scripts/callnumber.bsh Fri Jan 30
18:36:15 2015 UTC
+++ /trunk/examples/GenericVuFind/index_scripts/callnumber.bsh Thu Jun 4
20:55:26 2015 UTC
@@ -6,125 +6,137 @@
* it will be applied during indexing.
*/
import org.marc4j.marc.Record;
+import org.solrmarc.callnum.LCCallNumber;
+import org.solrmarc.index.SolrIndexer;
+

// define the base level indexer so that its methods can be called from
the script.
// note that the SolrIndexer code will set this value before the script
methods are called.
org.solrmarc.index.SolrIndexer indexer = null;

-/**
-* Extract the call number label from a record
-* @param record
-* @return Call number label
-*/
-public String getFullCallNumber(Record record) {
+ /**
+ * Extract the full call number from a record, stripped of spaces
+ * @param record MARC record
+ * @return Full call number, upper-cased with spaces removed (null if none found)
+ * @deprecated Obsolete as of VuFind 2.4.
+ * This method exists only to support the VuFind call number
search, version <= 2.3.
+ * As of VuFind 2.4, the munging for call number search is
handled entirely in Solr.
+ */
+ public String getFullCallNumber(Record record) {

-return(getFullCallNumber(record, "099ab:090ab:050ab"));
-}
+ return(getFullCallNumber(record, "099ab:090ab:050ab"));
+ }

-/**
-* Extract the call number label from a record
-* @param record
-* @return Call number label
-*/
-public String getFullCallNumber(Record record, String fieldSpec) {
+ /**
+ * Extract the full call number from a record, stripped of spaces
+ * @param record MARC record
+ * @param fieldSpec taglist for call number fields
+ * @return Full call number, upper-cased with spaces removed (null if none found)
+ * @deprecated Obsolete as of VuFind 2.4.
+ * This method exists only to support the VuFind call number
search, version <= 2.3.
+ * As of VuFind 2.4, the munging for call number search is
handled entirely in Solr.
+ */
+ public String getFullCallNumber(Record record, String fieldSpec) {

-String val = indexer.getFirstFieldVal(record, fieldSpec);
+ String val = SolrIndexer.getFirstFieldVal(record, fieldSpec);

-if (val != null) {
-return val.toUpperCase().replaceAll(" ", "");
-} else {
-return val;
-}
-}
+ if (val != null) {
+ return val.toUpperCase().replaceAll(" ", "");
+ } else {
+ return val;
+ }
+ }

-/**
-* Extract the call number label from a record
-* @param record
-* @return Call number label
-*/
-public String getCallNumberLabel(Record record) {

-return getCallNumberLabel(record, "090a:050a");
-}
+ /**
+ * Extract the call number label from a record
+ * @param record MARC record
+ * @return Call number label
+ */
+ public String getCallNumberLabel(Record record) {

-/**
-* Extract the call number label from a record
-* @param record
-* @return Call number label
-*/
-public String getCallNumberLabel(Record record, String fieldSpec) {
+ return getCallNumberLabel(record, "090a:050a");
+ }

-String val = indexer.getFirstFieldVal(record, fieldSpec);
+ /**
+ * Extract the call number label from a record
+ * @param record MARC record
+ * @param fieldSpec taglist for call number fields
+ * @return Call number label
+ */
+ public String getCallNumberLabel(Record record, String fieldSpec) {

-if (val != null) {
-int dotPos = val.indexOf(".");
-if (dotPos > 0) {
- val = val.substring(0, dotPos);
-}
-return val.toUpperCase();
-} else {
-return val;
-}
-}
+ String val = SolrIndexer.getFirstFieldVal(record, fieldSpec);

-/**
-* Extract the subject component of the call number
-*
-* Can return null
-*
-* @param record
-* @return Call number label
-*/
-public String getCallNumberSubject(Record record) {
+ if (val != null) {
+ int dotPos = val.indexOf(".");
+ if (dotPos > 0) {
+ val = val.substring(0, dotPos);
+ }
+ return val.toUpperCase();
+ } else {
+ return val;
+ }
+ }
+
+ /**
+ * Extract the subject component of the call number
+ *
+ * Can return null
+ *
+ * @param record MARC record
+ * @return Call number subject letters
+ */
+ public String getCallNumberSubject(Record record) {

-return(getCallNumberSubject(record, "090a:050a"));
-}
+ return(getCallNumberSubject(record, "090a:050a"));
+ }

-/**
-* Extract the subject component of the call number
-*
-* Can return null
-*
-* @param record
-* @return Call number label
-*/
-public String getCallNumberSubject(Record record, String fieldSpec) {
+ /**
+ * Extract the subject component of the call number
+ *
+ * Can return null
+ *
+ * @param record current MARC record
+ * @param fieldSpec taglist for call number fields
+ * @return Call number subject letters
+ */
+ public String getCallNumberSubject(Record record, String fieldSpec) {

-String val = indexer.getFirstFieldVal(record, fieldSpec);
+ String val = SolrIndexer.getFirstFieldVal(record, fieldSpec);

-if (val != null) {
-String [] callNumberSubject = val.toUpperCase().split("[^A-Z]+");
-if (callNumberSubject.length > 0)
-{
- return callNumberSubject[0];
-}
-}
-return(null);
-}
+ if (val != null) {
+ String [] callNumberSubject =
val.toUpperCase().split("[^A-Z]+");
+ if (callNumberSubject.length > 0)
+ {
+ return callNumberSubject[0];
+ }
+ }
+ return(null);
+ }

-/**
-* Normalize a single LCCN
-* @param record
-* @param fieldSpec
-* @return String Normalized LCCN
-*/
-public String getFullCallNumberNormalized(Record record) {
+ /**
+ * Normalize a single LC call number
+ * @param record current MARC record
+ * @return String Normalized LC call number
+ */
+ public String getFullCallNumberNormalized(Record record) {

-return(getFullCallNumberNormalized(record, "099ab:090ab:050ab"));
-}
+ return(getFullCallNumberNormalized(record, "099ab:090ab:050ab"));
+ }

-/**
-* Normalize a single LCCN
-* @param record
-* @param fieldSpec
-* @return String Normalized LCCN
-*/
-public String getFullCallNumberNormalized(Record record, String fieldSpec)
{
+ /**
+ * Normalize a single LC call number
+ * @param record current MARC record
+ * @param fieldSpec which MARC fields / subfields need to be analyzed
+ * @return String Normalized LC call number
+ */
+ public String getFullCallNumberNormalized(Record record, String
fieldSpec) {

- if (fieldSpec != null) {
- String cn = indexer.getFirstFieldVal(record, fieldSpec);
- return (new LCCallNumber(cn)).getShelfKey();
+ // TODO: is the null fieldSpec still an issue?
+ if (fieldSpec != null) {
+ String cn = SolrIndexer.getFirstFieldVal(record, fieldSpec);
+ return (new LCCallNumber(cn)).getShelfKey();
+ }
+ // If we got this far, we couldn't find a valid value:
+ return null;
}
- // If we got this far, we couldn't find a valid value:
- return null;
-}
=======================================
--- /trunk/examples/GenericVuFind/index_scripts/dewey.bsh Fri Jan 23
19:54:42 2015 UTC
+++ /trunk/examples/GenericVuFind/index_scripts/dewey.bsh Thu Jun 4
20:55:26 2015 UTC
@@ -6,6 +6,8 @@
* it will be applied during indexing.
*/
import org.marc4j.marc.Record;
+import org.solrmarc.callnum.DeweyCallNumber;
+import org.solrmarc.index.SolrIndexer;
import org.solrmarc.tools.CallNumUtils;

/**
@@ -13,9 +15,9 @@
*
* Can return null
*
- * @param record
- * @param fieldSpec - which MARC fields / subfields need to be analyzed
- * @param precisionStr - a decimal number (represented in string format)
showing the
+ * @param record current MARC record
+ * @param fieldSpec which MARC fields / subfields need to be analyzed
+ * @param precisionStr a decimal number (represented in string format)
showing the
* desired precision of the returned number; i.e. 100 to round to nearest
hundred,
* 10 to round to nearest ten, 0.1 to round to nearest tenth, etc.
* @return Set containing requested numeric portions of Dewey decimal call
numbers
@@ -28,21 +30,18 @@
float precision = Float.parseFloat(precisionStr);

// Loop through the specified MARC fields:
- Set input = indexer.getFieldList(record, fieldSpec);
- Iterator iter = input.iterator();
- while (iter.hasNext()) {
- // Get the current string to work on:
- String current = iter.next();
-
+ Set input = SolrIndexer.getFieldList(record, fieldSpec);
+ for (String current: input) {
DeweyCallNumber callNum = new DeweyCallNumber(current);
if (callNum.isValid()) {
// Convert the numeric portion of the call number into a float:
float currentVal =
Float.parseFloat(callNum.getClassification());
-
+
// Round the call number value to the specified precision:
Float finalVal = new Float(Math.floor(currentVal / precision)
* precision);
-
+
// Convert the rounded value back to a string (with leading
zeros) and save it:
+ // TODO: Provide different conversion to remove CallNumUtils
dependency
result.add(CallNumUtils.normalizeFloat(finalVal.toString(), 3,
-1));
}
}
@@ -58,8 +57,8 @@
*
* Can return null
*
- * @param record
- * @param fieldSpec - which MARC fields / subfields need to be analyzed
+ * @param record current MARC record
+ * @param fieldSpec which MARC fields / subfields need to be analyzed
* @return Set containing normalized Dewey numbers extracted from
specified fields.
*/
public Set getDeweySearchable(Record record, String fieldSpec) {
@@ -67,7 +66,7 @@
Set result = new LinkedHashSet();

// Loop through the specified MARC fields:
- Set input = indexer.getFieldList(record, fieldSpec);
+ Set input = SolrIndexer.getFieldList(record, fieldSpec);
Iterator iter = input.iterator();
while (iter.hasNext()) {
// Get the current string to work on:
@@ -92,14 +91,14 @@
*
* Can return null
*
- * @param record
- * @param fieldSpec - which MARC fields / subfields need to be analyzed
+ * @param record current MARC record
+ * @param fieldSpec which MARC fields / subfields need to be analyzed
* @return String containing the first valid Dewey number encountered,
normalized
* for sorting purposes.
*/
public String getDeweySortable(Record record, String fieldSpec) {
// Loop through the specified MARC fields:
- Set input = indexer.getFieldList(record, fieldSpec);
+ Set input = SolrIndexer.getFieldList(record, fieldSpec);
Iterator iter = input.iterator();
while (iter.hasNext()) {
// Get the current string to work on:
@@ -115,14 +114,65 @@
// If we made it this far, we didn't find a valid sortable Dewey
number:
return null;
}
+
+/**
+ * Get the sort key for the first Dewey call number, identified by call type.
+ *
+ * <p>{@code fieldSpec} is of form {@literal 098abc:099ab}; subfield ranges are not accepted.
+ * Tags are examined in the order they appear in {@code fieldSpec}, and the fields of each
+ * tag in the order returned by {@code record.getVariableFields}. The first field whose
+ * {@code callTypeSf} subfield equals {@code callType} supplies the result.</p>
+ *
+ * @param record current MARC record
+ * @param fieldSpec which MARC fields / subfields need to be analyzed
+ * @param callTypeSf subfield containing call number type, single character only
+ * @param callType literal call number code
+ * @return sort key for first identified Dewey call number, or null if no field matches
+ */
+public static String getDeweySortableByType(
+        Record record, String fieldSpec, String callTypeSf, String callType) {
+    for (String tag : fieldSpec.split(":")) {
+        // A usable tag needs at least the 3-character field number
+        if (tag.length() < 3) {
+            //TODO: Should this go to a log? Better message for a bad tag in a field spec?
+            System.err.println("Invalid tag specified: " + tag);
+            continue;
+        }
+        String dfTag = tag.substring(0, 3);
+        // NOTE(review): sfSpec stays null for a bare 3-character tag -- confirm that
+        // getSubfieldsAsString() tolerates a null spec.
+        String sfSpec = null;
+        if (tag.length() > 3) {
+            sfSpec = tag.substring(3);
+        }
+
+        // do all fields for this tag
+        for (VariableField vf : record.getVariableFields(dfTag)) {
+            // Assume tag represents a DataField
+            DataField df = (DataField) vf;
+            boolean callTypeMatch = false;
+
+            // Assume call type subfield could repeat; one match is enough
+            for (Subfield typeSf : df.getSubfields(callTypeSf)) {
+                if (callTypeSf.indexOf(typeSf.getCode()) != -1 && typeSf.getData().equals(callType)) {
+                    callTypeMatch = true;
+                    break;
+                }
+            }
+            // Take the first call number coded as Dewey. Return immediately: a plain break
+            // would only leave this inner loop and let a later tag overwrite the result.
+            if (callTypeMatch) {
+                return new DeweyCallNumber(df.getSubfieldsAsString(sfSpec)).getShelfKey();
+            }
+        } // end loop over variable fields
+    } // end loop over fieldSpec
+    // No field carried the requested call type
+    return null;
+}
+

/**
* Normalize Dewey numbers for AlphaBrowse sorting purposes (use all
numbers!)
*
* Can return null
*
- * @param record
- * @param fieldSpec - which MARC fields / subfields need to be analyzed
+ * @param record current MARC record
+ * @param fieldSpec which MARC fields / subfields need to be analyzed
* @return List containing normalized Dewey numbers extracted from
specified fields.
*/
public List getDeweySortables(Record record, String fieldSpec) {
@@ -130,7 +180,7 @@
List result = new LinkedList();

// Loop through the specified MARC fields:
- Set input = indexer.getFieldList(record, fieldSpec);
+ Set input = SolrIndexer.getFieldList(record, fieldSpec);
Iterator iter = input.iterator();
while (iter.hasNext()) {
// Get the current string to work on:
=======================================
--- /trunk/examples/GenericVuFind/marc_local.properties Thu Jan 10 20:44:55
2013 UTC
+++ /trunk/examples/GenericVuFind/marc_local.properties Thu Jun 4 20:55:26
2015 UTC
@@ -14,17 +14,17 @@
# (by default, the scripts have the same behavior as the built-in
functions,
# but the external scripts are easier to customize to your needs).
#format = script(format.bsh), getFormat, format_map.properties
-#callnumber = script(callnumber.bsh), getFullCallNumber(099ab:090ab:050ab)
-#callnumber-subject = script(callnumber.bsh),
getCallNumberSubject(090a:050a), callnumber_subject_map.properties
-#callnumber-subject-code = script(callnumber.bsh),
getCallNumberSubject(090a:050a)
-#callnumber-label = script(callnumber.bsh), getCallNumberLabel(090a:050a)
+callnumber = script(callnumber.bsh), getFullCallNumber(099ab:090ab:050ab)
+callnumber-subject = script(callnumber.bsh),
getCallNumberSubject(090a:050a), callnumber_subject_map.properties
+callnumber-subject-code = script(callnumber.bsh),
getCallNumberSubject(090a:050a)
+callnumber-label = script(callnumber.bsh), getCallNumberLabel(090a:050a)
#publishDate = script(getdate.bsh), getDate
#illustrated = script(illustrated.bsh), isIllustrated
-#dewey-hundreds = script(dewey.bsh), getDeweyNumber(082a:083a, 100),
ddc22_map.properties(hundreds)
-#dewey-tens = script(dewey.bsh), getDeweyNumber(082a:083a, 10),
ddc22_map.properties(tens)
-#dewey-ones = script(dewey.bsh), getDeweyNumber(082a:083a, 1),
ddc22_map.properties(ones)
-#dewey-full = script(dewey.bsh), getDeweySearchable(082a:083a)
-#dewey-sort = script(dewey.bsh), getDeweySortable(082a:083a)
+dewey-hundreds = script(dewey.bsh), getDeweyNumber(082a:083a, 100),
ddc22_map.properties(hundreds)
+dewey-tens = script(dewey.bsh), getDeweyNumber(082a:083a, 10),
ddc22_map.properties(tens)
+dewey-ones = script(dewey.bsh), getDeweyNumber(082a:083a, 1),
ddc22_map.properties(ones)
+dewey-full = script(dewey.bsh), getDeweySearchable(082a:083a)
+dewey-sort = script(dewey.bsh), getDeweySortable(082a:083a)

# Uncomment the following lines to track history of indexing times for RSS
feeds,
# OAI-PMH server and other updates. The parameter to these functions must
be the
Reply all
Reply to author
Forward
0 new messages