[hive-mrc] r633 committed - Added ability to set min occur during indexing/tagging

0 views
Skip to first unread message

hive...@googlecode.com

unread,
Jun 11, 2012, 3:22:11 PM (6/11/12)
to hive-c...@googlegroups.com
Revision: 633
Author: craig....@unc.edu
Date: Mon Jun 11 12:20:54 2012
Log: Added ability to set min occur during indexing/tagging
http://code.google.com/p/hive-mrc/source/detail?r=633

Modified:
/trunk/hive-core/src/edu/unc/ils/mrc/hive/api/SKOSTagger.java
/trunk/hive-core/src/edu/unc/ils/mrc/hive/api/impl/elmo/SKOSSchemeImpl.java
/trunk/hive-core/src/edu/unc/ils/mrc/hive/api/impl/elmo/SKOSTaggerImpl.java

=======================================
--- /trunk/hive-core/src/edu/unc/ils/mrc/hive/api/SKOSTagger.java Fri Oct 28 07:52:37 2011
+++ /trunk/hive-core/src/edu/unc/ils/mrc/hive/api/SKOSTagger.java Mon Jun 11 12:20:54 2012
@@ -40,10 +40,11 @@
* @param vocabularies List of vocabularies
* @param searcher Searcher implementation
* @param maxTerms Maximum number of terms
+ * @param minOccur Minimum number of times a phrase/term must occur
* @return
*/
public List<SKOSConcept> getTags(String path, List<String> vocabularies,
- SKOSSearcher searcher, int maxTerms);
+ SKOSSearcher searcher, int maxTerms, int minOccur);

/**
* Returns a list of SKOSConcept objects for the specified URL
@@ -57,10 +58,11 @@
* @param maxHops Maximum number of links to be traversed (hops)
* @param maxTerms Maximum number of terms
* @param diff Index only the differences between base page and
subsequent pages
+ * @param minOccur Minimum number of times a phrase/term must occur
* @return
*/
public List<SKOSConcept> getTags(URL url, List<String> vocabularies,
- SKOSSearcher searcher, int maxHops, int maxTerms, boolean diff);
+ SKOSSearcher searcher, int maxHops, int maxTerms, boolean diff, int
minOccur);

public void setConfig(Configuration config);

=======================================
--- /trunk/hive-core/src/edu/unc/ils/mrc/hive/api/impl/elmo/SKOSSchemeImpl.java Fri May 11 12:01:43 2012
+++ /trunk/hive-core/src/edu/unc/ils/mrc/hive/api/impl/elmo/SKOSSchemeImpl.java Mon Jun 11 12:20:54 2012
@@ -265,7 +265,7 @@
logger.warn("autocomplete property is empty");

// kea stemmer class
- this.keaStemmerClass =
properties.getProperty("keaStemmerClass", "kea.stemmers.PorterStemmer");
+ this.keaStemmerClass = properties.getProperty("keaStemmerClass", "l");
System.out.println("Using kea stemmer " + keaStemmerClass);
if (keaStemmerClass == null || keaStemmerClass.isEmpty())
logger.warn("keaStemmerClass property is empty, defaulting to
kea.stemers.PorterStemmer");
=======================================
--- /trunk/hive-core/src/edu/unc/ils/mrc/hive/api/impl/elmo/SKOSTaggerImpl.java Fri May 11 12:01:43 2012
+++ /trunk/hive-core/src/edu/unc/ils/mrc/hive/api/impl/elmo/SKOSTaggerImpl.java Mon Jun 11 12:20:54 2012
@@ -146,7 +146,7 @@
* @return
*/
public List<SKOSConcept> getTags(URL url, List<String> vocabulary,
- SKOSSearcher searcher, int maxHops, int numTerms, boolean diff)
+ SKOSSearcher searcher, int maxHops, int numTerms, boolean diff, int
minOccur)
{
try
{
@@ -157,7 +157,7 @@
tm.setProxy(proxyHost, proxyPort);
tm.setIgnorePrefixes(ignorePrefixes);
String text = tm.getPlainText(url, maxHops, diff);
- return getTagsInternal(text, vocabulary, searcher, numTerms, 2);
+ return getTagsInternal(text, vocabulary, searcher, numTerms, minOccur);
} catch (Exception e) {
logger.error(e);
}
@@ -175,12 +175,12 @@
* @return
*/
public List<SKOSConcept> getTags(String filePath, List<String>
vocabularies,
- SKOSSearcher searcher, int numTerms)
+ SKOSSearcher searcher, int numTerms, int minOccur)
{
TextManager tm = new TextManager();
String text = tm.getPlainText(filePath);

- return getTagsInternal(text, vocabularies, searcher, numTerms, 2);
+ return getTagsInternal(text, vocabularies, searcher, numTerms, minOccur);
}


Reply all
Reply to author
Forward
0 new messages