[lucene-gosen] r262 committed - fix issue #57 change libraries ver 4.10.0 ...

11 views
Skip to first unread message

lucene...@googlecode.com

unread,
Sep 3, 2014, 1:07:40 AM (9/3/14)
to lucene...@googlegroups.com
Revision: 262
Author: johtani
Date: Wed Sep 3 05:06:19 2014 UTC
Log: fix issue #57 change libraries ver 4.10.0
related LUCENE-5859 changes

https://code.google.com/p/lucene-gosen/source/detail?r=262

Added:
/branches/4x/lib/lucene-analyzers-common-4.10.0.jar
/branches/4x/lib/lucene-codecs-4.10.0.jar
/branches/4x/lib/lucene-core-4.10.0.jar
/branches/4x/lib/lucene-test-framework-4.10.0.jar
/branches/4x/lib/randomizedtesting-runner-2.1.6.jar
/branches/4x/lib/solr-core-4.10.0.jar
/branches/4x/lib/solr-solrj-4.10.0.jar
Deleted:
/branches/4x/lib/lucene-analyzers-common-4.9.0.jar
/branches/4x/lib/lucene-codecs-4.9.0.jar
/branches/4x/lib/lucene-core-4.9.0.jar
/branches/4x/lib/lucene-test-framework-4.9.0.jar
/branches/4x/lib/randomizedtesting-runner-2.1.3.jar
/branches/4x/lib/solr-core-4.9.0.jar
/branches/4x/lib/solr-solrj-4.9.0.jar
Modified:
/branches/4x/CHANGES.txt
/branches/4x/build.xml
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenAnalyzer.java

/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPartOfSpeechKeepFilter.java

/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPartOfSpeechStopFilter.java

/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPunctuationFilter.java

/branches/4x/src/java/org/apache/solr/analysis/GosenPartOfSpeechKeepFilterFactory.java

/branches/4x/src/java/org/apache/solr/analysis/GosenPartOfSpeechStopFilterFactory.java

/branches/4x/src/java/org/apache/solr/analysis/GosenPunctuationFilterFactory.java

/branches/4x/src/test/org/apache/lucene/analysis/gosen/TestGosenAnalyzer.java

=======================================
--- /dev/null
+++ /branches/4x/lib/lucene-analyzers-common-4.10.0.jar Wed Sep 3 05:06:19
2014 UTC
File is too large to display a diff.
=======================================
--- /dev/null
+++ /branches/4x/lib/lucene-codecs-4.10.0.jar Wed Sep 3 05:06:19 2014 UTC
Binary file, no diff available.
=======================================
--- /dev/null
+++ /branches/4x/lib/lucene-core-4.10.0.jar Wed Sep 3 05:06:19 2014 UTC
File is too large to display a diff.
=======================================
--- /dev/null
+++ /branches/4x/lib/lucene-test-framework-4.10.0.jar Wed Sep 3 05:06:19
2014 UTC
File is too large to display a diff.
=======================================
--- /dev/null
+++ /branches/4x/lib/randomizedtesting-runner-2.1.6.jar Wed Sep 3 05:06:19
2014 UTC
Binary file, no diff available.
=======================================
--- /dev/null
+++ /branches/4x/lib/solr-core-4.10.0.jar Wed Sep 3 05:06:19 2014 UTC
File is too large to display a diff.
=======================================
--- /dev/null
+++ /branches/4x/lib/solr-solrj-4.10.0.jar Wed Sep 3 05:06:19 2014 UTC
Binary file, no diff available.
=======================================
--- /branches/4x/lib/lucene-analyzers-common-4.9.0.jar Thu Jun 26 12:48:50
2014 UTC
+++ /dev/null
File is too large to display a diff.
=======================================
--- /branches/4x/lib/lucene-codecs-4.9.0.jar Thu Jun 26 12:48:50 2014 UTC
+++ /dev/null
Binary file, no diff available.
=======================================
--- /branches/4x/lib/lucene-core-4.9.0.jar Thu Jun 26 12:48:50 2014 UTC
+++ /dev/null
File is too large to display a diff.
=======================================
--- /branches/4x/lib/lucene-test-framework-4.9.0.jar Thu Jun 26 12:48:50
2014 UTC
+++ /dev/null
File is too large to display a diff.
=======================================
--- /branches/4x/lib/randomizedtesting-runner-2.1.3.jar Tue May 20 14:02:00
2014 UTC
+++ /dev/null
Binary file, no diff available.
=======================================
--- /branches/4x/lib/solr-core-4.9.0.jar Thu Jun 26 12:48:50 2014 UTC
+++ /dev/null
File is too large to display a diff.
=======================================
--- /branches/4x/lib/solr-solrj-4.9.0.jar Thu Jun 26 12:48:50 2014 UTC
+++ /dev/null
Binary file, no diff available.
=======================================
--- /branches/4x/CHANGES.txt Fri Jun 27 10:58:10 2014 UTC
+++ /branches/4x/CHANGES.txt Wed Sep 3 05:06:19 2014 UTC
@@ -4,6 +4,9 @@

Important Notice

+* http://code.google.com/p/lucene-gosen/issues/detail?id=55
+ Change Lucene/Solr 4.10.0 library
+
New Features & Improvements

Bug Fixes
=======================================
--- /branches/4x/build.xml Fri Jun 27 10:58:10 2014 UTC
+++ /branches/4x/build.xml Wed Sep 3 05:06:19 2014 UTC
@@ -6,7 +6,7 @@
<!-- to change the dictionary type to naist-chasen, ant
-Ddictype=naist-chasen -->
<property name="dictype" value="ipadic"/>

- <property name="version" value="4.9.0.1"/>
+ <property name="version" value="4.10.0"/>

<property name="src.dir" location="src/java"/>
<property name="cdic.dir" location="src/compiled-dictionaries"/>
=======================================
---
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenAnalyzer.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenAnalyzer.java
Wed Sep 3 05:06:19 2014 UTC
@@ -28,7 +28,6 @@
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.Version;

/**
* Analyzer for Japanese which uses "Sen" morphological analyzer.
@@ -74,30 +73,29 @@
/**
* Create a GosenAnalyzer with the default stopwords and stoptags and no
stemExclusionSet
*/
- public GosenAnalyzer(Version version) {
- this(version, DefaultSetHolder.DEFAULT_STOP_SET,
DefaultSetHolder.DEFAULT_STOP_TAGS, CharArraySet.EMPTY_SET, null);
+ public GosenAnalyzer() {
+ this(DefaultSetHolder.DEFAULT_STOP_SET,
DefaultSetHolder.DEFAULT_STOP_TAGS, CharArraySet.EMPTY_SET, null);
}

/**
* Create a GosenAnalyzer with the default stopwords and stoptags and no
stemExclusionSet<br>
* and argument of dictionaryDir.
*/
- public GosenAnalyzer(Version version, String dictionaryDir) {
- this(version, DefaultSetHolder.DEFAULT_STOP_SET,
DefaultSetHolder.DEFAULT_STOP_TAGS, CharArraySet.EMPTY_SET, dictionaryDir);
+ public GosenAnalyzer(String dictionaryDir) {
+ this(DefaultSetHolder.DEFAULT_STOP_SET,
DefaultSetHolder.DEFAULT_STOP_TAGS, CharArraySet.EMPTY_SET, dictionaryDir);
}

/**
* Create a GosenAnalyzer with the specified stopwords, stoptags, and
stemExclusionSet
*
- * @param version lucene compatibility version
* @param stopwords a stopword set: words matching these (Surf
* @param stoptags a stoptags set: words containing these parts of
speech will be removed from the stream.
* @param stemExclusionSet a stemming exclusion set: these words are
ignored by
* {@link GosenBasicFormFilter} and {@link
GosenKatakanaStemFilter}
* @param dictionaryDir a directory of dictionary
*/
- public GosenAnalyzer(Version version, CharArraySet stopwords,
Set<String> stoptags, CharArraySet stemExclusionSet, String dictionaryDir) {
- super(version, stopwords);
+ public GosenAnalyzer(CharArraySet stopwords, Set<String> stoptags,
CharArraySet stemExclusionSet, String dictionaryDir) {
+ super(stopwords);
this.stoptags = stoptags;
this.stemExclusionSet = stemExclusionSet;
this.dictionaryDir = dictionaryDir;
@@ -120,14 +118,14 @@
protected TokenStreamComponents createComponents(String field, Reader
reader) {
Tokenizer tokenizer = new GosenTokenizer(reader, null, dictionaryDir);
TokenStream stream = new GosenWidthFilter(tokenizer);
- stream = new GosenPunctuationFilter(matchVersion, stream);
- stream = new GosenPartOfSpeechStopFilter(matchVersion, stream,
stoptags);
- stream = new StopFilter(matchVersion, stream, stopwords);
+ stream = new GosenPunctuationFilter(stream);
+ stream = new GosenPartOfSpeechStopFilter(stream, stoptags);
+ stream = new StopFilter(stream, stopwords);
if (!stemExclusionSet.isEmpty())
stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
stream = new GosenBasicFormFilter(stream);
stream = new GosenKatakanaStemFilter(stream);
- stream = new LowerCaseFilter(matchVersion, stream);
+ stream = new LowerCaseFilter(stream);
return new TokenStreamComponents(tokenizer, stream);
}
}
=======================================
---
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPartOfSpeechKeepFilter.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPartOfSpeechKeepFilter.java
Wed Sep 3 05:06:19 2014 UTC
@@ -40,8 +40,8 @@
this.keepTags = keepTags;
}

- public GosenPartOfSpeechKeepFilter(Version version, TokenStream input,
Set<String> keepTags) {
- super(version, input);
+ public GosenPartOfSpeechKeepFilter(TokenStream input, Set<String>
keepTags) {
+ super(input);
this.keepTags = keepTags;
}

=======================================
---
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPartOfSpeechStopFilter.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPartOfSpeechStopFilter.java
Wed Sep 3 05:06:19 2014 UTC
@@ -38,8 +38,8 @@
this.stopTags = stopTags;
}

- public GosenPartOfSpeechStopFilter(Version version, TokenStream input,
Set<String> stopTags) {
- super(version, input);
+ public GosenPartOfSpeechStopFilter(TokenStream input, Set<String>
stopTags) {
+ super(input);
this.stopTags = stopTags;
}

=======================================
---
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPunctuationFilter.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/lucene/analysis/gosen/GosenPunctuationFilter.java
Wed Sep 3 05:06:19 2014 UTC
@@ -35,8 +35,8 @@
super(version, enablePositionIncrements, input);
}

- public GosenPunctuationFilter(Version version, TokenStream input) {
- super(version, input);
+ public GosenPunctuationFilter(TokenStream input) {
+ super(input);
}

@Override
=======================================
---
/branches/4x/src/java/org/apache/solr/analysis/GosenPartOfSpeechKeepFilterFactory.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/solr/analysis/GosenPartOfSpeechKeepFilterFactory.java
Wed Sep 3 05:06:19 2014 UTC
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gosen.GosenPartOfSpeechKeepFilter;
+import org.apache.lucene.util.Version;

/**
* Factory for {@link GosenPartOfSpeechKeepFilter}.
@@ -50,6 +51,10 @@
keepTagFiles = require(args, "tags");
enablePositionIncrements =
getBoolean(args, "enablePositionIncrements", true);

+ if (enablePositionIncrements == false &&
+ (luceneMatchVersion == null ||
luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0))) {
+ throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4");
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
=======================================
---
/branches/4x/src/java/org/apache/solr/analysis/GosenPartOfSpeechStopFilterFactory.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/solr/analysis/GosenPartOfSpeechStopFilterFactory.java
Wed Sep 3 05:06:19 2014 UTC
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;

/**
* Factory for {@link GosenPartOfSpeechStopFilter}.
@@ -50,6 +51,10 @@
stopTagFiles = require(args, "tags");
enablePositionIncrements =
getBoolean(args, "enablePositionIncrements", true);

+ if (enablePositionIncrements == false &&
+ (luceneMatchVersion == null ||
luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0))) {
+ throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4");
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
=======================================
---
/branches/4x/src/java/org/apache/solr/analysis/GosenPunctuationFilterFactory.java
Wed Jul 24 01:39:01 2013 UTC
+++
/branches/4x/src/java/org/apache/solr/analysis/GosenPunctuationFilterFactory.java
Wed Sep 3 05:06:19 2014 UTC
@@ -21,6 +21,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.gosen.GosenPunctuationFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.Version;

/**
* Factory for {@link GosenPunctuationFilter}.
@@ -38,6 +39,10 @@
public GosenPunctuationFilterFactory(Map<String,String> args) {
super(args);
enablePositionIncrements =
getBoolean(args, "enablePositionIncrements", true);
+ if (enablePositionIncrements == false &&
+ (luceneMatchVersion == null ||
luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0))) {
+ throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4");
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
=======================================
---
/branches/4x/src/test/org/apache/lucene/analysis/gosen/TestGosenAnalyzer.java
Tue Jul 3 16:54:22 2012 UTC
+++
/branches/4x/src/test/org/apache/lucene/analysis/gosen/TestGosenAnalyzer.java
Wed Sep 3 05:06:19 2014 UTC
@@ -31,7 +31,7 @@
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
- new GosenAnalyzer(TEST_VERSION_CURRENT, SenTestUtil.IPADIC_DIR);
+ new GosenAnalyzer(SenTestUtil.IPADIC_DIR);
}

/**
@@ -40,7 +40,7 @@
* and offsets are correct.
*/
public void testBasics() throws IOException {
- assertAnalyzesTo(new GosenAnalyzer(TEST_VERSION_CURRENT,
SenTestUtil.IPADIC_DIR), "多くの学生が試験に落ちた。",
+ assertAnalyzesTo(new GosenAnalyzer(SenTestUtil.IPADIC_DIR), "多くの学生が試験に落ちた。",
new String[] { "多く", "学生", "試験", "落ちる" },
new int[] { 0, 3, 6, 9 },
new int[] { 2, 5, 8, 11 },
@@ -53,7 +53,7 @@
* (results could be completely bogus, but makes sure we don't crash on
some input)
*/
public void testReliability() throws IOException {
- Analyzer analyzer = new GosenAnalyzer(TEST_VERSION_CURRENT,
SenTestUtil.IPADIC_DIR);
+ Analyzer analyzer = new GosenAnalyzer(SenTestUtil.IPADIC_DIR);
checkRandomData(random(), analyzer, 10000*RANDOM_MULTIPLIER);
}
}
Reply all
Reply to author
Forward
0 new messages