[flaptor-util] r214 committed - Updating TextSignature to use Scanner instead of StreamTokenizer

5 views
Skip to first unread message

codesite...@google.com

unread,
Nov 30, 2010, 10:05:51 AM (11/30/10)
to flaptor-o...@googlegroups.com
Revision: 214
Author: dbuthay
Date: Tue Nov 30 07:05:14 2010
Log: Updating TextSignature to use Scanner instead of StreamTokenizer
http://code.google.com/p/flaptor-util/source/detail?r=214

Modified:
/trunk/src/com/flaptor/util/TextSignature.java

=======================================
--- /trunk/src/com/flaptor/util/TextSignature.java Fri Jul 16 11:27:44 2010
+++ /trunk/src/com/flaptor/util/TextSignature.java Tue Nov 30 07:05:14 2010
@@ -30,10 +30,10 @@
import java.io.ObjectOutputStream;
import java.io.Reader;
import java.io.Serializable;
-import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.LinkedList;
import java.util.List;
+import java.util.Scanner;

public class TextSignature implements Serializable {

@@ -91,22 +91,19 @@
private void build(Reader reader) throws IOException {
int[] data = new int[HASH_SIZE];

- StreamTokenizer tokenizer = new StreamTokenizer(reader);
+ Scanner scanner = new Scanner(reader).useDelimiter("\\W+");
List<String> tokens = new LinkedList<String>();

- int windowSize = 4;
+ int windowSize = 2;
int tail = 1-windowSize;
int hash = 0;
boolean hashing = false;


- while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
- if (null == tokenizer.sval){
- // should we consider this?
- continue;
- }
- tokens.add(tokenizer.sval);
- hash = addWord(hash, tokenizer.sval);
+ while (scanner.hasNext()) {
+ String token = scanner.next();
+ tokens.add(token);
+ hash = addWord(hash, token);
if (tail == 0) {
hashing = true;
}

Reply all
Reply to author
Forward
0 new messages