[flaptor-util] r203 committed - syncing...

0 views
Skip to first unread message

codesite...@google.com

unread,
Sep 1, 2009, 6:12:14 PM (9/1/09)
to flaptor-o...@googlegroups.com
Revision: 203
Author: ignaci...@gmail.com
Date: Tue Sep 1 15:10:08 2009
Log: syncing...
http://code.google.com/p/flaptor-util/source/detail?r=203

Modified:
/trunk/src/com/flaptor/util/DomUtil.java

=======================================
--- /trunk/src/com/flaptor/util/DomUtil.java Thu Aug 6 10:36:48 2009
+++ /trunk/src/com/flaptor/util/DomUtil.java Tue Sep 1 15:10:08 2009
@@ -16,6 +16,7 @@

package com.flaptor.util;

+import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Iterator;
@@ -23,9 +24,13 @@
import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.Element;
+import org.dom4j.Node;
+import org.dom4j.Text;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

+import com.flaptor.util.parser.HtmlParser;
+
/**
* This class implements some static methods to manipulate doms.
*/
@@ -95,6 +100,30 @@
}
return value;
}
+
+ /**
+  * Returns the concatenated text of an element and all of its descendant
+  * elements, visiting child nodes in index order.
+  * <p>
+  * Only {@link Text} nodes contribute to the result. Child nodes that are
+  * neither an {@link Element} nor a {@link Text} (comments, for example)
+  * are skipped.
+  *
+  * @param element the root of the subtree to read; may be <code>null</code>
+  * @return the collected text, or an empty string if the element is
+  *         <code>null</code> or contains no text; never <code>null</code>
+  */
+ public static String getElementTextRecursively(final Element element) {
+     final StringBuilder text = new StringBuilder();
+     if (element != null) {
+         appendElementText(element, text);
+     }
+     return text.toString();
+ }
+
+ /**
+  * Appends the text found in the subtree rooted at the given element to the
+  * buffer. A single buffer is shared by the whole recursion so the text is
+  * not re-copied at every nesting level.
+  */
+ private static void appendElementText(final Element element, final StringBuilder buffer) {
+     // Guard kept from the original implementation: a node that reports
+     // itself as a comment contributes nothing.
+     if (element.getNodeType() == Node.COMMENT_NODE) {
+         return;
+     }
+     final int size = element.nodeCount();
+     for (int i = 0; i < size; i++) {
+         final Node node = element.node(i);
+         if (node instanceof Element) {
+             appendElementText((Element) node, buffer);
+         } else if (node instanceof Text) {
+             buffer.append(node.getText());
+         }
+     }
+ }


/**
@@ -169,5 +198,13 @@
|| (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <=
0x10ffff);
}

+ /**
+  * Command-line helper: reads the HTML file named by the first argument,
+  * parses it with {@link HtmlParser} and prints the text extracted by
+  * {@link #getElementTextRecursively(Element)} to standard output.
+  *
+  * @param arg command-line arguments; <code>arg[0]</code> is the path of
+  *            the HTML file to read
+  * @throws Exception propagated from reading or parsing the file
+  */
+ public static void main(String[] arg) throws Exception {
+     // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
+     // when no file is given.
+     if (arg == null || arg.length < 1) {
+         System.err.println("Usage: DomUtil <html-file>");
+         System.exit(1);
+     }
+     String str = FileUtil.readFile(new File(arg[0]));
+     HtmlParser parser = new HtmlParser();
+     // The URL appears to be a placeholder passed along with the document bytes.
+     // NOTE(review): getBytes() uses the platform default charset - confirm
+     // this matches the encoding HtmlParser expects.
+     Document htmlDocument = parser.getHtmlDocument("http://url.com", str.getBytes());
+     System.out.println(getElementTextRecursively(htmlDocument.getRootElement()));
+ }
}

Reply all
Reply to author
Forward
0 new messages