[apache-rat-pd] r58 committed - Bug-fix for: http://code.google.com/p/apache-rat-pd/source/detail?r=51...

0 views
Skip to first unread message

codesite...@google.com

unread,
Sep 6, 2009, 6:00:44 PM9/6/09
to apache-rat...@googlegroups.com
Revision: 58
Author: maka82
Date: Sun Sep 6 14:59:36 2009
Log: Bug-fix for: http://code.google.com/p/apache-rat-pd/source/detail?r=51#
Now the application will never hang because of an oversized input line.
http://code.google.com/p/apache-rat-pd/source/detail?r=58

Modified:

/trunk/src/main/java/org/apache/rat/pd/engines/google/GoogleCodeSearchParser.java

/trunk/src/test/java/org/apache/rat/pd/engines/google/GoogleCodeSearchParserTest.java

=======================================
---
/trunk/src/main/java/org/apache/rat/pd/engines/google/GoogleCodeSearchParser.java
Mon Aug 24 15:34:24 2009
+++
/trunk/src/main/java/org/apache/rat/pd/engines/google/GoogleCodeSearchParser.java
Sun Sep 6 14:59:36 2009
@@ -217,7 +217,12 @@
public boolean queryEngine(String possibleCopyAndPastedCode) throws
IOException, ServiceException {
boolean toReturn = false;

- List<URL> listOfFeedUrl = splitLongUrl(possibleCopyAndPastedCode,
CODESEARCH_QUERY_MAX_LENGTH);
+ String[] lines = possibleCopyAndPastedCode.split("\n *");
+ // lines must be less then 1024 characters
+ // this is bug-fix of bug
+ // http://code.google.com/p/apache-rat-pd/source/detail?r=51#
+ List<String> splitedLines = generateLinesWithProperLength(lines,
CODESEARCH_QUERY_MAX_LENGTH);
+ List<URL> listOfFeedUrl = fitCodeLinesIntoUrls(splitedLines,
CODESEARCH_QUERY_MAX_LENGTH);
// call every URL and parse results in one map
for (URL url : listOfFeedUrl) {
CodeSearchFeed myFeed = codesearchService.getFeed(url,
CodeSearchFeed.class);
@@ -235,29 +240,30 @@
}

/**
- * This method can split code to produce URLs that will not be greater
then
- * length. For GoogleCodeSearch length is 1024.
+ * This method combine lines and produce URLs that will fit maxLength
but
+ * which are not greater then provided maxLength. For GoogleCodeSearch
+ * length is 1024.
*
- * @param possibleCopyAndPastedCode code to be checked
- * @param length maximum length of query which code search engine can
manage
- * @return list of URL-s not longer then @length
+ * @param lines list of lines which will be combined to produce URL
which
+ * length fit provided maxLength
+ * @param maxLength maximum length of query which code search engine
can
+ * manage
+ * @return list of URL-s not longer then maxLength
* @throws IOException
*/
- List<URL> splitLongUrl(String possibleCopyAndPastedCode, int length)
throws IOException {
+ List<URL> fitCodeLinesIntoUrls(List<String> lines, int maxLength)
throws IOException {
List<URL> toRet = new ArrayList<URL>();
- // lines must be less then 1024 characters
- String[] lines = possibleCopyAndPastedCode.split("\n *");
URL url = null;

int i = 0;
int j = 0;

- while (i < lines.length) {
- while (j < lines.length) {
+ while (i < lines.size()) {
+ while (j < lines.size()) {
StringBuffer currenCodePart = combineLines(lines, i, j);
URL currentUrl = createUrl(currenCodePart.toString());
-
- if (currentUrl.toString().length() <= length) {
+
+ if (currentUrl.toString().length() <= maxLength) {
url = currentUrl;
j++;
} else {
@@ -269,7 +275,7 @@
toRet.add(url);
url = null;
}
- if (j == lines.length) {
+ if (j == lines.size()) {
break;
}
}
@@ -284,14 +290,14 @@
* @param end index of end position
* @return appended tokens
*/
- private StringBuffer combineLines(String[] tokens, int start, int end)
{
+ private StringBuffer combineLines(List<String> tokens, int start, int
end) {

StringBuffer sb = new StringBuffer();
for (int k = start; k < end; k++) {
- sb.append(tokens[k]);
+ sb.append(tokens.get(k));
sb.append("\n");
}
- sb.append(tokens[end]);
+ sb.append(tokens.get(end));
return sb;
}

@@ -357,5 +363,70 @@

return toRet;
}
+
+ // this is bug-fix of bug
+ // http://code.google.com/p/apache-rat-pd/source/detail?r=51#
+ /**
+ * Generate list of lines that produce URL which length is smaller then
+ * provided maxLength.
+ *
+ * @param lines list of strings to filter
+ * @param maxLength maximum length of URL produced from one string
+ * @return list of lines that produce URL which length is smaller then
+ * provided maxLength
+ */
+ private List<String> generateLinesWithProperLength(String[] lines, int
maxLength) {
+ List<String> toRet = new ArrayList<String>();
+ for (String line : lines) {
+ try {
+ if (createUrl(line).toString().length() <= maxLength) {
+ toRet.add(line);
+ } else {
+ // TODO add better way to split lines with length of
URL
+ // produced from them longer then maxLength. Something
like
+ // splitting
+ // on dot sign or blank. Splitting in middle of the
word is
+ // not best solution but it is universal. It must
exist but
+ // only if all other solution are not successful.
+ toRet.addAll(splitOversizedLines(line, maxLength));
+ }
+ } catch (Exception e) {
+ // we do not want to stop execution because of one line
+ // anyway, this will be newer happened
+ System.err.println("Error creating URL from string " +
line + ", continuing...");
+ }
+ }
+ return toRet;
+ }
+
+ /**
+ * Splits line into multiple lines which satisfy expected maximum URL
length
+ *
+ * @param line line to split if URL created from it is too long
+ * @param maximumLength maximum length of URL created from this line
+ * @return list of lines which fit expected max URL length
+ * @throws MalformedURLException
+ */
+ public List<String> splitOversizedLines(String line, int
maximumLength) throws MalformedURLException {
+
+ ArrayList<String> toRet = new ArrayList<String>();
+
+ StringBuffer sb = new StringBuffer();
+ String current = "";
+ for (int i = 0; i < line.length(); i++) {
+
+ sb.append(line.charAt(i));
+
+ if (createUrl(sb.toString()).toString().length() <=
maximumLength) {
+ current = sb.toString();
+ } else {
+ toRet.add(current);
+ sb.setLength(0);
+ sb.append(line.charAt(i));
+ }
+ }
+ toRet.add(sb.toString());
+ return toRet;
+ }

}
=======================================
---
/trunk/src/test/java/org/apache/rat/pd/engines/google/GoogleCodeSearchParserTest.java
Mon Aug 24 15:34:24 2009
+++
/trunk/src/test/java/org/apache/rat/pd/engines/google/GoogleCodeSearchParserTest.java
Sun Sep 6 14:59:36 2009
@@ -22,32 +22,67 @@

import java.io.IOException;
import java.net.URL;
+import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

public class GoogleCodeSearchParserTest extends TestCase {

- private GoogleCodeSearchParser parser = new
GoogleCodeSearchParser("lag", 100, System.out);
+ private GoogleCodeSearchParser parser = new
GoogleCodeSearchParser("lag", 100, System.out);

public void testSplitLongUrl() throws IOException {
- // minimum length is 96
- List<URL> actual = parser.splitLongUrl("a", 100);
+ // minimum length of search query is 95
+ String[] lines = "a".split("\n *");
+ List<URL> actual =
parser.fitCodeLinesIntoUrls(Arrays.asList(lines), 95);
assertEquals(1, actual.size());

- actual = parser.splitLongUrl("a\nb", 100);
+ lines = "a\nb".split("\n *");
+ actual = parser.fitCodeLinesIntoUrls(Arrays.asList(lines), 100);
assertEquals(2, actual.size());

- actual = parser.splitLongUrl("a\nb", 135);
+ lines = "a\nb".split("\n *");
+ actual = parser.fitCodeLinesIntoUrls(Arrays.asList(lines), 135);
assertEquals(1, actual.size());

- actual = parser.splitLongUrl("a\nb\nc", 100);
+ lines = "a\nb\nc".split("\n *");
+ actual = parser.fitCodeLinesIntoUrls(Arrays.asList(lines), 100);
assertEquals(3, actual.size());

- actual = parser.splitLongUrl("a\nb\nc\nd", 100);
+ lines = "a\nb\nc\nd".split("\n *");
+ actual = parser.fitCodeLinesIntoUrls(Arrays.asList(lines), 100);
assertEquals(4, actual.size());

- actual = parser.splitLongUrl("", 100);
+ lines = "".split("\n *");
+ actual = parser.fitCodeLinesIntoUrls(Arrays.asList(lines), 100);
assertEquals(1, actual.size());
}
-}
+
+ public void testAllSubArrays() throws Exception {
+ assertEquals("when search query us created from this string, there
will be one query", 1, parser.splitOversizedLines(
+ "a", 95).size());
+ assertEquals("when search query us created from this string, there
will be two queries", 2, parser
+ .splitOversizedLines("ab", 95).size());
+
+ assertEquals("when search query us created from this string, there
will be one query", 1, parser.splitOversizedLines(
+ "ab", 96).size());
+ assertEquals("when search query us created from this string, there
will be one query", 1, parser.splitOversizedLines(
+ "test", 98).size());
+ assertEquals("when search query us created from this string, there
will be two queries", 2, parser
+ .splitOversizedLines("test1", 98).size());
+
+ // this is fix checking of bug
+ // http://code.google.com/p/apache-rat-pd/source/detail?r=51#
+ assertEquals("when search query us created from this string, there
will be one query", 1, parser.splitOversizedLines(
+ "/////////////////////////////////////////////////",
1024).size());
+ assertEquals("when search query us created from this string, there
will be two queries", 2, parser
+ .splitOversizedLines("//////////////////////////////////////////////////",
1024).size());
+ assertEquals("when search query us created from this string, there
will be two queries", 2, parser.splitOversizedLines(
+ "//////////////////////////////////////////////////////////////////////////////////////////////////",
+ 1024).size());
+ assertEquals("when search query us created from this string, there
will be three queries", 3, parser.splitOversizedLines(
+ "///////////////////////////////////////////////////////////////////////////////////////////////////",
+ 1024).size());
+
+ }
+}

Reply all
Reply to author
Forward
0 new messages