Fix infinite recursion bug in util.Abbreviator. (issue 286250043 by kpreid@google.com)

8 views
Skip to first unread message

re...@codereview-hr.appspotmail.com

unread,
Feb 10, 2016, 6:33:17 PM2/10/16
to mikes...@gmail.com, kpr...@google.com, google-ca...@googlegroups.com, re...@codereview-hr.appspotmail.com
Reviewers: MikeSamuel,

Description:
If the table construction inserted "a/b/c" and then "b/c", it would fall
into an infinite loop, rather than accepting that "b/c" cannot be
improved on.

Also add comments to the algorithm.

Please review this at https://codereview.appspot.com/286250043/

Affected files (+55, -14 lines):
M src/com/google/caja/util/Abbreviator.java
M tests/com/google/caja/util/AbbreviatorTest.java


Index: src/com/google/caja/util/Abbreviator.java
diff --git a/src/com/google/caja/util/Abbreviator.java
b/src/com/google/caja/util/Abbreviator.java
index
2366242e559d8225fa56e298fc32c68a1607fea2..109242ca4820506ab9c264913a236767004c1cfc
100644
--- a/src/com/google/caja/util/Abbreviator.java
+++ b/src/com/google/caja/util/Abbreviator.java
@@ -52,9 +52,9 @@ public final class Abbreviator {
*/
public Abbreviator(Set<String> items, String sep) {
this.sep = sep;
- Map<String, String> abbrevToUri = new HashMap<String, String>();
- for (String item : items) { insert(abbrevToUri, item, ""); }
- for (Map.Entry<String, String> e : abbrevToUri.entrySet()) {
+ Map<String, String> abbrevToItem = new HashMap<String, String>();
+ for (String item : items) { insert(abbrevToItem, item, ""); }
+ for (Map.Entry<String, String> e : abbrevToItem.entrySet()) {
if (e.getValue() != null) {
itemToAbbrev.put(e.getValue(), e.getKey());
}
@@ -71,19 +71,45 @@ public final class Abbreviator {
return abbrev != null ? abbrev : item;
}

+ /**
+ * Insert an entry in a map of abbreviations to items.
+ *
+ * @param abbrevToItem The table. Null-valued entries mark ambiguous
+ * abbreviations.
+ * @param item The unabbreviated item to insert.
+ * @param abbrev The shortest abbreviation so far known to be
insufficiently
+ * specific; the inserted abbreviation will always be longer than
this
+ * if possible.
+ */
private void insert(
- Map<String, String> abbrevToUri, String item, String abbrev) {
+ Map<String, String> abbrevToItem, String item, String abbrev) {
+ // Find the next longer abbreviation to attempt.
abbrev = expand(item, abbrev);
- if (!abbrevToUri.containsKey(abbrev)) {
- abbrevToUri.put(abbrev, item);
+ if (!abbrevToItem.containsKey(abbrev)) {
+ // It is unambiguous; just insert it.
+ abbrevToItem.put(abbrev, item);
} else {
- String other = abbrevToUri.get(abbrev);
- if (!item.equals(other)) {
+ // It conflicts with an existing (longer or equal) abbreviation.
+ String other = abbrevToItem.get(abbrev);
+ if (!item.equals(other)) { // Skip if exact item already present.
if (!abbrev.equals(other)) {
- abbrevToUri.put(abbrev, null);
- if (other != null) { insert(abbrevToUri, other, abbrev); }
+ // The other item can be expressed longer.
+ // (If this condition is false, then the other item is a suffix
of
+ // this one, and so the other item is left as-is.)
+
+ // Mark this abbreviation as ambiguous.
+ abbrevToItem.put(abbrev, null);
+ // Re-insert the other item with its longer abbreviation.
+ if (other != null) { insert(abbrevToItem, other, abbrev); }
+ }
+ if (other == null && item.equals(abbrev)) {
+ // Item is a suffix of an existing item. Insert it and do not
attempt
+ // to find a longer abbreviation (which would infinitely
recurse).
+ abbrevToItem.put(abbrev, item);
+ } else {
+ // Try again to insert this item, with a longer abbreviation.
+ insert(abbrevToItem, item, abbrev);
}
- insert(abbrevToUri, item, abbrev);
}
}
}
Index: tests/com/google/caja/util/AbbreviatorTest.java
diff --git a/tests/com/google/caja/util/AbbreviatorTest.java
b/tests/com/google/caja/util/AbbreviatorTest.java
index
e727aacbb9723ddc2d16dcbf1651752102a6166c..71dcc57efa9949354ddb20d10067ea9d5fc7f30d
100644
--- a/tests/com/google/caja/util/AbbreviatorTest.java
+++ b/tests/com/google/caja/util/AbbreviatorTest.java
@@ -17,6 +17,7 @@ package com.google.caja.util;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
+import java.util.LinkedHashSet;

import junit.framework.TestCase;

@@ -30,7 +31,7 @@ public class AbbreviatorTest extends TestCase {

public final void testAbbreviator() {
Abbreviator a = new Abbreviator(
- new HashSet<String>(Arrays.asList(
+ new LinkedHashSet<String>(Arrays.asList(
"/a/b/c", "/a/d/e", "/a/d/f", "/a/g/h/c",
"/h/i", "/h/j/", "/", "/c", "/d", "x", "")),
"/");
@@ -48,9 +49,10 @@ public class AbbreviatorTest extends TestCase {
assertEquals("/notpresent",
a.unambiguousAbbreviationFor("/notpresent"));
}

- public final void testSetContainsSuffixOfOtherMember() {
+ public final void testSetContainsSuffixOfOtherMember_Ordering1() {
+ // Using order-preserving LinkedHashSet to test specific insertion
ordering.
Abbreviator a = new Abbreviator(
- new HashSet<String>(Arrays.asList(
+ new LinkedHashSet<String>(Arrays.asList(
"foo:bar/z", "foo:foo:baz/z", "foo:baz/z", "foo:bar:far/z")),
":");
assertEquals("bar/z", a.unambiguousAbbreviationFor("foo:bar/z"));
@@ -59,4 +61,17 @@ public class AbbreviatorTest extends TestCase {
"foo:foo:baz/z", a.unambiguousAbbreviationFor("foo:foo:baz/z"));
assertEquals("far/z", a.unambiguousAbbreviationFor("foo:bar:far/z"));
}
+
+ public final void testSetContainsSuffixOfOtherMember_Ordering2() {
+ // Using order-preserving LinkedHashSet to test specific insertion
ordering.
+ Abbreviator a = new Abbreviator(
+ new LinkedHashSet<String>(Arrays.asList(
+ "foo:bar/z", "foo:baz/z", "foo:foo:baz/z", "foo:bar:far/z")),
+ ":");
+ assertEquals("bar/z", a.unambiguousAbbreviationFor("foo:bar/z"));
+ assertEquals("foo:baz/z", a.unambiguousAbbreviationFor("foo:baz/z"));
+ assertEquals(
+ "foo:foo:baz/z", a.unambiguousAbbreviationFor("foo:foo:baz/z"));
+ assertEquals("far/z", a.unambiguousAbbreviationFor("foo:bar:far/z"));
+ }
}


mikes...@gmail.com

unread,
Feb 11, 2016, 8:16:47 AM2/11/16
to kpr...@google.com, google-ca...@googlegroups.com, re...@codereview-hr.appspotmail.com

kpr...@google.com

unread,
Feb 11, 2016, 12:50:31 PM2/11/16
to mikes...@gmail.com, google-ca...@googlegroups.com, re...@codereview-hr.appspotmail.com
On 2016/02/11 13:16:45, MikeSamuel wrote:
> +1

Sorry, is that a LGTM?

https://codereview.appspot.com/286250043/

Mike Samuel

unread,
Feb 11, 2016, 5:48:58 PM2/11/16
to re...@codereview-hr.appspotmail.com, Google Caja Discuss, kpr...@google.com

Yes.  LGTM

Reply all
Reply to author
Forward
0 new messages