Reviewers: MikeSamuel,
Description:
If the table construction inserted "a/b/c" and then "b/c", it would fall
into an infinite loop, rather than accepting that "b/c" cannot be
improved on.
Also add comments to the algorithm.
Please review this at
https://codereview.appspot.com/286250043/
Affected files (+55, -14 lines):
M src/com/google/caja/util/Abbreviator.java
M tests/com/google/caja/util/AbbreviatorTest.java
Index: src/com/google/caja/util/Abbreviator.java
diff --git a/src/com/google/caja/util/Abbreviator.java
b/src/com/google/caja/util/Abbreviator.java
index
2366242e559d8225fa56e298fc32c68a1607fea2..109242ca4820506ab9c264913a236767004c1cfc
100644
--- a/src/com/google/caja/util/Abbreviator.java
+++ b/src/com/google/caja/util/Abbreviator.java
@@ -52,9 +52,9 @@ public final class Abbreviator {
*/
public Abbreviator(Set<String> items, String sep) {
this.sep = sep;
- Map<String, String> abbrevToUri = new HashMap<String, String>();
- for (String item : items) { insert(abbrevToUri, item, ""); }
- for (Map.Entry<String, String> e : abbrevToUri.entrySet()) {
+ Map<String, String> abbrevToItem = new HashMap<String, String>();
+ for (String item : items) { insert(abbrevToItem, item, ""); }
+ for (Map.Entry<String, String> e : abbrevToItem.entrySet()) {
if (e.getValue() != null) {
itemToAbbrev.put(e.getValue(), e.getKey());
}
@@ -71,19 +71,45 @@ public final class Abbreviator {
return abbrev != null ? abbrev : item;
}
+ /**
+ * Insert an entry in a map of abbreviations to items.
+ *
+ * @param abbrevToItem The table. Null-valued entries mark ambiguous
+ * abbreviations.
+ * @param item The unabbreviated item to insert.
+ * @param abbrev The shortest abbreviation so far known to be
insufficiently
+ * specific; the inserted abbreviation will always be longer than
this
+ * if possible.
+ */
private void insert(
- Map<String, String> abbrevToUri, String item, String abbrev) {
+ Map<String, String> abbrevToItem, String item, String abbrev) {
+ // Find the next longer abbreviation to attempt.
abbrev = expand(item, abbrev);
- if (!abbrevToUri.containsKey(abbrev)) {
- abbrevToUri.put(abbrev, item);
+ if (!abbrevToItem.containsKey(abbrev)) {
+ // It is unambiguous; just insert it.
+ abbrevToItem.put(abbrev, item);
} else {
- String other = abbrevToUri.get(abbrev);
- if (!item.equals(other)) {
+ // It conflicts with an existing (longer or equal) abbreviation.
+ String other = abbrevToItem.get(abbrev);
+ if (!item.equals(other)) { // Skip if exact item already present.
if (!abbrev.equals(other)) {
- abbrevToUri.put(abbrev, null);
- if (other != null) { insert(abbrevToUri, other, abbrev); }
+ // The other item can be expressed longer.
+ // (If this condition is false, then the other item is a suffix
of
+ // this one, and so the other item is left as-is.)
+
+ // Mark this abbreviation as ambiguous.
+ abbrevToItem.put(abbrev, null);
+ // Re-insert the other item with its longer abbreviation.
+ if (other != null) { insert(abbrevToItem, other, abbrev); }
+ }
+ if (other == null && item.equals(abbrev)) {
+ // Item is a suffix of an existing item. Insert it and do not
attempt
+ // to find a longer abbreviation (which would infinitely
recurse).
+ abbrevToItem.put(abbrev, item);
+ } else {
+ // Try again to insert this item, with a longer abbreviation.
+ insert(abbrevToItem, item, abbrev);
}
- insert(abbrevToUri, item, abbrev);
}
}
}
Index: tests/com/google/caja/util/AbbreviatorTest.java
diff --git a/tests/com/google/caja/util/AbbreviatorTest.java
b/tests/com/google/caja/util/AbbreviatorTest.java
index
e727aacbb9723ddc2d16dcbf1651752102a6166c..71dcc57efa9949354ddb20d10067ea9d5fc7f30d
100644
--- a/tests/com/google/caja/util/AbbreviatorTest.java
+++ b/tests/com/google/caja/util/AbbreviatorTest.java
@@ -17,6 +17,7 @@ package com.google.caja.util;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
+import java.util.LinkedHashSet;
import junit.framework.TestCase;
@@ -30,7 +31,7 @@ public class AbbreviatorTest extends TestCase {
public final void testAbbreviator() {
Abbreviator a = new Abbreviator(
- new HashSet<String>(Arrays.asList(
+ new LinkedHashSet<String>(Arrays.asList(
"/a/b/c", "/a/d/e", "/a/d/f", "/a/g/h/c",
"/h/i", "/h/j/", "/", "/c", "/d", "x", "")),
"/");
@@ -48,9 +49,10 @@ public class AbbreviatorTest extends TestCase {
assertEquals("/notpresent",
a.unambiguousAbbreviationFor("/notpresent"));
}
- public final void testSetContainsSuffixOfOtherMember() {
+ public final void testSetContainsSuffixOfOtherMember_Ordering1() {
+ // Using order-preserving LinkedHashSet to test specific insertion
ordering.
Abbreviator a = new Abbreviator(
- new HashSet<String>(Arrays.asList(
+ new LinkedHashSet<String>(Arrays.asList(
"foo:bar/z", "foo:foo:baz/z", "foo:baz/z", "foo:bar:far/z")),
":");
assertEquals("bar/z", a.unambiguousAbbreviationFor("foo:bar/z"));
@@ -59,4 +61,17 @@ public class AbbreviatorTest extends TestCase {
"foo:foo:baz/z", a.unambiguousAbbreviationFor("foo:foo:baz/z"));
assertEquals("far/z", a.unambiguousAbbreviationFor("foo:bar:far/z"));
}
+
+ public final void testSetContainsSuffixOfOtherMember_Ordering2() {
+ // Using order-preserving LinkedHashSet to test specific insertion
ordering.
+ Abbreviator a = new Abbreviator(
+ new LinkedHashSet<String>(Arrays.asList(
+ "foo:bar/z", "foo:baz/z", "foo:foo:baz/z", "foo:bar:far/z")),
+ ":");
+ assertEquals("bar/z", a.unambiguousAbbreviationFor("foo:bar/z"));
+ assertEquals("foo:baz/z", a.unambiguousAbbreviationFor("foo:baz/z"));
+ assertEquals(
+ "foo:foo:baz/z", a.unambiguousAbbreviationFor("foo:foo:baz/z"));
+ assertEquals("far/z", a.unambiguousAbbreviationFor("foo:bar:far/z"));
+ }
}