Modified:
branches/groups/src/esmre.py
branches/groups/test/test_esmre.py
Log:
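Collect hints from simple groups (groups that contain no alternation).
Known limitation: optional groups such as "( Arrr!)?" are not skipped yet; see testSkipsOptionalGroups.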
Modified: branches/groups/src/esmre.py
==============================================================================
--- branches/groups/src/esmre.py (original)
+++ branches/groups/src/esmre.py Wed Oct 1 11:14:25 2008
@@ -57,28 +57,7 @@
return self
-class InGroupState(object):
- def __init__(self, parent_state):
- self.parent_state = parent_state
-
- def process_byte(self, ch):
- if ch == ")":
- return self.parent_state
-
- elif ch == "(":
- return InGroupState(self)
-
- elif ch == "[":
- return InClassState(self)
-
- elif ch == "\\":
- return InBackslashState(self)
-
- else:
- return self
-
-
-class RootState(object):
+class CollectingState(object):
def __init__(self):
self.hints = [""]
@@ -94,7 +73,7 @@
self.hints.append("")
def forget_all_hints(self):
- self.hints = []
+ self.hints = [""]
def append_to_current_hint(self, ch):
self.hints[-1] += ch
@@ -126,10 +105,45 @@
return InBackslashState(self)
elif ch == "|":
- raise StopIteration
+ return self.alternation_state()
else:
return self
+
+ def alternation_state(self):
+ raise NotImplementedError
+
+
+class RootState(CollectingState):
+ def alternation_state(self):
+ raise StopIteration
+
+
+class InGroupState(CollectingState):
+ def __init__(self, parent_state):
+ CollectingState.__init__(self)
+ self.parent_state = parent_state
+ self.had_alternation = False
+
+ def update_hints(self, ch):
+ if ch == ")":
+ if not self.had_alternation:
+ self.parent_state.hints.extend(self.hints)
+ else:
+ CollectingState.update_hints(self, ch)
+
+ def next_state(self, ch):
+ if ch == ")":
+ return self.close_group_state()
+ else:
+ return CollectingState.next_state(self, ch)
+
+ def close_group_state(self):
+ return self.parent_state
+
+ def alternation_state(self):
+ self.had_alternation = True
+ return self
def hints(regex):
Modified: branches/groups/test/test_esmre.py
==============================================================================
--- branches/groups/test/test_esmre.py (original)
+++ branches/groups/test/test_esmre.py Wed Oct 1 11:14:25 2008
@@ -24,7 +24,7 @@
class HintExtractionTests(unittest.TestCase):
def checkHints(self, expected_hints, regex):
- self.assertEqual(expected_hints, esmre.hints(regex))
+ self.assertEqual(set(expected_hints), set(esmre.hints(regex)))
def testSimpleString(self):
self.checkHints(["yarr"], r"yarr")
@@ -42,7 +42,7 @@
self.checkHints(["ava", "st me harties"],
r"ava+st me harties")
- def testSkipsGroups(self):
+ def testSkipsGroupsWithAlternation(self):
self.checkHints(["Hoist the ", ", ye ", "!"],
r"Hoist the (mizzen mast|main brace), "
r"ye (landlubbers|scurvy dogs)!")
@@ -56,7 +56,7 @@
r"Hard to .+!")
def testSkipsNestedGroups(self):
- self.checkHints(["Squark!"],
+ self.checkHints(["Squark!", " Pieces of ", "!"],
r"Squark!( Pieces of (.+)!)")
def testSkipsCharacterClass(self):
@@ -68,7 +68,7 @@
r":=([)D])X")
def testSkipsBackslashMetacharacters(self):
- self.checkHints(["Cap'n", " "],
+ self.checkHints(["Cap'n", " ", " Beard"],
r"Cap'n\b ([\S] Beard)")
def testBackslashBracketDoesNotCloseGroup(self):
@@ -80,7 +80,7 @@
r":=[)D\]]X")
def testSkipsMetacharactersAfterGroups(self):
- self.checkHints(["Yo ", " and a bottle of rum"],
+ self.checkHints(["Yo ", "ho ", " and a bottle of rum"],
r"Yo (ho )+ and a bottle of rum")
def testSkipsRepetionBraces(self):
@@ -99,6 +99,12 @@
def testOnlyGroupGivesEmptyResult(self):
self.checkHints([], r"(rum|grog)")
+ def testGetsHintsFromGroups(self):
+ self.checkHints(["/"], r"([0-3][0-9]/[0-1][0-9]/[1-2][0-9]{3})")
+
+ def testSkipsOptionalGroups(self):
+ self.checkHints(["Shiver me timbers!"],
+ r"Shiver me timbers!( Arrr!)?")
class ShortlistTests(unittest.TestCase):
def checkShortlist(self, expected_shortlist, hints):