Modified:
branches/groups/src/esmre.py
branches/groups/test/test_esmre.py
Log:
Skip extension groups: a group that opens with "(?" no longer contributes to the hints.
Modified: branches/groups/src/esmre.py
==============================================================================
--- branches/groups/src/esmre.py (original)
+++ branches/groups/src/esmre.py Wed Oct 1 12:05:30 2008
@@ -2,7 +2,7 @@
# encoding: utf-8
# esmre.py - clue-indexed regular expressions module
-# Copyright (C) 2007 Tideway Systems Limited.
+# Copyright (C) 2007-2008 Tideway Systems Limited.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -97,7 +97,7 @@
def next_state(self, ch):
if ch == "(":
- return InGroupState(self)
+ return StartOfGroupState(self)
elif ch == "[":
return InClassState(self)
@@ -123,6 +123,17 @@
raise StopIteration
+class StartOfGroupState(object):
+ def __init__(self, parent_state):
+ self.parent_state = parent_state
+
+ def process_byte(self, ch):
+ if ch == "?":
+ return StartOfExtensionGroupState(self.parent_state)
+ else:
+ return InGroupState(self.parent_state).process_byte(ch)
+
+
class InGroupState(CollectingState):
def __init__(self, parent_state):
CollectingState.__init__(self)
@@ -148,6 +159,11 @@
def alternation_state(self):
self.had_alternation = True
return self
+
+
+class StartOfExtensionGroupState(InGroupState):
+ def update_hints(self, ch):
+ pass
def hints(regex):
Modified: branches/groups/test/test_esmre.py
==============================================================================
--- branches/groups/test/test_esmre.py (original)
+++ branches/groups/test/test_esmre.py Wed Oct 1 12:05:30 2008
@@ -2,7 +2,7 @@
# encoding: utf-8
# esmre_tests.py - tests for esmre module
-# Copyright (C) 2007 Tideway Systems Limited.
+# Copyright (C) 2007-2008 Tideway Systems Limited.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -105,6 +105,34 @@
def testSkipsOptionalGroups(self):
self.checkHints(["Shiver me timbers!"],
r"Shiver me timbers!( Arrr!)?")
+
+ def testSkipsMostExtensionGroups(self):
+ for regex in [
+ # set flag
+ r"(?i)(?L)(?m)(?s)(?u)(?x)",
+
+ # non-grouping paren
+ r"(?:foo)",
+
+ # comment
+ r"(?#foo)",
+
+ # lookahead
+ r"(?=foo)",
+
+ # negative lookahead
+ r"(?!foo)",
+
+ # lookbehind
+ r"(?<=foo)",
+
+ # negative lookbehind
+ r"(?<!foo)",
+
+ # conditional match
+ r"(?(1)foo|bar)"]:
+
+ self.checkHints([], regex)
class ShortlistTests(unittest.TestCase):
def checkShortlist(self, expected_shortlist, hints):