[PATCH 2 of 3] py3: use raw strings in regular expression literals that contain backslashes

5 views
Skip to first unread message

Antonio Muci

unread,
Nov 10, 2023, 7:09:38 AM11/10/23
to thg...@googlegroups.com, a....@inwind.it
# HG changeset patch
# User Antonio Muci <a....@inwind.it>
# Date 1699615195 -3600
# Fri Nov 10 12:19:55 2023 +0100
# Branch stable
# Node ID 7df99fd57e48d48760e27694483f59ddb658b89e
# Parent 82e55282396f68e782dc3c831400a15e6a83adb9
py3: use raw strings in regular expression literals that contain backslashes

The problem was already present with Python versions <= 3.11, but only surfaced
at runtime. From Python 3.12 onwards it can be reproduced statically by forcing
a bytecode compilation.

The change future-proofs the code base, since a future Python version will
raise a SyntaxError for these literals instead of just emitting a warning.

From https://docs.python.org/3/whatsnew/3.12.html#other-language-changes :
A backslash-character pair that is not a valid escape sequence now generates
a SyntaxWarning, instead of DeprecationWarning. For example,
re.compile("\d+\.\d+") now emits a SyntaxWarning ("\d" is an invalid escape
sequence, use raw strings for regular expression: re.compile(r"\d+\.\d+")).
In a future Python version, SyntaxError will eventually be raised, instead
of SyntaxWarning.

Command to reproduce the problem:
find . -name "__pycache__" -print0 | xargs -0 rm -rf && python3.12 -B -m compileall -q .

Here the removal of __pycache__ is just a hackish way of forcing a bytecode
regeneration.

diff --git a/contrib/generate_gitlab_ci_yml.py b/contrib/generate_gitlab_ci_yml.py
--- a/contrib/generate_gitlab_ci_yml.py
+++ b/contrib/generate_gitlab_ci_yml.py
@@ -36,7 +36,7 @@ def gen_update_to_mercurial_rev(hgver):
if hgver in ['stable', 'default']:
update_to = hgver
else:
- update_to = f"""'tag("re:\A{re.escape(hgver)}")'"""
+ update_to = rf"""'tag("re:\A{re.escape(hgver)}")'"""
return [
'hg -R /ci/repos/mercurial pull',
f'hg -R /ci/repos/mercurial update {update_to}',
diff --git a/contrib/packaging/thgpackaging/util.py b/contrib/packaging/thgpackaging/util.py
--- a/contrib/packaging/thgpackaging/util.py
+++ b/contrib/packaging/thgpackaging/util.py
@@ -236,7 +236,7 @@ def get_qt_dependencies(
)

for l in dump.splitlines():
- m = re.search(b'^ ([^. ]+\.dll)', l, re.IGNORECASE)
+ m = re.search(rb'^ ([^. ]+\.dll)', l, re.IGNORECASE)

if not m:
continue
diff --git a/tortoisehg/hgqt/customtools.py b/tortoisehg/hgqt/customtools.py
--- a/tortoisehg/hgqt/customtools.py
+++ b/tortoisehg/hgqt/customtools.py
@@ -1030,7 +1030,7 @@ class HookConfigDialog(CustomConfigDialo
'conflicts not resolved), <tt>$HG_ERROR=1</tt>.'),
)

- _rehookname: Pattern[str] = re.compile('^[^=\s]*$')
+ _rehookname: Pattern[str] = re.compile(r'^[^=\s]*$')

def __init__(
self,
diff --git a/tortoisehg/hgqt/fileview.py b/tortoisehg/hgqt/fileview.py
--- a/tortoisehg/hgqt/fileview.py
+++ b/tortoisehg/hgqt/fileview.py
@@ -724,7 +724,7 @@ class _AbstractViewControl(QObject):
return p


-_diffHeaderRegExp = re.compile("^@@ -[0-9]+,[0-9]+ \+[0-9]+,[0-9]+ @@")
+_diffHeaderRegExp = re.compile(r"^@@ -[0-9]+,[0-9]+ \+[0-9]+,[0-9]+ @@")

class _DiffViewControl(_AbstractViewControl):
"""Display the unified diff in HgFileView"""
diff --git a/tortoisehg/hgqt/messageentry.py b/tortoisehg/hgqt/messageentry.py
--- a/tortoisehg/hgqt/messageentry.py
+++ b/tortoisehg/hgqt/messageentry.py
@@ -66,7 +66,7 @@ class MessageEntry(qscilib.Scintilla):
self.customContextMenuRequested.connect(self.menuRequested)
self.applylexer()

- self._re_boundary = re.compile('[0-9i#]+\.|\(?[0-9i#]+\)|\(@\)')
+ self._re_boundary = re.compile(r'[0-9i#]+\.|\(?[0-9i#]+\)|\(@\)')

def setText(self, text):
result = super(MessageEntry, self).setText(text)
diff --git a/tortoisehg/hgqt/qtlib.py b/tortoisehg/hgqt/qtlib.py
--- a/tortoisehg/hgqt/qtlib.py
+++ b/tortoisehg/hgqt/qtlib.py
@@ -232,7 +232,7 @@ def editfiles(repo, files, lineno=None,
# back to older tortoisehg.editor OpenAtLine parsing
cmdline = b' '.join([toolpath] + files) # default
try:
- regexp = re.compile(b'\[([^\]]*)\]')
+ regexp = re.compile(rb'\[([^\]]*)\]')
expanded = []
pos = 0
for m in regexp.finditer(toolpath):
diff --git a/tortoisehg/hgqt/visdiff.py b/tortoisehg/hgqt/visdiff.py
--- a/tortoisehg/hgqt/visdiff.py
+++ b/tortoisehg/hgqt/visdiff.py
@@ -80,7 +80,7 @@ if hglib.TYPE_CHECKING:


# Match parent2 first, so 'parent1?' will match both parent1 and parent
-_regex = b'\$(parent2|parent1?|child|plabel1|plabel2|clabel|repo|phash1|phash2|chash)'
+_regex = rb'\$(parent2|parent1?|child|plabel1|plabel2|clabel|repo|phash1|phash2|chash)'

_nonexistant = _('[non-existant]')


Reply all
Reply to author
Forward
0 new messages