Commit: patch 9.2.0137: [security]: crash with composing char in collection range

2 views
Skip to first unread message

Christian Brabandt

Mar 11, 2026, 3:17:00 PM
to vim...@googlegroups.com
patch 9.2.0137: [security]: crash with composing char in collection range

Commit: https://github.com/vim/vim/commit/36d6e87542cf823d833e451e09a90ee429899cec
Author: Christian Brabandt <c...@256bit.org>
Date: Wed Mar 11 14:16:29 2026 +0100

patch 9.2.0137: [security]: crash with composing char in collection range

Problem:  Using a composing character as the end of a range inside a
          collection may corrupt the NFA postfix stack
          (Nathan Mills, after v9.1.0011)
Solution: When a character is used as the endpoint of a range, do not emit
          its composing characters separately. Range handling only uses
          the base codepoint.

supported by AI

Github Advisory:
https://github.com/vim/vim/security/advisories/GHSA-9phh-423r-778r

Signed-off-by: Christian Brabandt <c...@256bit.org>

diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 6a9581d99..807bc203c 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1765,6 +1765,7 @@ collection:
if (*endp == ']')
{
int plen;
+ bool range_endpoint;
/*
* Try to reverse engineer character classes. For example,
* recognize that [0-9] stands for \d and [A-Za-z_] for \h,
@@ -1812,6 +1813,7 @@ collection:
while (regparse < endp)
{
int oldstartc = startc;
+ range_endpoint = false;

startc = -1;
got_coll_char = FALSE;
@@ -1975,6 +1977,7 @@ collection:
if (emit_range)
{
int endc = startc;
+ range_endpoint = true;

startc = oldstartc;
if (startc > endc)
@@ -2053,7 +2056,14 @@ collection:
}
}

- if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
+ //
+ // If this character was consumed as the end of a range, do not emit its
+ // composing characters separately. Range handling only uses the base
+ // codepoint; emitting the composing part again would duplicate the
+ // character in the postfix stream and corrupt the NFA stack.
+ //
+ if (!range_endpoint && enc_utf8 &&
+ (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
{
int i = utf_ptr2len(regparse);

@@ -3187,7 +3197,10 @@ nfa_max_width(nfa_state_T *startstate, int depth)
++len;
if (state->c != NFA_ANY)
{
- // skip over the characters
+ // Skip over the compiled collection.
+ // malformed NFAs must not crash width estimation.
+ if (state->out1 == NULL || state->out1->out == NULL)
+ return -1;
state = state->out1->out;
continue;
}
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
index 6eb6bf6fa..d33c03c42 100644
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -633,4 +633,22 @@ func Test_replace_multibyte_match_in_multi_lines()
set ignorecase&vim re&vim
endfun

+func Test_regex_collection_range_with_composing_crash()
+ " Regression test: composing char in collection range caused NFA crash/E874
+ new
+ call setline(1, ['00', '0ֻ', '01'])
+ let patterns = [ '0[0-0ֻ]\@<!','0[0ֻ]\@<!']
+
+ for pat in patterns
+ " Should compile and execute without crash or error
+ for re in range(3)
+ let regex = '\%#=' .. re .. pat
+ call search(regex)
+ call assert_fails($"/{regex}\<cr>", 'E486:')
+ endfor
+ endfor
+
+ bwipe!
+endfunc
+
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index e58c12baa..79a30caba 100644
--- a/src/version.c
+++ b/src/version.c
@@ -734,6 +734,8 @@ static char *(features[]) =

static int included_patches[] =
{ /* Add new patch number below this line */
+/**/
+ 137,
/**/
136,
/**/
Reply all
Reply to author
Forward
0 new messages