Patch 7.3.1112

308 views
Skip to first unread message

Bram Moolenaar

unread,
Jun 4, 2013, 12:29:24 PM6/4/13
to vim...@googlegroups.com

Patch 7.3.1112
Problem: New regexp engine: \%V not supported.
Solution: Implement \%V. Add tests.
Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test64.in,
src/testdir/test64.ok


*** ../vim-7.3.1111/src/regexp.c 2013-06-03 19:41:01.000000000 +0200
--- src/regexp.c 2013-06-04 18:28:12.000000000 +0200
***************
*** 4165,4170 ****
--- 4165,4249 ----
}

#endif
+ #ifdef FEAT_VISUAL
+ static int reg_match_visual __ARGS((void));
+
+ /*
+ * Return TRUE if the current reginput position matches the Visual area.
+ */
+ static int
+ reg_match_visual()
+ {
+ pos_T top, bot;
+ linenr_T lnum;
+ colnr_T col;
+ win_T *wp = reg_win == NULL ? curwin : reg_win;
+ int mode;
+ colnr_T start, end;
+ colnr_T start2, end2;
+ colnr_T cols;
+
+ /* Check if the buffer is the current buffer. */
+ if (reg_buf != curbuf || VIsual.lnum == 0)
+ return FALSE;
+
+ if (VIsual_active)
+ {
+ if (lt(VIsual, wp->w_cursor))
+ {
+ top = VIsual;
+ bot = wp->w_cursor;
+ }
+ else
+ {
+ top = wp->w_cursor;
+ bot = VIsual;
+ }
+ mode = VIsual_mode;
+ }
+ else
+ {
+ if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
+ {
+ top = curbuf->b_visual.vi_start;
+ bot = curbuf->b_visual.vi_end;
+ }
+ else
+ {
+ top = curbuf->b_visual.vi_end;
+ bot = curbuf->b_visual.vi_start;
+ }
+ mode = curbuf->b_visual.vi_mode;
+ }
+ lnum = reglnum + reg_firstlnum;
+ if (lnum < top.lnum || lnum > bot.lnum)
+ return FALSE;
+
+ if (mode == 'v')
+ {
+ col = (colnr_T)(reginput - regline);
+ if ((lnum == top.lnum && col < top.col)
+ || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
+ return FALSE;
+ }
+ else if (mode == Ctrl_V)
+ {
+ getvvcol(wp, &top, &start, NULL, &end);
+ getvvcol(wp, &bot, &start2, NULL, &end2);
+ if (start2 < start)
+ start = start2;
+ if (end2 > end)
+ end = end2;
+ if (top.col == MAXCOL || bot.col == MAXCOL)
+ end = MAXCOL;
+ cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
+ if (cols < start || cols > end - (*p_sel == 'e'))
+ return FALSE;
+ }
+ return TRUE;
+ }
+ #endif
+
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
***************
*** 4347,4426 ****

case RE_VISUAL:
#ifdef FEAT_VISUAL
! /* Check if the buffer is the current buffer. and whether the
! * position is inside the Visual area. */
! if (reg_buf != curbuf || VIsual.lnum == 0)
! status = RA_NOMATCH;
! else
! {
! pos_T top, bot;
! linenr_T lnum;
! colnr_T col;
! win_T *wp = reg_win == NULL ? curwin : reg_win;
! int mode;
!
! if (VIsual_active)
! {
! if (lt(VIsual, wp->w_cursor))
! {
! top = VIsual;
! bot = wp->w_cursor;
! }
! else
! {
! top = wp->w_cursor;
! bot = VIsual;
! }
! mode = VIsual_mode;
! }
! else
! {
! if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
! {
! top = curbuf->b_visual.vi_start;
! bot = curbuf->b_visual.vi_end;
! }
! else
! {
! top = curbuf->b_visual.vi_end;
! bot = curbuf->b_visual.vi_start;
! }
! mode = curbuf->b_visual.vi_mode;
! }
! lnum = reglnum + reg_firstlnum;
! col = (colnr_T)(reginput - regline);
! if (lnum < top.lnum || lnum > bot.lnum)
! status = RA_NOMATCH;
! else if (mode == 'v')
! {
! if ((lnum == top.lnum && col < top.col)
! || (lnum == bot.lnum
! && col >= bot.col + (*p_sel != 'e')))
! status = RA_NOMATCH;
! }
! else if (mode == Ctrl_V)
! {
! colnr_T start, end;
! colnr_T start2, end2;
! colnr_T cols;
!
! getvvcol(wp, &top, &start, NULL, &end);
! getvvcol(wp, &bot, &start2, NULL, &end2);
! if (start2 < start)
! start = start2;
! if (end2 > end)
! end = end2;
! if (top.col == MAXCOL || bot.col == MAXCOL)
! end = MAXCOL;
! cols = win_linetabsize(wp,
! regline, (colnr_T)(reginput - regline));
! if (cols < start || cols > end - (*p_sel == 'e'))
! status = RA_NOMATCH;
! }
! }
! #else
! status = RA_NOMATCH;
#endif
break;

case RE_LNUM:
--- 4426,4434 ----

case RE_VISUAL:
#ifdef FEAT_VISUAL
! if (!reg_match_visual())
#endif
+ status = RA_NOMATCH;
break;

case RE_LNUM:
*** ../vim-7.3.1111/src/regexp_nfa.c 2013-06-04 17:47:00.000000000 +0200
--- src/regexp_nfa.c 2013-06-04 18:22:04.000000000 +0200
***************
*** 178,183 ****
--- 178,184 ----
NFA_VCOL, /* Match cursor virtual column */
NFA_VCOL_GT, /* Match > cursor virtual column */
NFA_VCOL_LT, /* Match < cursor virtual column */
+ NFA_VISUAL, /* Match Visual area */

NFA_FIRST_NL = NFA_ANY + ADD_NL,
NFA_LAST_NL = NFA_NUPPER + ADD_NL,
***************
*** 960,967 ****
break;

case 'V':
! /* TODO: not supported yet */
! return FAIL;
break;

case '[':
--- 961,967 ----
break;

case 'V':
! EMIT(NFA_VISUAL);
break;

case '[':
***************
*** 4731,4736 ****
--- 4731,4743 ----
if (result)
addstate_here(thislist, t->state->out, &t->subs,
t->pim, &listidx);
+ break;
+
+ case NFA_VISUAL:
+ result = reg_match_visual();
+ if (result)
+ addstate_here(thislist, t->state->out, &t->subs,
+ t->pim, &listidx);
break;

default: /* regular character */
*** ../vim-7.3.1111/src/testdir/test64.in 2013-06-02 16:07:05.000000000 +0200
--- src/testdir/test64.in 2013-06-04 18:07:59.000000000 +0200
***************
*** 458,463 ****
--- 458,471 ----
:.yank
Go p:"
:"
+ :" Check matching Visual area
+ /^Visual:
+ jfxvfx:s/\%Ve/E/g
+ jV:s/\%Va/A/g
+ jfx fxj:s/\%Vo/O/g
+ :/^Visual/+1,/^Visual/+4yank
+ Go p:"
+ :"
:" Check patterns matching cursor position.
:func! Postest()
new
***************
*** 520,523 ****
--- 528,537 ----
asdfasd<yy
xxstart3

+ Visual:
+ thexe the thexethe
+ andaxand andaxand
+ oooxofor foroxooo
+ oooxofor foroxooo
+
Results of test64:
*** ../vim-7.3.1111/src/testdir/test64.ok 2013-06-02 16:07:05.000000000 +0200
--- src/testdir/test64.ok 2013-06-04 18:08:08.000000000 +0200
***************
*** 857,862 ****
--- 857,867 ----
ghi

xxstart3
+
+ thexE thE thExethe
+ AndAxAnd AndAxAnd
+ oooxOfOr fOrOxooo
+ oooxOfOr fOrOxooo
-0-
ffo
bob
*** ../vim-7.3.1111/src/version.c 2013-06-04 17:47:00.000000000 +0200
--- src/version.c 2013-06-04 18:26:28.000000000 +0200
***************
*** 730,731 ****
--- 730,733 ----
{ /* Add new patch number below this line */
+ /**/
+ 1112,
/**/

--
hundred-and-one symptoms of being an internet addict:
87. Everyone you know asks why your phone line is always busy ...and
you tell them to send an e-mail.

/// Bram Moolenaar -- Br...@Moolenaar.net -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///

Christian Brabandt

unread,
Jun 5, 2013, 2:03:18 AM6/5/13
to vim...@googlegroups.com
On Tue, June 4, 2013 18:29, Bram Moolenaar wrote:
>
> Patch 7.3.1112
> Problem: New regexp engine: \%V not supported.
> Solution: Implement \%V. Add tests.
> Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test64.in,
> src/testdir/test64.ok

The good thing about this is, it behaves exactly like the old engine.
The bad thing is, the old engine has an long standing bug with \%V ;(

e.g:

vim -u NONE -N
ifoobar<esc>
0ve<esc>
/\%Vfoobar\%V

I should have probably provided a fix long ago.

regards,
Christian

Ingo Karkat

unread,
Jun 5, 2013, 2:56:34 AM6/5/13
to vim...@googlegroups.com
On 05-Jun-2013 08:03 +0200, Christian Brabandt wrote:

> [...] the old engine has an long standing bug with \%V ;(
>
> e.g:
>
> vim -u NONE -N
> ifoobar<esc>
> 0ve<esc>
> /\%Vfoobar\%V
>
> I should have probably provided a fix long ago.

Where is the bug, that there's no match?! That's intended, as the last
\%V matches zero-width _after_ the end of "foobar", but the visual
selection ends on the "r". You'd have to use /\%Vfooba\%Vr here. (The
example at :help \%V is wrong about this corner case, too.)

The general case is this ugly beast: /\%Vfoobar\%(\%V\|\%(\%V.\)\@<=\).
Since restricting the match to inside the entire selection is such a
common use case (see the related vis.vim plugin), I think it would be
worthwhile to have a special atom (e.g. \%<V) that matches if the
_preceding character_ is inside the selection, allowing this much nicer
pattern: /\%Vfoobar\%<V

-- regards, ingo

Christian Brabandt

unread,
Jun 5, 2013, 3:10:04 AM6/5/13
to vim...@googlegroups.com
On Wed, June 5, 2013 08:56, Ingo Karkat wrote:
> On 05-Jun-2013 08:03 +0200, Christian Brabandt wrote:
>
>> [...] the old engine has an long standing bug with \%V ;(
>>
>> e.g:
>>
>> vim -u NONE -N
>> ifoobar<esc>
>> 0ve<esc>
>> /\%Vfoobar\%V
>>
>> I should have probably provided a fix long ago.
>
> Where is the bug, that there's no match?! That's intended, as the last
> \%V matches zero-width _after_ the end of "foobar", but the visual
> selection ends on the "r". You'd have to use /\%Vfooba\%Vr here. (The
> example at :help \%V is wrong about this corner case, too.)

I am not sure, it is intended. I certainly wouldn't expect this (and
the doc is wrong in this regard, as you said). Since
\%V is zero-width, I would expect the \%V to still match the end of the
visual selection.

> The general case is this ugly beast: /\%Vfoobar\%(\%V\|\%(\%V.\)\@<=\).

Yes, very ugly and not easily understandable.

> Since restricting the match to inside the entire selection is such a
> common use case (see the related vis.vim plugin), I think it would be
> worthwhile to have a special atom (e.g. \%<V) that matches if the
> _preceding character_ is inside the selection, allowing this much nicer
> pattern: /\%Vfoobar\%<V

I can understand why this happens but nevertheless it is an annoying
corner-case to consider when using the \%V atom, so I'd like to have
this issue resolved.

regards,
Christian

Ingo Karkat

unread,
Jun 5, 2013, 3:40:00 AM6/5/13
to vim...@googlegroups.com
On 05-Jun-2013 09:10 +0200, Christian Brabandt wrote:

> On Wed, June 5, 2013 08:56, Ingo Karkat wrote:
>> On 05-Jun-2013 08:03 +0200, Christian Brabandt wrote:
>>
>>> [...] the old engine has an long standing bug with \%V ;(
>>>
>>> e.g:
>>>
>>> vim -u NONE -N
>>> ifoobar<esc>
>>> 0ve<esc>
>>> /\%Vfoobar\%V
>>>
>>> I should have probably provided a fix long ago.
>>
>> Where is the bug, that there's no match?! That's intended, as the last
>> \%V matches zero-width _after_ the end of "foobar", but the visual
>> selection ends on the "r". You'd have to use /\%Vfooba\%Vr here. (The
>> example at :help \%V is wrong about this corner case, too.)
>
> I am not sure, it is intended. I certainly wouldn't expect this (and
> the doc is wrong in this regard, as you said). Since
> \%V is zero-width, I would expect the \%V to still match the end of the
> visual selection.

Zero-width means: Matches at the current position, but does not consume
the character; i.e. the next match is made at the same position.
What is needed at the final atom is: Matches at the previous position
(that has already been consumed). I don't know how difficult that is to
implement in the regular expression engine(s), but I think such a new
\%<V atom would be helpful.

In my opinion it is hard to change the existing \%V in a way to fit both
uses: It just matches anywhere inside a selection, and has no notion of
start or end of selection. Therefore, it's presumably difficult to
change its behavior at the end of the selection without introducing
off-by-one errors elsewhere. I'm already struggling to come up with a
good and precise documentation for the new behavior; the current one is
at least simple to grasp (when you understand zero-width matches). And
it would be a compatibility-breaking change.

>> The general case is this ugly beast: /\%Vfoobar\%(\%V\|\%(\%V.\)\@<=\).
>
> Yes, very ugly and not easily understandable.
>
>> Since restricting the match to inside the entire selection is such a
>> common use case (see the related vis.vim plugin), I think it would be
>> worthwhile to have a special atom (e.g. \%<V) that matches if the
>> _preceding character_ is inside the selection, allowing this much nicer
>> pattern: /\%Vfoobar\%<V
>
> I can understand why this happens but nevertheless it is an annoying
> corner-case to consider when using the \%V atom, so I'd like to have
> this issue resolved.

Yes, at least the help should warn about this corner case. I ran into it
(and came up with the complex correct regexp) in some of my plugins, but
I think \%V is one of the more obscure atoms few people use. Probably
more users rely on the vis.vim plugin, which solves the problem in a
completely different way.

-- regards, ingo

Marcin Szamotulski

unread,
Jun 5, 2013, 5:20:19 AM6/5/13
to vim...@googlegroups.com
> --
> --
> You received this message from the "vim_dev" maillist.
> Do not top-post! Type your reply below the text you are replying to.
> For more information, visit http://www.vim.org/maillist.php
>
> ---
> You received this message because you are subscribed to the Google Groups "vim_dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to vim_dev+u...@googlegroups.com.
> For more options, visit https://groups.google.com/groups/opt_out.
>

And what about this pattern:
/\%Vfooba\%Vr

Thanks I did not know this pattern and it is very handy. I always felt
that the search commands /, ? should have a range, so that one can limit
the search to part of a text. What about having two commands :<range>/
and :<range>? for searches limited to the <range> of lines.

Best regards,
Marcin

Ingo Karkat

unread,
Jun 5, 2013, 4:53:22 AM6/5/13
to vim...@googlegroups.com
That's fine (but hard to read) for literal text, but difficult when you
have a regular expression with multiple branches. Thinking of custom
commands that take a pattern, it's close to impossible to parse the
regular expression to insert the \%V before the match of the last character.

> Thanks I did not know this pattern and it is very handy. I always felt
> that the search commands /, ? should have a range, so that one can limit
> the search to part of a text. What about having two commands :<range>/
> and :<range>? for searches limited to the <range> of lines.

You can use the range-search plugin
(http://www.vim.org/scripts/script.php?script_id=1396) for that.

-- regards, ingo

Bram Moolenaar

unread,
Jun 5, 2013, 6:43:28 AM6/5/13
to Christian Brabandt, vim...@googlegroups.com
It's not a bug, as others have mentioned. This can't be changed without
being incompatible.

Other items have the same problem, that you intuitively expect them to
match on the previous character. Thus only doing something for the
Visual area is not the right solution.

We could have some kind of modifier, apply the zero-width match on the
previous position. Could also have it for other matches, e.g. to check
if the previous character is "<". Currently patterns use \@<=, which is
very expensive (it tries many starting positions).

Perhaps we can use \@0<= for this. \@1<= doesn't do quite the same
thing, since it checks if the end of the match is the current position,
which won't work for zero-width matches.

--
From "know your smileys":
:-H Is missing teeth

Marcin Szamotulski

unread,
Jun 5, 2013, 8:40:56 AM6/5/13
to vim...@googlegroups.com
> > And what about this pattern:
> > /\%Vfooba\%Vr
>
> That's fine (but hard to read) for literal text, but difficult when you
> have a regular expression with multiple branches. Thinking of custom
> commands that take a pattern, it's close to impossible to parse the
> regular expression to insert the \%V before the match of the last character.
>
> > Thanks I did not know this pattern and it is very handy. I always felt
> > that the search commands /, ? should have a range, so that one can limit
> > the search to part of a text. What about having two commands :<range>/
> > and :<range>? for searches limited to the <range> of lines.
>
> You can use the range-search plugin
> (http://www.vim.org/scripts/script.php?script_id=1396) for that.
>
> -- regards, ingo
>

Thanks,
Marcin

David Fishburn

unread,
Jun 5, 2013, 7:52:35 AM6/5/13
to vim_dev
...
> Thanks I did not know this pattern and it is very handy.  I always felt
> that the search commands /, ? should have a range, so that one can limit
> the search to part of a text.  What about having two commands :<range>/
> and :<range>? for searches limited to the <range> of lines.

You can use the range-search plugin
(http://www.vim.org/scripts/script.php?script_id=1396) for that.

I use this one:
vis : Extended Visual Mode Commands, Substitutes, and Searches
By Dr. Chip

It creates a simple mapping // to do a range search over my highlighted region.  I find it quite convienent.

David

Andy Wokula

unread,
Jun 5, 2013, 7:56:29 AM6/5/13
to vim...@googlegroups.com
Am 05.06.2013 08:56, schrieb Ingo Karkat:
> On 05-Jun-2013 08:03 +0200, Christian Brabandt wrote:
>
>> [...] the old engine has an long standing bug with \%V ;(
>>
>> e.g:
>>
>> vim -u NONE -N
>> ifoobar<esc>
>> 0ve<esc>
>> /\%Vfoobar\%V
>>
>> I should have probably provided a fix long ago.
>
> Where is the bug, that there's no match?! That's intended, as the last
> \%V matches zero-width _after_ the end of "foobar", but the visual
> selection ends on the "r". You'd have to use /\%Vfooba\%Vr here. (The
> example at :help \%V is wrong about this corner case, too.)
>
> The general case is this ugly beast: /\%Vfoobar\%(\%V\|\%(\%V.\)\@<=\).

Keep it simple ...:
/\%Vfoobar\%(\%V.\)\@<=

(Any actual use case for the /ugly beast/?)

> Since restricting the match to inside the entire selection is such a
> common use case (see the related vis.vim plugin), I think it would be
> worthwhile to have a special atom (e.g. \%<V) that matches if the
> _preceding character_ is inside the selection, allowing this much nicer
> pattern: /\%Vfoobar\%<V

Looks like \%<V would be the same as \%(\%V.\)\@<=

> -- regards, ingo

--
Andy

Ingo Karkat

unread,
Jun 5, 2013, 8:27:09 AM6/5/13
to vim...@googlegroups.com
On 05-Jun-2013 13:56 +0200, Andy Wokula wrote:

> Am 05.06.2013 08:56, schrieb Ingo Karkat:
>> On 05-Jun-2013 08:03 +0200, Christian Brabandt wrote:
>>
>>> [...] the old engine has an long standing bug with \%V ;(
>>>
>>> e.g:
>>>
>>> vim -u NONE -N
>>> ifoobar<esc>
>>> 0ve<esc>
>>> /\%Vfoobar\%V
>>>
>>> I should have probably provided a fix long ago.
>>
>> Where is the bug, that there's no match?! That's intended, as the last
>> \%V matches zero-width _after_ the end of "foobar", but the visual
>> selection ends on the "r". You'd have to use /\%Vfooba\%Vr here. (The
>> example at :help \%V is wrong about this corner case, too.)
>>
>> The general case is this ugly beast: /\%Vfoobar\%(\%V\|\%(\%V.\)\@<=\).
>
> Keep it simple ...:
> /\%Vfoobar\%(\%V.\)\@<=

You're technically right, maybe it was irrational fear of bad
performance (from the help: "For speed it's often much better to avoid
this multi.") that made me introduce the branch for the common case.
Confession: I've never measured this, though.

> (Any actual use case for the /ugly beast/?)

As I've mentioned, when you don't have a hard-coded "foobar", but a
passed regular expression e.g. in a custom command.

>> Since restricting the match to inside the entire selection is such a
>> common use case (see the related vis.vim plugin), I think it would be
>> worthwhile to have a special atom (e.g. \%<V) that matches if the
>> _preceding character_ is inside the selection, allowing this much nicer
>> pattern: /\%Vfoobar\%<V
>
> Looks like \%<V would be the same as \%(\%V.\)\@<=

Right, but hopefully with better performance. The main benefit would be
that it's easier to remember and type. Bram has just suggested a generic
\@0<= elsewhere in this thread, but I think even that (\%V\@0<=) would
be too hard to remember.

-- regards, ingo

Bram Moolenaar

unread,
Jun 5, 2013, 1:02:52 PM6/5/13
to Ingo Karkat, vim...@googlegroups.com

Ingo Karkat wrote:

> On 05-Jun-2013 13:56 +0200, Andy Wokula wrote:
>
> > Am 05.06.2013 08:56, schrieb Ingo Karkat:
> >> On 05-Jun-2013 08:03 +0200, Christian Brabandt wrote:
> >>
> >>> [...] the old engine has an long standing bug with \%V ;(
> >>>
> >>> e.g:
> >>>
> >>> vim -u NONE -N
> >>> ifoobar<esc>
> >>> 0ve<esc>
> >>> /\%Vfoobar\%V
> >>>
> >>> I should have probably provided a fix long ago.
> >>
> >> Where is the bug, that there's no match?! That's intended, as the last
> >> \%V matches zero-width _after_ the end of "foobar", but the visual
> >> selection ends on the "r". You'd have to use /\%Vfooba\%Vr here. (The
> >> example at :help \%V is wrong about this corner case, too.)
> >>
> >> The general case is this ugly beast: /\%Vfoobar\%(\%V\|\%(\%V.\)\@<=\).
> >
> > Keep it simple ...:
> > /\%Vfoobar\%(\%V.\)\@<=
>
> You're technically right, maybe it was irrational fear of bad
> performance (from the help: "For speed it's often much better to avoid
> this multi.") that made me introduce the branch for the common case.
> Confession: I've never measured this, though.

It should be a lot faster with a byte length limit:

/\%Vfoobar\%(\%V.\)\@1<=


--
From "know your smileys":
(:-# Said something he shouldn't have

Ingo Karkat

unread,
Jun 5, 2013, 3:09:54 PM6/5/13
to vim...@googlegroups.com, Bram Moolenaar
Agreed; I want that syntax, too :-) But honestly, would you want to
write that interactively for each selected-limited search?! (It's fine
for write-once-use-often mappings and commands.) Something like
/\%Vfoobar\%<V
I would have a chance to remember.

-- regards, ingo
Reply all
Reply to author
Forward
0 new messages