Patch 8.2.4695
Problem: JSON encoding could be faster.
Solution: Optimize encoding JSON strings. (closes #10086)
Files: src/json.c, src/testdir/test_json.vim
*** ../vim-8.2.4694/src/json.c 2022-04-04 15:16:50.738014123 +0100
--- src/json.c 2022-04-05 15:02:26.659250519 +0100
***************
*** 114,150 ****
}
#endif
static void
write_string(garray_T *gap, char_u *str)
{
char_u *res = str;
char_u numbuf[NUMBUFLEN];
if (res == NULL)
- ga_concat(gap, (char_u *)"\"\"");
- else
{
! #if defined(USE_ICONV)
! vimconv_T conv;
! char_u *converted = NULL;
! if (!enc_utf8)
! {
! // Convert the text from 'encoding' to utf-8, the JSON string is
! // always utf-8.
! conv.vc_type = CONV_NONE;
! convert_setup(&conv, p_enc, (char_u*)"utf-8");
! if (conv.vc_type != CONV_NONE)
! converted = res = string_convert(&conv, res, NULL);
! convert_setup(&conv, NULL, NULL);
! }
#endif
! ga_append(gap, '"');
! while (*res != NUL)
{
! int c;
! // always use utf-8 encoding, ignore 'encoding'
! c = utf_ptr2char(res);
switch (c)
{
--- 114,185 ----
}
#endif
+ /*
+ * Indexed by ASCII value: 1 if the character must be escaped, 0 otherwise.
+ */
+ static const char ascii_needs_escape[128] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. control characters
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. control characters
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. 0x22 is '"'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. 0x5c is '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
+ };
+
+ /*
+ * Append "str" to "gap" as a JSON string, converting it to utf-8 when needed.
+ */
static void
write_string(garray_T *gap, char_u *str)
{
char_u *res = str;
char_u numbuf[NUMBUFLEN];
+ char_u *from;
+ #if defined(USE_ICONV)
+ vimconv_T conv;
+ char_u *converted = NULL;
+ #endif
+ int c;
if (res == NULL)
{
! ga_concat(gap, (char_u *)"\"\"");
! return;
! }
! #if defined(USE_ICONV)
! if (!enc_utf8)
! {
! // Convert the text from 'encoding' to utf-8, because a JSON string is
! // always utf-8.
! conv.vc_type = CONV_NONE;
! convert_setup(&conv, p_enc, (char_u*)"utf-8");
! if (conv.vc_type != CONV_NONE)
! converted = res = string_convert(&conv, res, NULL);
! convert_setup(&conv, NULL, NULL);
! }
#endif
! ga_append(gap, '"');
! // `from` is the beginning of a sequence of bytes we can directly copy from
! // the input string, avoiding the overhead associated with decoding/encoding
! // them.
! from = res;
! while ((c = *res) != NUL)
! {
! // always use utf-8 encoding, ignore 'encoding'
! if (c < 0x80)
{
! if (!ascii_needs_escape[c])
! {
! res += 1;
! continue;
! }
!
! if (res != from)
! ga_concat_len(gap, from, res - from);
! from = res + 1;
switch (c)
{
***************
*** 164,188 ****
ga_append(gap, c);
break;
default:
! if (c >= 0x20)
! {
! numbuf[utf_char2bytes(c, numbuf)] = NUL;
! ga_concat(gap, numbuf);
! }
! else
! {
! vim_snprintf((char *)numbuf, NUMBUFLEN,
! "\\u%04lx", (long)c);
! ga_concat(gap, numbuf);
! }
}
! res += utf_ptr2len(res);
}
! ga_append(gap, '"');
#if defined(USE_ICONV)
! vim_free(converted);
#endif
- }
}
/*
--- 199,241 ----
ga_append(gap, c);
break;
default:
! vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
! (long)c);
! ga_concat(gap, numbuf);
}
!
! res += 1;
}
! else
! {
! int l = utf_ptr2len(res);
!
! if (l > 1)
! {
! res += l;
! continue;
! }
!
! // Invalid utf-8 sequence, replace it with the Unicode replacement
! // character U+FFFD.
! if (res != from)
! ga_concat_len(gap, from, res - from);
! from = res + 1;
!
! numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
! ga_concat(gap, numbuf);
!
! res += l;
! }
! }
!
! if (res != from)
! ga_concat_len(gap, from, res - from);
!
! ga_append(gap, '"');
#if defined(USE_ICONV)
! vim_free(converted);
#endif
}
/*
*** ../vim-8.2.4694/src/testdir/test_json.vim 2021-02-08 20:53:05.592963320 +0000
--- src/testdir/test_json.vim 2022-04-05 14:55:22.151639261 +0100
***************
*** 107,112 ****
--- 107,115 ----
call assert_equal('"café"', json_encode("caf\xe9"))
let &encoding = save_encoding
+ " Invalid utf-8 sequences are replaced with U+FFFD (replacement character)
+ call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB"))
+
call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func')
call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func')
*** ../vim-8.2.4694/src/version.c 2022-04-05 15:07:08.210791582 +0100
--- src/version.c 2022-04-05 14:56:43.963566990 +0100
***************
*** 748,749 ****
--- 748,751 ----
{ /* Add new patch number below this line */
+ /**/
+ 4695,
/**/
--
There are only two hard things in programming: Cache invalidation,
naming things and off-by-one errors.
/// Bram Moolenaar -- Br...@Moolenaar.net -- http://www.Moolenaar.net \\\
/// \\\
\\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///