Patch 9.0.0795

Bram Moolenaar

unread,

Oct 19, 2022, 9:03:25 AM (10/19/22)

to vim...@googlegroups.com

Patch 9.0.0795
Problem: readblob() always reads the whole file.
Solution: Add arguments to read part of the file. (Ken Takata,
closes #11402)
Files: runtime/doc/builtin.txt, src/blob.c, src/proto/blob.pro,
src/evalfunc.c, src/filepath.c, src/testdir/test_blob.vim

*** ../vim-9.0.0794/runtime/doc/builtin.txt 2022-10-11 21:51:09.962103580 +0100
--- runtime/doc/builtin.txt 2022-10-19 13:39:21.057744387 +0100
***************
*** 445,451 ****
rand([{expr}]) Number get pseudo-random number
range({expr} [, {max} [, {stride}]])
List items from {expr} to {max}
! readblob({fname}) Blob read a |Blob| from {fname}
readdir({dir} [, {expr} [, {dict}]])
List file names in {dir} selected by {expr}
readdirex({dir} [, {expr} [, {dict}]])
--- 445,452 ----
rand([{expr}]) Number get pseudo-random number
range({expr} [, {max} [, {stride}]])
List items from {expr} to {max}
! readblob({fname} [, {offset} [, {size}]])
! Blob read a |Blob| from {fname}
readdir({dir} [, {expr} [, {dict}]])
List file names in {dir} selected by {expr}
readdirex({dir} [, {expr} [, {dict}]])
***************
*** 6846,6855 ****
GetExpr()->range()
<

! readblob({fname}) *readblob()*
Read file {fname} in binary mode and return a |Blob|.
When the file can't be opened an error message is given and
the result is an empty |Blob|.
Also see |readfile()| and |writefile()|.

--- 6848,6868 ----
GetExpr()->range()
<

! readblob({fname} [, {offset} [, {size}]]) *readblob()*
Read file {fname} in binary mode and return a |Blob|.
+ If {offset} is specified, read the file from the specified
+ offset. If it is a negative value, it is used as an offset
+ from the end of the file. E.g., to read the last 12 bytes: >
+ readblob('file.bin', -12)
+ < If {size} is specified, only the specified size will be read.
+ E.g. to read the first 100 bytes of a file: >
+ readblob('file.bin', 0, 100)
+ < If {size} is -1 or omitted, the whole data starting from
+ {offset} will be read.
When the file can't be opened an error message is given and
the result is an empty |Blob|.
+ When trying to read bytes beyond the end of the file the
+ result is an empty blob.
Also see |readfile()| and |writefile()|.

*** ../vim-9.0.0794/src/blob.c 2022-09-28 16:16:10.256335629 +0100
--- src/blob.c 2022-10-19 13:51:50.733479740 +0100
***************
*** 182,203 ****
}

/*
! * Read "blob" from file "fd".
* Return OK or FAIL.
*/
int
! read_blob(FILE *fd, blob_T *blob)
{
struct stat st;

if (fstat(fileno(fd), &st) < 0)
return FAIL;
! if (ga_grow(&blob->bv_ga, st.st_size) == FAIL)
! return FAIL;
! blob->bv_ga.ga_len = st.st_size;
if (fread(blob->bv_ga.ga_data, 1, blob->bv_ga.ga_len, fd)
< (size_t)blob->bv_ga.ga_len)
return FAIL;
return OK;
}

--- 182,233 ----
}

/*
! * Read blob from file "fd".
! * Caller has allocated a blob in "rettv".
* Return OK or FAIL.
*/
int
! read_blob(FILE *fd, typval_T *rettv, off_T offset, off_T size_arg)
{
+ blob_T *blob = rettv->vval.v_blob;
struct stat st;
+ int whence;
+ off_T size = size_arg;

if (fstat(fileno(fd), &st) < 0)
+ return FAIL; // can't read the file, error
+
+ if (offset >= 0)
+ {
+ if (size == -1)
+ // size may become negative, checked below
+ size = st.st_size - offset;
+ whence = SEEK_SET;
+ }
+ else
+ {
+ if (size == -1)
+ size = -offset;
+ whence = SEEK_END;
+ }
+ // Trying to read bytes that aren't there results in an empty blob, not an
+ // error.
+ if (size < 0 || size > st.st_size)
+ return OK;
+ if (vim_fseek(fd, offset, whence) != 0)
+ return OK;
+
+ if (ga_grow(&blob->bv_ga, (int)size) == FAIL)
return FAIL;
! blob->bv_ga.ga_len = (int)size;
if (fread(blob->bv_ga.ga_data, 1, blob->bv_ga.ga_len, fd)
< (size_t)blob->bv_ga.ga_len)
+ {
+ // An empty blob is returned on error.
+ blob_free(rettv->vval.v_blob);
+ rettv->vval.v_blob = NULL;
return FAIL;
+ }
return OK;
}

*** ../vim-9.0.0794/src/proto/blob.pro 2022-09-22 17:06:56.299037474 +0100
--- src/proto/blob.pro 2022-10-19 13:47:18.125570201 +0100
***************
*** 10,16 ****
void blob_set(blob_T *blob, int idx, int byte);
void blob_set_append(blob_T *blob, int idx, int byte);
int blob_equal(blob_T *b1, blob_T *b2);
! int read_blob(FILE *fd, blob_T *blob);
int write_blob(FILE *fd, blob_T *blob);
char_u *blob2string(blob_T *blob, char_u **tofree, char_u *numbuf);
blob_T *string2blob(char_u *str);
--- 10,16 ----
void blob_set(blob_T *blob, int idx, int byte);
void blob_set_append(blob_T *blob, int idx, int byte);
int blob_equal(blob_T *b1, blob_T *b2);
! int read_blob(FILE *fd, typval_T *rettv, off_T offset, off_T size);
int write_blob(FILE *fd, blob_T *blob);
char_u *blob2string(blob_T *blob, char_u **tofree, char_u *numbuf);
blob_T *string2blob(char_u *str);
*** ../vim-9.0.0794/src/evalfunc.c 2022-10-13 22:12:07.164673822 +0100
--- src/evalfunc.c 2022-10-19 13:32:22.113929551 +0100
***************
*** 1078,1083 ****
--- 1078,1084 ----
static argcheck_T arg3_string_any_string[] = {arg_string, NULL, arg_string};
static argcheck_T arg3_string_bool_bool[] = {arg_string, arg_bool, arg_bool};
static argcheck_T arg3_string_number_bool[] = {arg_string, arg_number, arg_bool};
+ static argcheck_T arg3_string_number_number[] = {arg_string, arg_number, arg_number};
static argcheck_T arg3_string_or_dict_bool_dict[] = {arg_string_or_dict_any, arg_bool, arg_dict_any};
static argcheck_T arg3_string_string_bool[] = {arg_string, arg_string, arg_bool};
static argcheck_T arg3_string_string_dict[] = {arg_string, arg_string, arg_dict_any};
***************
*** 2339,2345 ****
ret_number, f_rand},
{"range", 1, 3, FEARG_1, arg3_number,
ret_list_number, f_range},
! {"readblob", 1, 1, FEARG_1, arg1_string,
ret_blob, f_readblob},
{"readdir", 1, 3, FEARG_1, arg3_string_any_dict,
ret_list_string, f_readdir},
--- 2340,2346 ----
ret_number, f_rand},
{"range", 1, 3, FEARG_1, arg3_number,
ret_list_number, f_range},
! {"readblob", 1, 3, FEARG_1, arg3_string_number_number,
ret_blob, f_readblob},
{"readdir", 1, 3, FEARG_1, arg3_string_any_dict,
ret_list_string, f_readdir},
*** ../vim-9.0.0794/src/filepath.c 2022-10-08 12:52:04.317689786 +0100
--- src/filepath.c 2022-10-19 13:47:30.741565924 +0100
***************
*** 1792,1807 ****
long cnt = 0;
char_u *p; // position in buf
char_u *start; // start of current line

if (argvars[1].v_type != VAR_UNKNOWN)
{
! if (STRCMP(tv_get_string(&argvars[1]), "b") == 0)
! binary = TRUE;
! if (STRCMP(tv_get_string(&argvars[1]), "B") == 0)
! blob = TRUE;

! if (argvars[2].v_type != VAR_UNKNOWN)
! maxline = (long)tv_get_number(&argvars[2]);
}

if ((blob ? rettv_blob_alloc(rettv) : rettv_list_alloc(rettv)) == FAIL)
--- 1792,1818 ----
long cnt = 0;
char_u *p; // position in buf
char_u *start; // start of current line
+ off_T offset = 0;
+ off_T size = -1;

if (argvars[1].v_type != VAR_UNKNOWN)
{
! if (always_blob)
! {
! offset = (off_T)tv_get_number(&argvars[1]);
! if (argvars[2].v_type != VAR_UNKNOWN)
! size = (off_T)tv_get_number(&argvars[2]);
! }
! else
! {
! if (STRCMP(tv_get_string(&argvars[1]), "b") == 0)
! binary = TRUE;
! if (STRCMP(tv_get_string(&argvars[1]), "B") == 0)
! blob = TRUE;

! if (argvars[2].v_type != VAR_UNKNOWN)
! maxline = (long)tv_get_number(&argvars[2]);
! }
}

if ((blob ? rettv_blob_alloc(rettv) : rettv_list_alloc(rettv)) == FAIL)
***************
*** 1818,1836 ****
}
if (*fname == NUL || (fd = mch_fopen((char *)fname, READBIN)) == NULL)
{
! semsg(_(e_cant_open_file_str), *fname == NUL ? (char_u *)_("<empty>") : fname);
return;
}

if (blob)
{
! if (read_blob(fd, rettv->vval.v_blob) == FAIL)
! {
semsg(_(e_cant_read_file_str), fname);
- // An empty blob is returned on error.
- blob_free(rettv->vval.v_blob);
- rettv->vval.v_blob = NULL;
- }
fclose(fd);
return;
}
--- 1829,1843 ----
}
if (*fname == NUL || (fd = mch_fopen((char *)fname, READBIN)) == NULL)
{
! semsg(_(e_cant_open_file_str),
! *fname == NUL ? (char_u *)_("<empty>") : fname);
return;
}

if (blob)
{
! if (read_blob(fd, rettv, offset, size) == FAIL)
semsg(_(e_cant_read_file_str), fname);
fclose(fd);
return;
}
***************
*** 2007,2013 ****
void
f_readblob(typval_T *argvars, typval_T *rettv)
{
! if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
return;

read_file_or_blob(argvars, rettv, TRUE);
--- 2014,2024 ----
void
f_readblob(typval_T *argvars, typval_T *rettv)
{
! if (in_vim9script()
! && (check_for_string_arg(argvars, 0) == FAIL
! || check_for_opt_number_arg(argvars, 1) == FAIL
! || (argvars[1].v_type != VAR_UNKNOWN
! && check_for_opt_number_arg(argvars, 2) == FAIL)))
return;

read_file_or_blob(argvars, rettv, TRUE);
*** ../vim-9.0.0794/src/testdir/test_blob.vim 2022-09-17 21:07:52.103993150 +0100
--- src/testdir/test_blob.vim 2022-10-19 14:01:35.185295062 +0100
***************
*** 488,497 ****
--- 488,516 ----
call writefile(b, 'Xblob')
VAR br = readfile('Xblob', 'B')
call assert_equal(b, br)
+ VAR br2 = readblob('Xblob')
+ call assert_equal(b, br2)
+ VAR br3 = readblob('Xblob', 1)
+ call assert_equal(b[1 :], br3)
+ VAR br4 = readblob('Xblob', 1, 2)
+ call assert_equal(b[1 : 2], br4)
+ VAR br5 = readblob('Xblob', -3)
+ call assert_equal(b[-3 :], br5)
+ VAR br6 = readblob('Xblob', -3, 2)
+ call assert_equal(b[-3 : -2], br6)
+
+ VAR br1e = readblob('Xblob', 10000)
+ call assert_equal(0z, br1e)
+ VAR br2e = readblob('Xblob', -10000)
+ call assert_equal(0z, br2e)
+
call delete('Xblob')
END
call v9.CheckLegacyAndVim9Success(lines)

+ call assert_fails("call readblob('notexist')", 'E484:')
+ " TODO: How do we test for the E485 error?
+
" This was crashing when calling readfile() with a directory.
call assert_fails("call readfile('.', 'B')", 'E17: "." is a directory')
endfunc
*** ../vim-9.0.0794/src/version.c 2022-10-19 13:06:58.032690097 +0100
--- src/version.c 2022-10-19 13:34:02.001883334 +0100
***************
*** 697,698 ****
--- 697,700 ----
{ /* Add new patch number below this line */
+ /**/
+ 795,
/**/

--
TERRY GILLIAM PLAYED: PATSY (ARTHUR'S TRUSTY STEED), THE GREEN KNIGHT
SOOTHSAYER, BRIDGEKEEPER, SIR GAWAIN (THE FIRST TO BE
KILLED BY THE RABBIT)
"Monty Python and the Holy Grail" PYTHON (MONTY) PICTURES LTD

/// Bram Moolenaar -- Br...@Moolenaar.net -- http://www.Moolenaar.net \\\
/// \\\
\\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///

Jürgen Krämer

unread,

Oct 21, 2022, 4:06:14 AM (10/21/22)

to vim...@googlegroups.com

Hi,

Bram Moolenaar schrieb am 19.10.2022 um 15:03:
[...]

>
> ! readblob({fname} [, {offset} [, {size}]]) *readblob()*
> Read file {fname} in binary mode and return a |Blob|.
> + If {offset} is specified, read the file from the specified
> + offset. If it is a negative value, it is used as an offset
> + from the end of the file. E.g., to read the last 12 bytes: >
> + readblob('file.bin', -12)
> + < If {size} is specified, only the specified size will be read.
> + E.g. to read the first 100 bytes of a file: >
> + readblob('file.bin', 0, 100)
> + < If {size} is -1 or omitted, the whole data starting from
> + {offset} will be read.
> When the file can't be opened an error message is given and
> the result is an empty |Blob|.
> + When trying to read bytes beyond the end of the file the
> + result is an empty blob.
> Also see |readfile()| and |writefile()|.

I think the second to last sentence needs a clarification. What happens if
reading starts inside the file, but the number of bytes to read would result
in reading beyond the end of file? For example, in the given example

readblob('file.bin', 0, 100)

what would the result be if the file is only 80 bytes long? An empty blob
or a blob with 80 bytes? A contrived but similar example:

readblob('file.bin', -10, function_that_might_return_a_number_larger_than_10())

Does the resulting blob contain 10 bytes if the function returns a number larger
than 10 or is the blob empty in this case?

(I would prefer readblob() to return as many bytes as possible and only to return
an empty blob if the reading *starts* beyond the end of the file. Currently I am
at a computer where I cannot update to the newest version, so I could not test
the actual behavior.)

Regards,
Jürgen

--
~
~
~
:wq

Bram Moolenaar

unread,

Oct 21, 2022, 6:26:17 AM (10/21/22)

to vim...@googlegroups.com

Yeah, it is simpler if readblob() returns what it has, instead of
returning nothing if you ask for one byte too many.

For example, if you want to get the last 100 bytes of a file, and it
turns out the file is shorter, you can get the whole file. If you
really needed 100 bytes you can easily check the length of the blob.

With the current behavior you would first have to get the size of the
file and compute the arguments yourself.

--
System administrators are just like women: You can't live with them and you
can't live without them.

Reply all

Reply to author

Forward