At some point between Diffutils 2.8.7 and 2.9, the code that handles
binary files was removed from io.c. The changes below resurrect the
correct behavior. To accomplish this, I needed to add the binary-io
module, and make changes there as well. Patches are below.
* lib/binary-io.h (SET_BINARY) [O_BINARY]: Return the previous mode.
(UNSET_BINARY): New macro, to reset I/O to text mode.
(SET_BINARY, UNSET_BINARY): Include _WIN32 in the conditional for
MS systems.
* src/io.c: Include binary-io.h.
(sip, read_files): Switch file I/O to binary mode and back as
appropriate, to support binary files on systems that distinguish
between text and binary I/O.
/* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -110,12 +111,25 @@ sip (struct file_data *current, bool ski
if (! skip_test)
{
/* Check first part of file to see if it's a binary file. */
-
- /* FIXME: if O_BINARY, this should revert to text mode
- if the file is not binary. */
+ bool binary_file;
+ int prev_mode = SET_BINARY (current->desc);
file_block_read (current, current->bufsize);
- return binary_file_p (current->buffer, current->buffered);
+ binary_file = binary_file_p (current->buffer, current->buffered);
+ if (prev_mode != O_BINARY)
+ {
+ /* Revert to text mode and seek back to the beginning to
+ reread the file. Use relative seek, since file
+ descriptors like stdin might not start at offset
+ zero. */
+
+ if (lseek (current->desc, -current->buffered, SEEK_CUR) == -1)
+ pfatal_with_name (current->name);
+ (void) UNSET_BINARY (current->desc);
+ current->buffered = 0;
+ current->eof = false;
+ }
+ return binary_file;
}
}
@@ -761,7 +775,8 @@ read_files (struct file_data filevec[], }
if (appears_binary)
{
- /* FIXME: If O_BINARY, this should set both files to binary mode. */
+ (void) SET_BINARY (filevec[0].desc);
+ (void) SET_BINARY (filevec[1].desc);
return true;
}
/* SET_BINARY (fd);
- changes the file descriptor fd to perform binary I/O. */
+ changes the file descriptor fd to perform binary I/O, returns
+ the previous I/O mode.
+
+ UNSET_BINARY (fd);
+ changes the file descriptor fd to perform text I/O, returns
+ the previous I/O mode. */
#if O_BINARY
-# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
+# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined _WIN32
# include <io.h> /* declares setmode() */
# else
# define setmode _setmode
@@ -40,13 +45,15 @@
/* Avoid putting stdin/stdout in binary mode if it is connected to
the console, because that would make it impossible for the user
to interrupt the program through Ctrl-C or Ctrl-Break. */
-# define SET_BINARY(fd) ((void) (!isatty (fd) ? (setmode (fd, O_BINARY), 0) : 0))
+# define SET_BINARY(fd) (!isatty (fd) ? setmode (fd, O_BINARY) : -1)
# else
-# define SET_BINARY(fd) ((void) setmode (fd, O_BINARY))
+# define SET_BINARY(fd) (setmode (fd, O_BINARY))
# endif
+# define UNSET_BINARY(fd) (setmode (fd, O_TEXT))
#else
/* On reasonable systems, binary I/O is the default. */
-# define SET_BINARY(fd) /* do nothing */ ((void) 0)
+# define SET_BINARY(fd) /* do nothing */ (O_BINARY)
+# define UNSET_BINARY(fd) /* do nothing */ (-1)
#endif
> Date: Sat, 12 May 2012 19:17:01 +0300
> From: Eli Zaretskii <e...@gnu.org>
> At some point between Diffutils 2.8.7 and 2.9, the code that handles
> binary files was removed from io.c. The changes below resurrect the
> correct behavior. To accomplish this, I needed to add the binary-io
> module, and make changes there as well. Patches are below.
Sorry, I found 2 subtle mistakes in the patches I sent. Please use
the ones below instead:
* lib/binary-io.h (SET_BINARY) [O_BINARY]: Return the previous mode.
(UNSET_BINARY): New macro, to reset I/O to text mode.
(SET_BINARY, UNSET_BINARY): Include _WIN32 in the conditional for
MS systems.
* src/io.c: Include binary-io.h.
(sip, read_files): Switch file I/O to binary mode and back as
appropriate, to support binary files on systems that distinguish
between text and binary I/O.
/* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -110,12 +111,25 @@ sip (struct file_data *current, bool ski
if (! skip_test)
{
/* Check first part of file to see if it's a binary file. */
-
- /* FIXME: if O_BINARY, this should revert to text mode
- if the file is not binary. */
+ bool binary_file;
+ int prev_mode = SET_BINARY (current->desc);
file_block_read (current, current->bufsize);
- return binary_file_p (current->buffer, current->buffered);
+ binary_file = binary_file_p (current->buffer, current->buffered);
+ if (prev_mode != O_BINARY)
+ {
+ /* Revert to text mode and seek back to the beginning to
+ reread the file. Use relative seek, since file
+ descriptors like stdin might not start at offset
+ zero. */
+
+ if (lseek (current->desc, -current->buffered, SEEK_CUR) == -1)
+ pfatal_with_name (current->name);
+ (void) UNSET_BINARY (current->desc);
+ current->buffered = 0;
+ current->eof = false;
+ }
+ return binary_file;
}
}
@@ -761,7 +775,8 @@ read_files (struct file_data filevec[], }
if (appears_binary)
{
- /* FIXME: If O_BINARY, this should set both files to binary mode. */
+ (void) SET_BINARY (filevec[0].desc);
+ (void) SET_BINARY (filevec[1].desc);
return true;
}
/* SET_BINARY (fd);
- changes the file descriptor fd to perform binary I/O. */
+ changes the file descriptor fd to perform binary I/O, returns
+ the previous I/O mode.
+
+ UNSET_BINARY (fd);
+ changes the file descriptor fd to perform text I/O, returns
+ the previous I/O mode. */
#if O_BINARY
-# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
+# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined __MINGW32__
# include <io.h> /* declares setmode() */
# else
# define setmode _setmode
@@ -40,13 +45,15 @@
/* Avoid putting stdin/stdout in binary mode if it is connected to
the console, because that would make it impossible for the user
to interrupt the program through Ctrl-C or Ctrl-Break. */
-# define SET_BINARY(fd) ((void) (!isatty (fd) ? (setmode (fd, O_BINARY), 0) : 0))
+# define SET_BINARY(fd) (!isatty (fd) ? setmode (fd, O_BINARY) : O_BINARY)
# else
-# define SET_BINARY(fd) ((void) setmode (fd, O_BINARY))
+# define SET_BINARY(fd) (setmode (fd, O_BINARY))
# endif
+# define UNSET_BINARY(fd) (setmode (fd, O_TEXT))
#else
/* On reasonable systems, binary I/O is the default. */
-# define SET_BINARY(fd) /* do nothing */ ((void) 0)
+# define SET_BINARY(fd) /* do nothing */ (O_BINARY)
+# define UNSET_BINARY(fd) /* do nothing */ (-1)
#endif
> * lib/binary-io.h (SET_BINARY) [O_BINARY]: Return the previous mode.
> (UNSET_BINARY): New macro, to reset I/O to text mode.
> (SET_BINARY, UNSET_BINARY): Include _WIN32 in the conditional for
> MS systems. ^^^^^^
Eli Zaretskii wrote:
> At some point between Diffutils 2.8.7 and 2.9, the code that handles
> binary files was removed from io.c. The changes below resurrect the
> correct behavior. To accomplish this, I needed to add the binary-io
> module, and make changes there as well.
But regarding <binary-io.h>, I am unhappy about another macro UNSET_BINARY.
If you need
1. to reset the value to O_TEXT,
2. to get the previous value in return,
then I would suggest to use a function
int setmode (int fd, int o_mode);
exactly like in Cygwin, so that no code is needed on Cygwin.
If we had the chance to design a new API would prefer
bool set_binary_mode (int fd, bool binary);
but this interface would have no significant advantage over the well-known
Cygwin one.
> --- lib/binary-io.h~1 2012-05-12 17:31:43.639250000 +0300
> +++ lib/binary-io.h 2012-05-12 18:19:53.842375000 +0300
> -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined _WIN32
> # include <io.h> /* declares setmode() */
> # else
> # define setmode _setmode
This change is wrong. The MSVC headers don't unconditionally declare
'setmode' and 'fileno'.
And, please, for consistency with the other codes, use
"defined _WIN32 || defined __WIN32__", not just "defined _WIN32".
Eli Zaretskii wrote:
> -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined __MINGW32__
> # include <io.h> /* declares setmode() */
> # else
> # define setmode _setmode
What is the rationale/benefit for this proposed change?
> From: Bruno Haible <br...@clisp.org>
> Cc: bug-gnu-ut...@gnu.org
> Date: Sat, 12 May 2012 19:41:39 +0200
> But regarding <binary-io.h>, I am unhappy about another macro UNSET_BINARY.
> If you need
> 1. to reset the value to O_TEXT,
> 2. to get the previous value in return,
> then I would suggest to use a function
> int setmode (int fd, int o_mode);
> exactly like in Cygwin, so that no code is needed on Cygwin.
You mean, us it directly, not through a macro? But then why does
gnulib provide SET_BINARY as a macro? I'm probably missing something
here.
Using UNSET_BINARY allowed me to avoid any #ifdef's in the application
code. If I use setmode, then #ifdef's are necessary.
> If we had the chance to design a new API would prefer
> bool set_binary_mode (int fd, bool binary);
In fact, Diffutils 2.8.7 used precisely such an API.
> > -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> > +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined _WIN32
> > # include <io.h> /* declares setmode() */
> > # else
> > # define setmode _setmode
> This change is wrong. The MSVC headers don't unconditionally declare
> 'setmode' and 'fileno'.
Yes, that was my mistake. I changed this to __MINGW32__ in a
followup (MinGW does have io.h that declares 'setmode').
> And, please, for consistency with the other codes, use
> "defined _WIN32 || defined __WIN32__", not just "defined _WIN32".
I think this is a moot point, since _WIN32 will be replaced by
__MINGW32__.
> From: Bruno Haible <br...@clisp.org>
> Cc: bug-gnu-ut...@gnu.org
> Date: Sat, 12 May 2012 19:47:54 +0200
> Eli Zaretskii wrote:
> > -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> > +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined __MINGW32__
> > # include <io.h> /* declares setmode() */
> > # else
> > # define setmode _setmode
> What is the rationale/benefit for this proposed change?
MinGW provides 'setmode' whose declaration is in io.h.
Eli Zaretskii wrote:
> > Eli Zaretskii wrote:
> > > -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> > > +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined __MINGW32__
> > > # include <io.h> /* declares setmode() */
> > > # else
> > > # define setmode _setmode
> > What is the rationale/benefit for this proposed change?
> MinGW provides 'setmode' whose declaration is in io.h.
But mingw's declaration of 'fileno' in stdio.h is not always enabled
(only if !defined _NO_OLDNAMES). Therefore the #else branch is needed
on mingw.
> From: Bruno Haible <br...@clisp.org>
> Cc: bug-gnu...@gnu.org, bug-gnu-ut...@gnu.org
> Date: Sat, 12 May 2012 20:58:46 +0200
> Eli Zaretskii wrote:
> > > Eli Zaretskii wrote:
> > > > -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> > > > +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined __MINGW32__
> > > > # include <io.h> /* declares setmode() */
> > > > # else
> > > > # define setmode _setmode
> > > What is the rationale/benefit for this proposed change?
> > MinGW provides 'setmode' whose declaration is in io.h.
> But mingw's declaration of 'fileno' in stdio.h is not always enabled
> (only if !defined _NO_OLDNAMES).
_NO_OLDNAMES is undefined by default.
> Therefore the #else branch is needed on mingw.
That's okay, but then io.h should be included anyway, because it
provides the prototype of _setmode as well. According to MSDN, the
same is true for MSVC.
* lib/binary-io.h [O_BINARY]: Include io.h on all systems that
have non-zero O_BINARY.
(O_TEXT) [!O_BINARY]: Define if not defined.
(setmode) [!O_BINARY]: Define to do nothing and return O_BINARY.
* src/io.c: Include binary-io.h.
(sip, read_files): Switch file I/O to binary mode and back as
appropriate, to support binary files on systems that distinguish
between text and binary I/O.
/* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -110,12 +111,25 @@ sip (struct file_data *current, bool ski
if (! skip_test)
{
/* Check first part of file to see if it's a binary file. */
-
- /* FIXME: if O_BINARY, this should revert to text mode
- if the file is not binary. */
+ bool binary_file;
+ int prev_mode = setmode (current->desc, O_BINARY);
file_block_read (current, current->bufsize);
- return binary_file_p (current->buffer, current->buffered);
+ binary_file = binary_file_p (current->buffer, current->buffered);
+ if (prev_mode != O_BINARY)
+ {
+ /* Revert to text mode and seek back to the beginning to
+ reread the file. Use relative seek, since file
+ descriptors like stdin might not start at offset
+ zero. */
+
+ if (lseek (current->desc, -current->buffered, SEEK_CUR) == -1)
+ pfatal_with_name (current->name);
+ (void) setmode (current->desc, O_TEXT);
+ current->buffered = 0;
+ current->eof = false;
+ }
+ return binary_file;
}
}
@@ -761,7 +775,8 @@ read_files (struct file_data filevec[], }
if (appears_binary)
{
- /* FIXME: If O_BINARY, this should set both files to binary mode. */
+ (void) setmode (filevec[0].desc, O_BINARY);
+ (void) setmode (filevec[1].desc, O_BINARY);
return true;
}
--- lib/binary-io.h~1 2012-05-12 17:31:43.639250000 +0300
+++ lib/binary-io.h 2012-05-12 22:50:07.217375000 +0300
@@ -28,9 +28,8 @@
/* SET_BINARY (fd);
changes the file descriptor fd to perform binary I/O. */
#if O_BINARY
-# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
-# include <io.h> /* declares setmode() */
-# else
+# include <io.h> /* declares setmode() */
+# if !defined __EMX__ && !defined __DJGPP__ && !defined __CYGWIN__
# define setmode _setmode
# undef fileno
# define fileno _fileno
@@ -46,7 +45,14 @@
# endif
#else
/* On reasonable systems, binary I/O is the default. */
+# ifndef O_BINARY
+# define O_BINARY 0
+# endif
+# ifndef O_TEXT
+# define O_TEXT 0
+# endif
# define SET_BINARY(fd) /* do nothing */ ((void) 0)
+# define setmode(fd,m) (O_BINARY)
#endif
Eli Zaretskii wrote:
> > > > > -# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
> > > > > +# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__ || defined __MINGW32__
> > > > > # include <io.h> /* declares setmode() */
> > > > > # else
> > > > > # define setmode _setmode
> > > > What is the rationale/benefit for this proposed change?
> > > MinGW provides 'setmode' whose declaration is in io.h.
> > But mingw's declaration of 'fileno' in stdio.h is not always enabled
> > (only if !defined _NO_OLDNAMES).
> _NO_OLDNAMES is undefined by default.
But it may be defined.
> > Therefore the #else branch is needed on mingw.
> That's okay, but then io.h should be included anyway, because it
> provides the prototype of _setmode as well. According to MSDN, the
> same is true for MSVC.
<io.h> is not needed here. When I add a use of setmode() to the unit test,
with the existing code, I do not get a warning about _setmode() being
undeclared - neither with mingw nor with MSVC. (Maybe this is because
<fcntl.h> already includes <io.h>.)
There is no need to change the code to #include a non-POSIX include file.
binary-io: Define setmode function.
* lib/binary-io.h (setmode): Define if not defined by the platform.
(SET_BINARY): Define in terms of setmode always.
* tests/test-binary-io.c (main): Accept an argument, and test either
setmode or SET_BINARY depending on the argument.
* tests/test-binary-io.sh: Invoke test-binary-io twice, with an argument.
Clean up also t-bin-out0.tmp.
--- lib/binary-io.h.orig Sat May 12 23:23:39 2012
+++ lib/binary-io.h Sat May 12 22:23:04 2012
@@ -25,8 +25,9 @@
so we include it here first. */
#include <stdio.h>
-/* SET_BINARY (fd);
- changes the file descriptor fd to perform binary I/O. */
+/* setmode (fd, mode)
+ sets the binary/text I/O mode of file descriptor fd to the given mode
+ (must be O_BINARY or O_TEXT) and returns the previous mode. */
#if O_BINARY
# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
# include <io.h> /* declares setmode() */
@@ -35,18 +36,21 @@
# undef fileno
# define fileno _fileno
# endif
-# ifdef __DJGPP__
-# include <unistd.h> /* declares isatty() */
- /* Avoid putting stdin/stdout in binary mode if it is connected to
- the console, because that would make it impossible for the user
- to interrupt the program through Ctrl-C or Ctrl-Break. */
-# define SET_BINARY(fd) ((void) (!isatty (fd) ? (setmode (fd, O_BINARY), 0) : 0))
-# else
-# define SET_BINARY(fd) ((void) setmode (fd, O_BINARY))
-# endif
#else
- /* On reasonable systems, binary I/O is the default. */
-# define SET_BINARY(fd) /* do nothing */ ((void) 0)
+ /* On reasonable systems, binary I/O is the only choice. */
+# define setmode(fd, mode) ((void) (fd), (void) (mode), O_BINARY)
+#endif
+
+/* SET_BINARY (fd);
+ changes the file descriptor fd to perform binary I/O. */
+#ifdef __DJGPP__
+# include <unistd.h> /* declares isatty() */
+ /* Avoid putting stdin/stdout in binary mode if it is connected to
+ the console, because that would make it impossible for the user
+ to interrupt the program through Ctrl-C or Ctrl-Break. */
+# define SET_BINARY(fd) ((void) (!isatty (fd) ? (setmode (fd, O_BINARY), 0) : 0))
+#else
+# define SET_BINARY(fd) ((void) setmode (fd, O_BINARY))
#endif
#endif /* _BINARY_H */
--- tests/test-binary-io.c.orig Sat May 12 23:23:39 2012
+++ tests/test-binary-io.c Sat May 12 22:27:33 2012
@@ -30,26 +30,40 @@
#include "macros.h"
int
-main ()
+main (int argc, char *argv[])
{
/* Test the O_BINARY macro. */
{
int fd =
- open ("t-bin-out2.tmp", O_CREAT | O_TRUNC | O_RDWR | O_BINARY, 0600);
+ open ("t-bin-out0.tmp", O_CREAT | O_TRUNC | O_RDWR | O_BINARY, 0600);
if (write (fd, "Hello\n", 6) < 0)
exit (1);
close (fd);
}
{
struct stat statbuf;
- if (stat ("t-bin-out2.tmp", &statbuf) < 0)
+ if (stat ("t-bin-out0.tmp", &statbuf) < 0)
exit (1);
ASSERT (statbuf.st_size == 6);
}
- /* Test the SET_BINARY macro. */
- SET_BINARY (1);
- fputs ("Hello\n", stdout);
+ switch (argv[1][0])
+ {
+ case '1':
+ /* Test the setmode() function. */
+ setmode (1, O_BINARY);
+ fputs ("Hello\n", stdout);
+ break;
+
+ case '2':
+ /* Test the SET_BINARY macro. */
+ SET_BINARY (1);
+ fputs ("Hello\n", stdout);
+ break;
+
+ default:
+ break;
+ }
return 0;
}
--- tests/test-binary-io.sh.orig Sat May 12 23:23:39 2012
+++ tests/test-binary-io.sh Sat May 12 22:28:28 2012
@@ -3,9 +3,11 @@
tmpfiles=""
trap 'rm -fr $tmpfiles' 1 2 3 15
That would clash with the setmode function defined
in <unistd.h> in FreeBSD etc., which is partly why we
removed this stuff from diffutils.
I agree that it'd be nicer to have a function
with an additional argument. But I'd rather
not use a name like 'setmode' that clashes with BSD.
'set_binary_mode' would be OK, but wouldn't it
be cleaner to use fcntl? The standard way to
set and clear O_* flags is fcntl, so shouldn't
it look like this?
Paul Eggert wrote:
> > I would suggest to use a function
> > int setmode (int fd, int o_mode);
> That would clash with the setmode function defined
> in <unistd.h> in FreeBSD etc.
Thanks for the heads-up. Indeed MacOS X, FreeBSD, NetBSD, Minix all have
the same kind of getmode/setmode functions.
> 'set_binary_mode' would be OK
Good. New proposal attached below.
> but wouldn't it
> be cleaner to use fcntl? The standard way to
> set and clear O_* flags is fcntl, so shouldn't
> it look like this?
> flags = fcntl (fd, F_GETFL);
This can't be easily implemented on native Windows. In particular, there
is no way to retrieve the O_APPEND flag. And the read/write flags are
hard to retrieve without making system calls as well.
binary-io: Define set_binary_mode function.
* lib/binary-io.h (set_binary_mode): New function.
(SET_BINARY): Define in terms of set_binary_mode.
* tests/test-binary-io.c (main): Accept an argument, and test either
set_binary_mode or SET_BINARY depending on the argument.
* tests/test-binary-io.sh: Invoke test-binary-io twice, with an argument.
Clean up also t-bin-out0.tmp.
--- lib/binary-io.h.orig Sun May 13 03:58:11 2012
+++ lib/binary-io.h Sun May 13 03:57:27 2012
@@ -25,28 +25,33 @@
so we include it here first. */
#include <stdio.h>
-/* SET_BINARY (fd);
- changes the file descriptor fd to perform binary I/O. */
+/* set_binary_mode (fd, mode)
+ sets the binary/text I/O mode of file descriptor fd to the given mode
+ (must be O_BINARY or O_TEXT) and returns the previous mode. */
#if O_BINARY
# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
# include <io.h> /* declares setmode() */
+# define set_binary_mode setmode
# else
-# define setmode _setmode
+# define set_binary_mode _setmode
# undef fileno
# define fileno _fileno
# endif
-# ifdef __DJGPP__
-# include <unistd.h> /* declares isatty() */
- /* Avoid putting stdin/stdout in binary mode if it is connected to
- the console, because that would make it impossible for the user
- to interrupt the program through Ctrl-C or Ctrl-Break. */
-# define SET_BINARY(fd) ((void) (!isatty (fd) ? (setmode (fd, O_BINARY), 0) : 0))
-# else
-# define SET_BINARY(fd) ((void) setmode (fd, O_BINARY))
-# endif
#else
- /* On reasonable systems, binary I/O is the default. */
-# define SET_BINARY(fd) /* do nothing */ ((void) 0)
+ /* On reasonable systems, binary I/O is the only choice. */
+# define set_binary_mode(fd, mode) ((void) (fd), (void) (mode), O_BINARY)
+#endif
+
+/* SET_BINARY (fd);
+ changes the file descriptor fd to perform binary I/O. */
+#ifdef __DJGPP__
+# include <unistd.h> /* declares isatty() */
+ /* Avoid putting stdin/stdout in binary mode if it is connected to
+ the console, because that would make it impossible for the user
+ to interrupt the program through Ctrl-C or Ctrl-Break. */
+# define SET_BINARY(fd) ((void) (!isatty (fd) ? (set_binary_mode (fd, O_BINARY), 0) : 0))
+#else
+# define SET_BINARY(fd) ((void) set_binary_mode (fd, O_BINARY))
#endif
#endif /* _BINARY_H */
--- tests/test-binary-io.c.orig Sun May 13 03:58:11 2012
+++ tests/test-binary-io.c Sun May 13 03:56:50 2012
@@ -30,26 +30,40 @@
#include "macros.h"
int
-main ()
+main (int argc, char *argv[])
{
/* Test the O_BINARY macro. */
{
int fd =
- open ("t-bin-out2.tmp", O_CREAT | O_TRUNC | O_RDWR | O_BINARY, 0600);
+ open ("t-bin-out0.tmp", O_CREAT | O_TRUNC | O_RDWR | O_BINARY, 0600);
if (write (fd, "Hello\n", 6) < 0)
exit (1);
close (fd);
}
{
struct stat statbuf;
- if (stat ("t-bin-out2.tmp", &statbuf) < 0)
+ if (stat ("t-bin-out0.tmp", &statbuf) < 0)
exit (1);
ASSERT (statbuf.st_size == 6);
}
- /* Test the SET_BINARY macro. */
- SET_BINARY (1);
- fputs ("Hello\n", stdout);
+ switch (argv[1][0])
+ {
+ case '1':
+ /* Test the set_binary_mode() function. */
+ set_binary_mode (1, O_BINARY);
+ fputs ("Hello\n", stdout);
+ break;
+
+ case '2':
+ /* Test the SET_BINARY macro. */
+ SET_BINARY (1);
+ fputs ("Hello\n", stdout);
+ break;
+
+ default:
+ break;
+ }
return 0;
}
--- tests/test-binary-io.sh.orig Sun May 13 03:58:11 2012
+++ tests/test-binary-io.sh Sat May 12 22:28:28 2012
@@ -3,9 +3,11 @@
tmpfiles=""
trap 'rm -fr $tmpfiles' 1 2 3 15
* src/io.c: Include binary-io.h.
(sip, read_files): Switch file I/O to binary mode and back as
appropriate, to support binary files on systems that distinguish
between text and binary I/O.
/* Rotate an unsigned value to the left. */
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
@@ -110,12 +111,25 @@ sip (struct file_data *current, bool ski
if (! skip_test)
{
/* Check first part of file to see if it's a binary file. */
-
- /* FIXME: if O_BINARY, this should revert to text mode
- if the file is not binary. */
+ bool binary_file;
+ int prev_mode = set_binary_mode (current->desc, O_BINARY);
file_block_read (current, current->bufsize);
- return binary_file_p (current->buffer, current->buffered);
+ binary_file = binary_file_p (current->buffer, current->buffered);
+ if (prev_mode != O_BINARY)
+ {
+ /* Revert to text mode and seek back to the beginning to
+ reread the file. Use relative seek, since file
+ descriptors like stdin might not start at offset
+ zero. */
+
+ if (lseek (current->desc, -current->buffered, SEEK_CUR) == -1)
+ pfatal_with_name (current->name);
+ (void) set_binary_mode (current->desc, O_TEXT);
+ current->buffered = 0;
+ current->eof = false;
+ }
+ return binary_file;
}
}
@@ -761,7 +775,8 @@ read_files (struct file_data filevec[], }
if (appears_binary)
{
- /* FIXME: If O_BINARY, this should set both files to binary mode. */
+ (void) set_binary_mode (filevec[0].desc, O_BINARY);
+ (void) set_binary_mode (filevec[1].desc, O_BINARY);
return true;
}
OK. Since I don't want all callers to use '(void)' casts, I'm making it
a function, like you say. An inline function, since I don't like to penalize
the compiled code on GNU systems for such no-ops.
Bruno
2012-05-13 Bruno Haible <br...@clisp.org>
Paul Eggert <egg...@cs.ucla.edu>
binary-io: Define set_binary_mode function.
* lib/binary-io.h (set_binary_mode): New function.
(SET_BINARY): Define in terms of set_binary_mode.
* modules/binary-io (configure.ac): Require AC_C_INLINE.
* tests/test-binary-io.c (main): Accept an argument, and test either
set_binary_mode or SET_BINARY depending on the argument.
* tests/test-binary-io.sh: Invoke test-binary-io twice, with an
argument. Clean up also t-bin-out0.tmp.
--- lib/binary-io.h.orig Sun May 13 22:54:11 2012
+++ lib/binary-io.h Sun May 13 22:51:50 2012
@@ -25,28 +25,41 @@
so we include it here first. */
#include <stdio.h>
-/* SET_BINARY (fd);
- changes the file descriptor fd to perform binary I/O. */
+/* set_binary_mode (fd, mode)
+ sets the binary/text I/O mode of file descriptor fd to the given mode
+ (must be O_BINARY or O_TEXT) and returns the previous mode. */
#if O_BINARY
# if defined __EMX__ || defined __DJGPP__ || defined __CYGWIN__
# include <io.h> /* declares setmode() */
+# define set_binary_mode setmode
# else
-# define setmode _setmode
+# define set_binary_mode _setmode
# undef fileno
# define fileno _fileno
# endif
-# ifdef __DJGPP__
-# include <unistd.h> /* declares isatty() */
- /* Avoid putting stdin/stdout in binary mode if it is connected to
- the console, because that would make it impossible for the user
- to interrupt the program through Ctrl-C or Ctrl-Break. */
-# define SET_BINARY(fd) ((void) (!isatty (fd) ? (setmode (fd, O_BINARY), 0) : 0))
-# else
-# define SET_BINARY(fd) ((void) setmode (fd, O_BINARY))
-# endif
#else
- /* On reasonable systems, binary I/O is the default. */
-# define SET_BINARY(fd) /* do nothing */ ((void) 0)
+ /* On reasonable systems, binary I/O is the only choice. */
+ /* Use an inline function rather than a macro, to avoid gcc warnings
+ "warning: statement with no effect". */
+static inline int
+set_binary_mode (int fd, int mode)
+{
+ (void) fd;
+ (void) mode;
+ return O_BINARY;
+}
+#endif
+
+/* SET_BINARY (fd);
+ changes the file descriptor fd to perform binary I/O. */
+#ifdef __DJGPP__
+# include <unistd.h> /* declares isatty() */
+ /* Avoid putting stdin/stdout in binary mode if it is connected to
+ the console, because that would make it impossible for the user
+ to interrupt the program through Ctrl-C or Ctrl-Break. */
+# define SET_BINARY(fd) ((void) (!isatty (fd) ? (set_binary_mode (fd, O_BINARY), 0) : 0))
+#else
+# define SET_BINARY(fd) ((void) set_binary_mode (fd, O_BINARY))
#endif
#endif /* _BINARY_H */
--- modules/binary-io.orig Sun May 13 22:54:11 2012
+++ modules/binary-io Sun May 13 22:47:52 2012
@@ -8,6 +8,7 @@
fcntl-h
configure.ac:
+AC_REQUIRE([AC_C_INLINE])
Makefile.am:
lib_SOURCES += binary-io.h
--- tests/test-binary-io.c.orig Sun May 13 22:54:12 2012
+++ tests/test-binary-io.c Sun May 13 03:56:50 2012
@@ -30,26 +30,40 @@
#include "macros.h"
int
-main ()
+main (int argc, char *argv[])
{
/* Test the O_BINARY macro. */
{
int fd =
- open ("t-bin-out2.tmp", O_CREAT | O_TRUNC | O_RDWR | O_BINARY, 0600);
+ open ("t-bin-out0.tmp", O_CREAT | O_TRUNC | O_RDWR | O_BINARY, 0600);
if (write (fd, "Hello\n", 6) < 0)
exit (1);
close (fd);
}
{
struct stat statbuf;
- if (stat ("t-bin-out2.tmp", &statbuf) < 0)
+ if (stat ("t-bin-out0.tmp", &statbuf) < 0)
exit (1);
ASSERT (statbuf.st_size == 6);
}
- /* Test the SET_BINARY macro. */
- SET_BINARY (1);
- fputs ("Hello\n", stdout);
+ switch (argv[1][0])
+ {
+ case '1':
+ /* Test the set_binary_mode() function. */
+ set_binary_mode (1, O_BINARY);
+ fputs ("Hello\n", stdout);
+ break;
+
+ case '2':
+ /* Test the SET_BINARY macro. */
+ SET_BINARY (1);
+ fputs ("Hello\n", stdout);
+ break;
+
+ default:
+ break;
+ }
return 0;
}
--- tests/test-binary-io.sh.orig Sun May 13 22:54:12 2012
+++ tests/test-binary-io.sh Sat May 12 22:28:28 2012
@@ -3,9 +3,11 @@
tmpfiles=""
trap 'rm -fr $tmpfiles' 1 2 3 15
Thanks for the diffutils patch. We're a bit better off with something
even closer to the pre-2006 approach, which took care to convert
size_t to off_t before negating it, as that is important on hosts where
sizeof (size_t) < sizeof (off_t). Also, it's cleaner to use set_binary_mode
uniformly rather than set_binary_mode sometimes and SET_BINARY others.
Here's a revised patch that I hope catches all the issues. I plan to
push it after Bruno's binary-io.h patches are pushed.
Use binary mode when testing for binary files.
This reverts the 2006-01-05 change and modernizes to the current API.
Idea suggested by Eli Zaretskii in:
http://lists.gnu.org/archive/html/bug-gnu-utils/2012-05/msg00066.html * src/cmp.c (main):
* src/diff.c (main, compare_files):
Use set_binary_mode rather than SET_BINARY.
* src/diff.c (compare_files): Omit unnecessary use of O_BINARY.
* src/io.c (sip): Sample unknown files in binary mode, to see
whether they are binary.
(read_files): Read binary files in binary mode.
diff --git a/src/cmp.c b/src/cmp.c
index 1387ec1..32b25e6 100644
--- a/src/cmp.c
+++ b/src/cmp.c
@@ -293,7 +293,7 @@ main (int argc, char **argv)
{
file_desc[f1] = STDIN_FILENO;
if (O_BINARY && ! isatty (STDIN_FILENO))
- SET_BINARY (STDIN_FILENO);
+ set_binary_mode (STDIN_FILENO, O_BINARY);
}
else
file_desc[f1] = open (file[f1], O_RDONLY | O_BINARY, 0);
diff --git a/src/diff.c b/src/diff.c
index b316afe..79a4726 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -524,7 +524,7 @@ main (int argc, char **argv)
#if O_BINARY
binary = true;
if (! isatty (STDOUT_FILENO))
- SET_BINARY (STDOUT_FILENO);
+ set_binary_mode (STDOUT_FILENO, O_BINARY);
#endif
break;
@@ -1111,8 +1111,8 @@ compare_files (struct comparison const *parent,
else if (STREQ (cmp.file[f].name, "-"))
{
cmp.file[f].desc = STDIN_FILENO;
- if (O_BINARY && binary && ! isatty (STDIN_FILENO))
- SET_BINARY (STDIN_FILENO);
+ if (binary && ! isatty (STDIN_FILENO))
+ set_binary_mode (STDIN_FILENO, O_BINARY);
if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
cmp.file[f].desc = ERRNO_ENCODE (errno);
else
diff --git a/src/io.c b/src/io.c
index 5a631a5..3abffb5 100644
--- a/src/io.c
+++ b/src/io.c
@@ -19,6 +19,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "diff.h"
+#include <binary-io.h>
#include <cmpbuf.h>
#include <file-type.h>
#include <xalloc.h>
@@ -111,11 +112,24 @@ sip (struct file_data *current, bool skip_test)
{
/* Check first part of file to see if it's a binary file. */
- /* FIXME: if O_BINARY, this should revert to text mode
- if the file is not binary. */
-
+ int prev_mode = set_binary_mode (current->desc, O_BINARY);
+ off_t buffered;
file_block_read (current, current->bufsize);
- return binary_file_p (current->buffer, current->buffered);
+ buffered = current->buffered;
+
+ if (prev_mode != O_BINARY)
+ {
+ /* Revert to text mode and seek back to the start to reread
+ the file. Use relative seek, since file descriptors
+ like stdin might not start at offset zero. */
+ if (lseek (current->desc, - buffered, SEEK_CUR) < 0)
+ pfatal_with_name (current->name);
+ set_binary_mode (current->desc, prev_mode);
+ current->buffered = 0;
+ current->eof = false;
+ }
+
+ return binary_file_p (current->buffer, buffered);
}
}
@@ -761,7 +775,8 @@ read_files (struct file_data filevec[], bool pretend_binary)
}
if (appears_binary)
{
- /* FIXME: If O_BINARY, this should set both files to binary mode. */
+ set_binary_mode (filevec[0].desc, O_BINARY);
+ set_binary_mode (filevec[1].desc, O_BINARY);
return true;
}
> On 05/12/2012 10:41 AM, Bruno Haible wrote:
>> I would suggest to use a function
>> int setmode (int fd, int o_mode);
> That would clash with the setmode function defined
> in <unistd.h> in FreeBSD etc., which is partly why we
> removed this stuff from diffutils.
> I agree that it'd be nicer to have a function
> with an additional argument. But I'd rather
> not use a name like 'setmode' that clashes with BSD.
> 'set_binary_mode' would be OK, but wouldn't it
> be cleaner to use fcntl? The standard way to
> set and clear O_* flags is fcntl, so shouldn't
> it look like this?
No, because cygwin doesn't do it that way. The only way to change
between O_BINARY and O_TEXT on cygwin is via setmode(), and it is not
worth wrapping fcntl() to make that an alternate interface.
-- Eric Blake ebl...@redhat.com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
> Date: Sun, 13 May 2012 19:37:33 -0700
> From: Paul Eggert <egg...@cs.ucla.edu>
> CC: bug-gnu...@gnu.org, bug-gnu-ut...@gnu.org
> Thanks for the diffutils patch. We're a bit better off with something
> even closer to the pre-2006 approach, which took care to convert
> size_t to off_t before negating it, as that is important on hosts where
> sizeof (size_t) < sizeof (off_t). Also, it's cleaner to use set_binary_mode
> uniformly rather than set_binary_mode sometimes and SET_BINARY others.
> Here's a revised patch that I hope catches all the issues. I plan to
> push it after Bruno's binary-io.h patches are pushed.