[PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case

18 views
Skip to first unread message

Kirill Smelkov

unread,
Nov 11, 2010, 1:08:23 PM11/11/10
to Junio C Hamano, g...@vger.kernel.org, msy...@googlegroups.com, Kirill Smelkov, Jonathan Nieder
getenv(3) returns not-permanent buffer which may be changed by e.g.
putenv(3) call (*).

In practice I've noticed this when trying to do `git commit -m abc`
inside msysgit under wine, getting

$ git commit -m abc
fatal: could not open 'DIR=.git/COMMIT_EDITMSG': No such file or directory
^^^^
(notice introduced 'DIR=' artifact.)

The problem was showing itself only with -m option, and actually, as
debugging showed, originally

git_dir = getenv("GIT_DIR")

returned pointer to

"GIT_DIR=.git\0"
^
git_dir

, we stored it in git_dir, than, after processing -m git-commit option,
we did setenv("GIT_EDITOR", ":") which as (*) says changed environment
variables memory layout - something like this

"...\0GIT_DIR=.git\0"
^
git_dir

and oops - we got wrong git_dir.

Avoid that by strdupping getenv("GIT_DIR") result like we did in 06f354
(setup: make sure git dir path is in a permanent buffer). Unfortunately
this also shows that other getenv usage inside git needs auditing...

(*) from man 3 getenv:

The implementation of getenv() is not required to be reentrant. The
string pointed to by the return value of getenv() may be statically
allocated, and can be modified by a subsequent call to getenv(),
putenv(3), setenv(3), or unsetenv(3).

Cc: Jonathan Nieder <jrni...@gmail.com>
Signed-off-by: Kirill Smelkov <ki...@mns.spb.ru>
---
environment.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/environment.c b/environment.c
index eaf908b..d5021e8 100644
--- a/environment.c
+++ b/environment.c
@@ -88,6 +88,7 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
static void setup_git_env(void)
{
git_dir = getenv(GIT_DIR_ENVIRONMENT);
+ git_dir = git_dir ? xstrdup(git_dir) : NULL;
if (!git_dir) {
git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
git_dir = git_dir ? xstrdup(git_dir) : NULL;
--
1.7.3.2.161.g3089c

Jonathan Nieder

unread,
Nov 11, 2010, 1:17:28 PM11/11/10
to Kirill Smelkov, Junio C Hamano, g...@vger.kernel.org, msy...@googlegroups.com
Kirill Smelkov wrote:

> getenv(3) returns not-permanent buffer which may be changed by e.g.
> putenv(3) call (*).

Yikes. Thanks for the example.

> --- a/environment.c
> +++ b/environment.c
> @@ -88,6 +88,7 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
> static void setup_git_env(void)
> {
> git_dir = getenv(GIT_DIR_ENVIRONMENT);
> + git_dir = git_dir ? xstrdup(git_dir) : NULL;
> if (!git_dir) {
> git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
> git_dir = git_dir ? xstrdup(git_dir) : NULL;

Maybe we can avoid (some) repetition like this?

diff --git a/environment.c b/environment.c
index de5581f..942f1e4 100644
--- a/environment.c
+++ b/environment.c
@@ -87,25 +87,31 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {


static void setup_git_env(void)
{
git_dir = getenv(GIT_DIR_ENVIRONMENT);

- if (!git_dir) {
- git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
- git_dir = git_dir ? xstrdup(git_dir) : NULL;
- }
if (!git_dir)
+ git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
+ if (git_dir)
+ git_dir = xstrdup(git_dir);
+ else
git_dir = DEFAULT_GIT_DIR_ENVIRONMENT;
+
git_object_dir = getenv(DB_ENVIRONMENT);
- if (!git_object_dir) {
- git_object_dir = xmalloc(strlen(git_dir) + 9);
- sprintf(git_object_dir, "%s/objects", git_dir);
- }
+ if (git_object_dir)
+ git_object_dir = xstrdup(git_object_dir);
+ else
+ git_object_dir = git_pathdup("objects");
+
git_index_file = getenv(INDEX_ENVIRONMENT);
- if (!git_index_file) {
- git_index_file = xmalloc(strlen(git_dir) + 7);
- sprintf(git_index_file, "%s/index", git_dir);
- }
+ if (git_index_file)
+ git_index_file = xstrdup(git_index_file);
+ else
+ git_index_file = git_pathdup("index");
+
git_graft_file = getenv(GRAFT_ENVIRONMENT);
- if (!git_graft_file)
+ if (git_graft_file)
+ git_graft_file = xstrdup(git_graft_file);
+ else
git_graft_file = git_pathdup("info/grafts");
+
if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT))
read_replace_refs = 0;
}

Kirill Smelkov

unread,
Nov 12, 2010, 9:03:29 AM11/12/10
to Jonathan Nieder, Junio C Hamano, g...@vger.kernel.org, msy...@googlegroups.com
On Thu, Nov 11, 2010 at 12:17:28PM -0600, Jonathan Nieder wrote:
> Kirill Smelkov wrote:
>
> > getenv(3) returns not-permanent buffer which may be changed by e.g.
> > putenv(3) call (*).
>
> Yikes. Thanks for the example.

Nevermind. However it was not so fun to debug :)

To me it gets hairy and we don't cover all and even most getenv cases.
Look e.g. in commit.c:

static void determine_author_info(void)
{
char *name, *email, *date;

name = getenv("GIT_AUTHOR_NAME");
email = getenv("GIT_AUTHOR_EMAIL");
date = getenv("GIT_AUTHOR_DATE");

/* ... */

if (signoff) {
struct strbuf sob = STRBUF_INIT;
int i;

strbuf_addstr(&sob, sign_off_header);
strbuf_addstr(&sob, fmt_name(getenv("GIT_COMMITTER_NAME"),
getenv("GIT_COMMITTER_EMAIL")));

/* ... */

notes.c:

struct notes_rewrite_cfg *init_copy_notes_for_rewrite(const char *cmd)
{
struct notes_rewrite_cfg *c = xmalloc(sizeof(struct notes_rewrite_cfg));
const char *rewrite_mode_env = getenv(GIT_NOTES_REWRITE_MODE_ENVIRONMENT);
const char *rewrite_refs_env = getenv(GIT_NOTES_REWRITE_REF_ENVIRONMENT);


editor.c:

const char *git_editor(void)
{
const char *editor = getenv("GIT_EDITOR");
const char *terminal = getenv("TERM");

http-backend.c:

static void run_service(const char **argv)
{
const char *encoding = getenv("HTTP_CONTENT_ENCODING");
const char *user = getenv("REMOTE_USER");
const char *host = getenv("REMOTE_ADDR");


etc...


To me, it's very unfortunate that subsequent getenv() could overwrite
previous getenv() result, but according to `man 3 getenv` all these
places are buggy.

Maybe we'll need something like our own xgetenv() which will keep vars
in some kind of hash tab so that get/put on other vars do not interfere
with what was originally returned by xgetenv().

I don't know.

Unfortunately I can't afford myself to dive into all this, so please
choose what you like more.


Thanks,
Kirill

Jonathan Nieder

unread,
Nov 12, 2010, 11:03:32 AM11/12/10
to Kirill Smelkov, Junio C Hamano, g...@vger.kernel.org, msy...@googlegroups.com
Kirill Smelkov wrote:

> static void run_service(const char **argv)
> {
> const char *encoding = getenv("HTTP_CONTENT_ENCODING");
> const char *user = getenv("REMOTE_USER");
> const char *host = getenv("REMOTE_ADDR");
>
>
> etc...
>
>
> To me, it's very unfortunate that subsequent getenv() could overwrite
> previous getenv() result, but according to `man 3 getenv` all these
> places are buggy.

Right, but do we know of any platforms that work that way currently?
We could make getenv() rotate between a few buffers on such platforms
(probably 10 or so would take care of the longest runs).

> Maybe we'll need something like our own xgetenv() which will keep vars
> in some kind of hash tab so that get/put on other vars do not interfere
> with what was originally returned by xgetenv().

For examples that store the result like you pointed out (which store the
result from getenv), something like that would be needed if we want
them to work on platforms where putenv shifts everything.

> Unfortunately I can't afford myself to dive into all this, so please
> choose what you like more.

I think we ought to fix this properly in the end. But if you want a
quick workaround, maybe the vcs-svn/string_pool lib could help you.

Hope that helps,
Jonathan

Kirill Smelkov

unread,
Nov 12, 2010, 12:20:28 PM11/12/10
to Jonathan Nieder, Junio C Hamano, g...@vger.kernel.org, msy...@googlegroups.com
On Fri, Nov 12, 2010 at 10:03:32AM -0600, Jonathan Nieder wrote:
> Kirill Smelkov wrote:
>
> > static void run_service(const char **argv)
> > {
> > const char *encoding = getenv("HTTP_CONTENT_ENCODING");
> > const char *user = getenv("REMOTE_USER");
> > const char *host = getenv("REMOTE_ADDR");
> >
> >
> > etc...
> >
> >
> > To me, it's very unfortunate that subsequent getenv() could overwrite
> > previous getenv() result, but according to `man 3 getenv` all these
> > places are buggy.
>
> Right, but do we know of any platforms that work that way currently?

I don't. Actually I was really surprised after reading getenv manual
about that.

> We could make getenv() rotate between a few buffers on such platforms
> (probably 10 or so would take care of the longest runs).

I think it would be hard to get right (is 10 enough? on which platform?
this rarely happens after all...), and also why introduce special case?

> > Maybe we'll need something like our own xgetenv() which will keep vars
> > in some kind of hash tab so that get/put on other vars do not interfere
> > with what was originally returned by xgetenv().
>
> For examples that store the result like you pointed out (which store the
> result from getenv), something like that would be needed if we want
> them to work on platforms where putenv shifts everything.
>
> > Unfortunately I can't afford myself to dive into all this, so please
> > choose what you like more.
>
> I think we ought to fix this properly in the end. But if you want a
> quick workaround, maybe the vcs-svn/string_pool lib could help you.

No, I'm not in a hurry - better to fix this properly. Though personally,
I've already scratched my itch here.


Thanks,
Kirill

Jonathan Nieder

unread,
Nov 12, 2010, 1:59:49 PM11/12/10
to Kirill Smelkov, Junio C Hamano, g...@vger.kernel.org, msy...@googlegroups.com
Kirill Smelkov wrote:
> On Fri, Nov 12, 2010 at 10:03:32AM -0600, Jonathan Nieder wrote:

>> Right, but do we know of any platforms that work that way currently?
>
> I don't. Actually I was really surprised after reading getenv manual
> about that.

Here's an artificial one. If someone shows up interested in cleaning
the getenv() usage, something like this could make it easier to
maintain the result.

Before then, it provides a chance to see how invasive the changes
would need to be to support such a theoretical unfriendly platform.
The results don't look so good.

> No, I'm not in a hurry - better to fix this properly. Though personally,
> I've already scratched my itch here.

Thanks for reporting.

-- 8< --
Subject: add GETENV_POISON option to simulate unfriendly getenv()

Traditionally, getenv() returns a pointer into the environment structure,
and on typical platforms the pointed-to value remains valid until that
environment variable gets a new value.

On some platforms (e.g., wine), unfortunately, it does not remain valid
even after an unrelated setenv() call.

The standard even allows getenv to return its result in a static buffer
(meaning it would not remain valid after another getenv() call). So if
we want to be maximally portable, we should always copy the return
value from getenv() before fetching another value from the environment.

This patch adds a GETENV_POISON option to demonstrate how hard that
would be. When GETENV_POISON is set, getenv is replaced with a wrapper
that clobbers its old return value after each call, in the hope that
broken callers might notice.

Signed-off-by: Jonathan Nieder <jrni...@gmail.com>
---
Makefile | 4 ++++
cache.h | 20 --------------------
compat/getenv-poison.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
git-compat-util.h | 25 +++++++++++++++++++++++++
4 files changed, 73 insertions(+), 20 deletions(-)
create mode 100644 compat/getenv-poison.c

diff --git a/Makefile b/Makefile
index 1f1ce04..e16d10e 100644
--- a/Makefile
+++ b/Makefile
@@ -1443,6 +1443,10 @@ ifdef INTERNAL_QSORT
COMPAT_CFLAGS += -DINTERNAL_QSORT
COMPAT_OBJS += compat/qsort.o
endif
+ifdef GETENV_POISON
+ COMPAT_CFLAGS += -DGETENV_POISON
+ COMPAT_OBJS += compat/getenv-poison.o
+endif
ifdef RUNTIME_PREFIX
COMPAT_CFLAGS += -DRUNTIME_PREFIX
endif
diff --git a/cache.h b/cache.h
index 33decd9..574dc8f 100644
--- a/cache.h
+++ b/cache.h
@@ -438,26 +438,6 @@ extern void verify_non_filename(const char *prefix, const char *name);

extern int init_db(const char *template_dir, unsigned int flags);

-#define alloc_nr(x) (((x)+16)*3/2)
-
-/*
- * Realloc the buffer pointed at by variable 'x' so that it can hold
- * at least 'nr' entries; the number of entries currently allocated
- * is 'alloc', using the standard growing factor alloc_nr() macro.
- *
- * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
- do { \
- if ((nr) > alloc) { \
- if (alloc_nr(alloc) < (nr)) \
- alloc = (nr); \
- else \
- alloc = alloc_nr(alloc); \
- x = xrealloc((x), alloc * sizeof(*(x))); \
- } \
- } while (0)
-
/* Initialize and use the cache information */
extern int read_index(struct index_state *);
extern int read_index_preload(struct index_state *, const char **pathspec);
diff --git a/compat/getenv-poison.c b/compat/getenv-poison.c
new file mode 100644
index 0000000..a88ec85
--- /dev/null
+++ b/compat/getenv-poison.c
@@ -0,0 +1,44 @@
+/*
+ * getenv(3) says:
+ * The implementation of getenv() is not required to be reentrant.
+ * The string pointed to by the return value of getenv() may be
+ * statically allocated, and can be modified by a subsequent call
+ * to getenv(), putenv(3), setenv(3), or unsetenv(3).
+ *
+ * This file provides an unpleasant but conformant getenv()
+ * implementation, for tests.
+ */
+#include "../git-compat-util.h"
+#undef getenv
+
+static void poison_buffer(char *buf, size_t buflen)
+{
+ if (!buflen)
+ return;
+ memset(buf, '\xa5', buflen - 1);
+ buf[buflen - 1] = '\0';
+}
+
+static void fill_buffer(char **buf, size_t *alloc, const char *str)
+{
+ size_t len = strlen(str) + 1;
+ ALLOC_GROW(*buf, len, *alloc);
+ memcpy(*buf, str, len);
+}
+
+char *gitgetenv(const char *name)
+{
+ static char *envvar_array[2];
+ static size_t envvar_len[2];
+ static unsigned int index;
+ const char *value;
+
+ poison_buffer(envvar_array[index], envvar_len[index]);
+ index = (index + 1) % 2;
+
+ value = getenv(name);
+ if (!value)
+ return NULL;
+ fill_buffer(&envvar_array[index], &envvar_len[index], value);
+ return envvar_array[index];
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 2af8d3e..1f6a2ce 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -298,6 +298,11 @@ extern ssize_t read_in_full(int fd, void *buf, size_t count);
extern int gitsetenv(const char *, const char *, int);
#endif

+#ifdef GETENV_POISON
+#define getenv gitgetenv
+extern char *gitgetenv(const char *name);
+#endif
+
#ifdef NO_MKDTEMP
#define mkdtemp gitmkdtemp
extern char *gitmkdtemp(char *);
@@ -421,6 +426,26 @@ static inline int has_extension(const char *filename, const char *ext)
return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
}

+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+ do { \
+ if ((nr) > alloc) { \
+ if (alloc_nr(alloc) < (nr)) \
+ alloc = (nr); \
+ else \
+ alloc = alloc_nr(alloc); \
+ x = xrealloc((x), alloc * sizeof(*(x))); \
+ } \
+ } while (0)
+
/* Sane ctype - no locale, and works with signed chars */
#undef isascii
#undef isspace
--
1.7.2.3.557.gab647.dirty

Reply all
Reply to author
Forward
0 new messages