By running this implementation on Moe's repo structure:
mkdir bummer && cd bummer; for ((i=0;i<100;i++)); do
mkdir $i && pushd $i;
for ((j=0;j<1000;j++)); do echo "$j" >$j; done;
popd;
done
We see the following speedups:
git add .
-------------------
old: 00:00:23(.087)
new: 00:00:21(.512) 1.07x
git status
-------------------
old: 00:00:03(.306)
new: 00:00:01(.684) 1.96x
git clean -dxf
-------------------
old: 00:00:01(.918)
new: 00:00:00(.295) 6.50x
Signed-off-by: Marius Storm-Olsen <mar...@trolltech.com>
---
It would be nice if MinGW/Windows people would give this a thorough
testing to ensure that's it's pristine. It seems fine, and I've not
stumbled over anything myself.
Of course, if you have status.showUntrackedFiles = no, then you'll
not get any speedups, since the read_directory_recursive loop is
never entered. People with a standard setup, however, should
experience a significant speedup.
compat/mingw.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/mingw.h | 28 ++++++++++++++++++++++++++
2 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/compat/mingw.c b/compat/mingw.c
index 2839d9d..f52de3e 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -1139,3 +1139,62 @@ int link(const char *oldpath, const char *newpath)
}
return 0;
}
+
+#ifndef NO_MINGW_REPLACE_READDIR
+/* MinGW readdir implementation to avoid extra lstats for Git */
+struct mingw_DIR
+{
+ struct _finddata_t dd_dta; /* disk transfer area for this dir */
+ struct mingw_dirent dd_dir; /* Our own implementation, including d_type */
+ long dd_handle; /* _findnext handle */
+ int dd_stat; /* 0 = next entry to read is first entry, -1 = off the end, positive = 0 based index of next entry */
+ char dd_name[1]; /* given path for dir with search pattern (struct is extended) */
+};
+
+struct dirent *mingw_readdir(DIR *dir)
+{
+ WIN32_FIND_DATAA buf;
+ HANDLE handle;
+ struct mingw_DIR *mdir = (struct mingw_DIR*)dir;
+
+ if (!dir->dd_handle) {
+ errno = EBADF; /* No set_errno for mingw */
+ return NULL;
+ }
+
+ if (dir->dd_handle == (long)INVALID_HANDLE_VALUE && dir->dd_stat == 0)
+ {
+ handle = FindFirstFileA(dir->dd_name, &buf);
+ DWORD lasterr = GetLastError();
+ dir->dd_handle = (long)handle;
+ if (handle == INVALID_HANDLE_VALUE && (lasterr != ERROR_NO_MORE_FILES)) {
+ errno = err_win_to_posix(lasterr);
+ return NULL;
+ }
+ } else if (dir->dd_handle == (long)INVALID_HANDLE_VALUE) {
+ return NULL;
+ } else if (!FindNextFileA((HANDLE)dir->dd_handle, &buf)) {
+ DWORD lasterr = GetLastError();
+ FindClose((HANDLE)dir->dd_handle);
+ dir->dd_handle = (long)INVALID_HANDLE_VALUE;
+ /* POSIX says you shouldn't set errno when readdir can't
+ find any more files; so, if another error we leave it set. */
+ if (lasterr != ERROR_NO_MORE_FILES)
+ errno = err_win_to_posix(lasterr);
+ return NULL;
+ }
+
+ /* We get here if `buf' contains valid data. */
+ strcpy(dir->dd_dir.d_name, buf.cFileName);
+ ++dir->dd_stat;
+
+ /* Set file type, based on WIN32_FIND_DATA */
+ mdir->dd_dir.d_type = 0;
+ if (buf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+ mdir->dd_dir.d_type |= DT_DIR;
+ else
+ mdir->dd_dir.d_type |= DT_REG;
+
+ return (struct dirent*)&dir->dd_dir;
+}
+#endif // !NO_MINGW_REPLACE_READDIR
diff --git a/compat/mingw.h b/compat/mingw.h
index 762eb14..104b310 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -233,3 +233,31 @@ int main(int argc, const char **argv) \
return mingw_main(argc, argv); \
} \
static int mingw_main(c,v)
+
+#ifndef NO_MINGW_REPLACE_READDIR
+/*
+ * A replacement of readdir, to ensure that it reads the file type at
+ * the same time. This avoid extra unneeded lstats in git on MinGW
+ */
+#undef DT_UNKNOWN
+#undef DT_DIR
+#undef DT_REG
+#undef DT_LNK
+#define DT_UNKNOWN 0
+#define DT_DIR 1
+#define DT_REG 2
+#define DT_LNK 3
+
+struct mingw_dirent
+{
+ long d_ino; /* Always zero. */
+ union {
+ unsigned short d_reclen; /* Always zero. */
+ unsigned char d_type; /* Reimplementation adds this */
+ };
+ unsigned short d_namlen; /* Length of name in d_name. */
+ char d_name[FILENAME_MAX]; /* File name. */
+};
+#define dirent mingw_dirent
+#define readdir(x) mingw_readdir(x)
+#endif // !NO_MINGW_REPLACE_READDIR
--
1.6.2.2.472.gf61f7.dirty
Well done!
> +struct mingw_dirent
> +{
> + long d_ino; /* Always zero. */
> + union {
> + unsigned short d_reclen; /* Always zero. */
> + unsigned char d_type; /* Reimplementation adds this */
> + };
VERY sneaky! I was wondering why you could get away without replacing
opendir and closedir, and why you still defined a replacement mingw_DIR
that contains the replacement mingw_dirent, until I noticed this unnamed
union.
Since we don't use d_reclen anywhere in the code, wouldn't you get away with
#define d_type d_reclen
unless the type (short vs. char) makes a difference. Or would you say that
doing that would be even more sneaky?
> + unsigned short d_namlen; /* Length of name in d_name. */
> + char d_name[FILENAME_MAX]; /* File name. */
> +};
> +#define dirent mingw_dirent
> +#define readdir(x) mingw_readdir(x)
> +#endif // !NO_MINGW_REPLACE_READDIR
-- Hannes
I'm sure it could be done just with a define. However, given the
remaining unused variables, I was wondering about also packing in
permission bits and file modification time in there, to optimize the
status checking even further. That way, on Windows, we would only need
one 'readdir' pass to check the whole repository, with no lstats
whatsoever. So, this was patch was a 'primer' for that, hence the union
with a proper uchar for the d_type.
However, that would also mean a significant change in the status
checking code, as it first lstat's ever file in the index, then uses
read_directory + lstat's for others. I guess that'll be too big of a
change in core code, so the vision is moot?
I'd be ok to just use the define, provided that it compiles cleanly of
course, if the above seems too ambitious. :-) I kinda feel like the
current code is more clean though :)
--
.marius
With a comment in the commit message, it would have been clear, perhaps.
I'll carry this in my (private) tree for a while with the below squashed
in to avoid a lot of warnings.
-- Hannes
diff --git a/compat/mingw.h b/compat/mingw.h
index 104b310..16ec76b 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -260,4 +260,5 @@ struct mingw_dirent
};
#define dirent mingw_dirent
#define readdir(x) mingw_readdir(x)
+struct dirent *mingw_readdir(DIR *dir);
#endif // !NO_MINGW_REPLACE_READDIR
What happened to this patch? Is there any reason it can not be included?
I can confirm the factor 2 speedup which is very noticeable. Especially
when working with "git gui" which does git status very often.
cheers Heiko
Msysgit 1.6.3 (http://code.google.com/p/msysgit/downloads/list) contains
this patch ontop of baseline 1.6.3. It hasn't made it into the mainline
yet though. All in good time :-)
--
.marius