I just wanted to make sure core had seen this blog post (with comments
narrowing down the scope) about #inject, #each, and #times (at least)
being slower on OS X
(http://antoniocangiano.com/2008/03/25/inject-each-and-times-methods-much-slower-in-ruby-19/).
I believe I'm correct in finding that revision 15124 is when those
methods went from faster than 1.8.6 to much slower.
HTH,
Chris
In message "Re: some Enumerable methods slower in 1.9 on OS X after revision 15124"
It seems that sigsetjmp() is significantly slower on OS X. I
contacted Nobu about this issue. I believe he will address it soon.
matz.
At Thu, 27 Mar 2008 05:20:57 +0900,
Yukihiro Matsumoto wrote in [ruby-core:16037]:
> |I just wanted to make sure core had seen this blog post (with comments
> |narrowing down the scope) about #inject, #each, and #times (at least)
> |being slower on OS X (http://antoniocangiano.com/2008/03/25/inject-
> |each-and-times-methods-much-slower-in-ruby-19/). I believe I'm correct
> |in finding that revision 15124 is when those methods went from faster
> |than 1.8.6 to much slower.
>
> It seems that sigsetjmp() is significantly slower on OS X. I
> contacted Nobu about this issue. I believe he will address soon.
No, r15124 changed the code to use setjmp instead of _setjmp if
sigsetjmp exists, so sigsetjmp is still not used, the same as before.
Citing from http://developer.apple.com/documentation/Darwin/Reference/ManPages/man3/setjmp.3.html:
The setjmp()/longjmp() pairs save and restore the signal mask while
_setjmp()/_longjmp() pairs save and restore only the register set and the
stack. (See sigprocmask(2).)
also:
The sigsetjmp()/siglongjmp() function pairs save and restore the signal
mask if the argument savemask is non-zero; otherwise, only the register
set and the stack are saved.
Should the priority be _setjmp > sigsetjmp > setjmp?
Index: trunk/vm_core.h
===================================================================
--- trunk/vm_core.h (revision 15843)
+++ trunk/vm_core.h (working copy)
@@ -347,5 +347,20 @@ enum rb_thread_status {
};
+#if defined(_setjmp) && !defined(HAVE__SETJMP)
+#define HAVE__SETJMP 1
+#endif
+#if defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
+#define HAVE_SIGSETJMP 1
+#endif
+#if defined(HAVE__SETJMP) || !defined(HAVE_SIGSETJMP)
+#define RUBY_USE_SIGSETJMP 0
+#else
+#define RUBY_USE_SIGSETJMP 1
+#endif
+#if RUBY_USE_SIGSETJMP
+typedef sigjmp_buf rb_jmpbuf_t;
+#else
typedef jmp_buf rb_jmpbuf_t;
+#endif
struct rb_vm_tag {
Index: trunk/gc.c
===================================================================
--- trunk/gc.c (revision 15843)
+++ trunk/gc.c (working copy)
@@ -53,7 +53,6 @@
int rb_io_fptr_finalize(struct rb_io_t*);
-#if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
-#define setjmp(env) _setjmp(env)
-#endif
+#define rb_setjmp(env) ruby_setjmp(env)
+#define rb_jmp_buf rb_jmpbuf_t
/* Make alloca work the best possible way. */
@@ -1428,4 +1427,6 @@ obj_free(VALUE obj)
#ifdef __GNUC__
#if defined(__human68k__) || defined(DJGPP)
+#undef rb_setjmp
+#undef rb_jmp_buf
#if defined(__human68k__)
typedef unsigned long rb_jmp_buf[8];
@@ -1436,7 +1437,4 @@ _rb_setjmp:\n\
moveq.l #0,d0\n\
rts");
-#ifdef setjmp
-#undef setjmp
-#endif
#else
#if defined(DJGPP)
@@ -1459,6 +1457,4 @@ _rb_setjmp:\n\
#endif
int rb_setjmp (rb_jmp_buf);
-#define jmp_buf rb_jmp_buf
-#define setjmp rb_setjmp
#endif /* __human68k__ or DJGPP */
#endif /* __GNUC__ */
@@ -1471,5 +1467,5 @@ static void
mark_current_machine_context(rb_thread_t *th)
{
- jmp_buf save_regs_gc_mark;
+ rb_jmp_buf save_regs_gc_mark;
VALUE *stack_start, *stack_end;
@@ -1494,5 +1490,5 @@ mark_current_machine_context(rb_thread_t
FLUSH_REGISTER_WINDOWS;
/* This assumes that all registers are saved into the jmp_buf (and stack) */
- setjmp(save_regs_gc_mark);
+ rb_setjmp(save_regs_gc_mark);
mark_locations_array((VALUE*)save_regs_gc_mark,
sizeof(save_regs_gc_mark) / sizeof(VALUE));
Index: trunk/eval_intern.h
===================================================================
--- trunk/eval_intern.h (revision 15843)
+++ trunk/eval_intern.h (working copy)
@@ -82,7 +82,11 @@ char *strrchr(const char *, const char);
#endif
-#if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
+#if RUBY_USE_SIGSETJMP
+#define ruby_setjmp(env) sigsetjmp((env), 0)
+#define ruby_longjmp(env,val) siglongjmp(env,val)
+#elif defined(_setjmp) || defined(HAVE__SETJMP)
#define ruby_setjmp(env) _setjmp(env)
#define ruby_longjmp(env,val) _longjmp(env,val)
+int _setjmp(), _longjmp();
#else
#define ruby_setjmp(env) setjmp(env)
Index: stable/eval.c
===================================================================
--- stable/eval.c (revision 15843)
+++ stable/eval.c (working copy)
@@ -194,8 +194,13 @@ static int volatile freebsd_clear_carry_
(j)->status)
#else
-# if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
+# if RUBY_USE_SIGSETJMP
+# define ruby_setjmp(just_before_setjmp, env) \
+ ((just_before_setjmp), sigsetjmp((env), 0))
+# define ruby_longjmp(env,val) siglongjmp(env,val)
+# elif defined(_setjmp) || defined(HAVE__SETJMP)
# define ruby_setjmp(just_before_setjmp, env) \
((just_before_setjmp), _setjmp(env))
# define ruby_longjmp(env,val) _longjmp(env,val)
+int _setjmp(), _longjmp();
# else
# define ruby_setjmp(just_before_setjmp, env) \
Index: stable/gc.c
===================================================================
--- stable/gc.c (revision 15843)
+++ stable/gc.c (working copy)
@@ -38,7 +38,12 @@ void re_free_registers _((struct re_regi
void rb_io_fptr_finalize _((struct OpenFile*));
-#if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
-#define setjmp(env) _setjmp(env)
+#if RUBY_USE_SIGSETJMP
+#define rb_setjmp(env) sigsetjmp((env), 0)
+#elif defined(_setjmp) || defined(HAVE__SETJMP)
+#define rb_setjmp(env) _setjmp(env)
+#else
+#define rb_setjmp(env) setjmp(env)
#endif
+#define rb_jmp_buf rb_jmpbuf_t
/* Make alloca work the best possible way. */
@@ -1284,4 +1289,6 @@ rb_gc_mark_frame(frame)
#ifdef __GNUC__
#if defined(__human68k__) || defined(DJGPP)
+#undef rb_setjmp
+#undef rb_jmp_buf
#if defined(__human68k__)
typedef unsigned long rb_jmp_buf[8];
@@ -1292,7 +1299,4 @@ _rb_setjmp:\n\
moveq.l #0,d0\n\
rts");
-#ifdef setjmp
-#undef setjmp
-#endif
#else
#if defined(DJGPP)
@@ -1315,6 +1319,4 @@ _rb_setjmp:\n\
#endif
int rb_setjmp (rb_jmp_buf);
-#define jmp_buf rb_jmp_buf
-#define setjmp rb_setjmp
#endif /* __human68k__ or DJGPP */
#endif /* __GNUC__ */
@@ -1365,5 +1367,5 @@ garbage_collect()
FLUSH_REGISTER_WINDOWS;
/* This assumes that all registers are saved into the jmp_buf (and stack) */
- setjmp(save_regs_gc_mark);
+ rb_setjmp(save_regs_gc_mark);
mark_locations_array((VALUE*)save_regs_gc_mark, sizeof(save_regs_gc_mark) / sizeof(VALUE *));
#if STACK_GROW_DIRECTION < 0
Index: stable/node.h
===================================================================
--- stable/node.h (revision 15843)
+++ stable/node.h (working copy)
@@ -388,6 +388,21 @@ typedef struct {
} rb_jmpbuf_t[1];
#else
+#if defined(_setjmp) && !defined(HAVE__SETJMP)
+#define HAVE__SETJMP 1
+#endif
+#if defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
+#define HAVE_SIGSETJMP 1
+#endif
+#if defined(HAVE__SETJMP) || !defined(HAVE_SIGSETJMP)
+#define RUBY_USE_SIGSETJMP 0
+#else
+#define RUBY_USE_SIGSETJMP 1
+#endif
+#if RUBY_USE_SIGSETJMP
+typedef sigjmp_buf rb_jmpbuf_t;
+#else
typedef jmp_buf rb_jmpbuf_t;
#endif
+#endif
enum rb_thread_status {
--
Nobu Nakada
N> No, r15124 changed to use setjmp instead of _setjmp if
N> sigsetjmp exists, so sigsetjmp isn't used as well as earlier.
I'm not sure, but I think that on FreeBSD 5.x and Mac OS X, setjmp() is
in reality sigsetjmp(), whereas on Linux it's _setjmp().
Guy Decoux
I think it may depend on the platform. I ran across this the other day
while trying to decide whether I could afford the overhead of a call to
rb_protect():
http://bugs.opensolaris.org/view_bug.do;jsessionid=3681352170809a64f236f8b6ec?bug_id=4404612
Quote:
One of the 'big wins' I discovered was to rebuild perl so that it used setjmp
instead of the default sigsetjmp. This gave about an 18% performance boost.
It turns out that the perl interpreter calls sigsetjmp every time it enters a
new block scope, and the libexacct module does this a lot, which is why the
setjmp/sigsetjmp difference is so noticeable. I wrote a micro benchmark to
time how long setjmp and sigsetjmp took.
Here are the results for sparc (450Mhz):
Loop overhead: 22 ns/iter
setjmp(jb): 37 ns/call
sigsetjmp(sjb, 0): 11774 ns/call
sigsetjmp(sjb, 1): 11891 ns/call
and here are the results for i386 (600MHz):
Loop overhead: 2 ns/iter
setjmp(jb): 25 ns/call
sigsetjmp(sjb, 0): 1963 ns/call
sigsetjmp(sjb, 1): 1975 ns/call
mongo$
Linux (laptop 360MHz) gives:
Loop overhead: 16 ns/iter
setjmp(jb): 101 ns/call
sigsetjmp(sjb, 0): 101 ns/call
sigsetjmp(sjb, 1): 1288 ns/call
It seems there exist platforms where setjmp is preferable over
sigsetjmp with the second parameter set to 0.
Paul
At Thu, 27 Mar 2008 22:31:31 +0900,
Paul Brannan wrote in [ruby-core:16049]:
>
> On Thu, Mar 27, 2008 at 02:26:51PM +0900, Nobuyoshi Nakada wrote:
> > Should the priority be _setjmp > sigsetjmp > setjmp?
>
> I think it may depend on the platform. I ran across this the other day
> while trying to decide whether I could afford the overhead of a call to
> rb_protect():
>
> http://bugs.opensolaris.org/view_bug.do;jsessionid=3681352170809a64f236f8b6ec?bug_id=4404612
>
> Quote:
(snip)
> It seems there exist platforms where setjmp is preferable over
> sigsetjmp with the second parameter set to 0.
Thank you for the info, it's interesting. On cygwin, too,
sigsetjmp() always stores the signal mask regardless of the
SAVEMASK value. The parameter works as RESTOREMASK
instead.
This patch selects the setjmp variant as follows:
* if --with-setjmp-type configure option is given, use its value,
* if _setjmp is available, use it,
* if sigsetjmp is N/A, use setjmp,
* use setjmp on Solaris and Cygwin, or sigsetjmp on others.
Any more reasonable solutions?
Index: stable/configure.in
===================================================================
--- stable/configure.in (revision 15830)
+++ stable/configure.in (working copy)
@@ -573,4 +573,41 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid
mktime timegm gettimeofday\
cosh sinh tanh round setuid setgid setenv unsetenv)
+
+AC_MSG_CHECKING(for setjmp type)
+AC_ARG_WITH(setjmp-type,
+ [ --with-setjmp-type select setjmp type], [
+ case $withval in
+ _setjmp) setjmp_prefix=_;;
+ sigsetjmp) setjmp_prefix=sig;;
+ setjmp) setjmp_prefix=;;
+ '') unset setjmp_prefix;;
+ *) AC_MSG_ERROR(invalid setjmp type: $withval);;
+ esac], [unset setjmp_prefix])
+if test ${setjmp_prefix+set}; then
+ if test "${setjmp_prefix}" && eval test '$ac_cv_'${setjmp_prefix}setjmp = no; then
+ AC_MSG_ERROR(${setjmp_prefix}setjmp is not available)
+ fi
+elif test "$ac_cv_func__setjmp" = yes; then
+ setjmp_prefix=_
+elif test "$ac_cv_func_sigsetjmp" = yes; then
+ case $target_os in
+ solaris*|cygwin*)
+ setjmp_prefix=;;
+ *)
+ setjmp_prefix=sig;;
+ esac
+else
+ setjmp_prefix=
+fi
+if test $setjmp_prefix = sig; then
+ setjmp_sigmask=yes
+else
+ unset setjmp_sigmask
+fi
+AC_MSG_RESULT(${setjmp_prefix}setjmp)
+AC_DEFINE_UNQUOTED([RUBY_SETJMP(env)], [${setjmp_prefix}setjmp(env${setjmp_sigmask+,0})])
+AC_DEFINE_UNQUOTED([RUBY_LONGJMP(env,val)], [${setjmp_prefix}longjmp(env,val)])
+AC_DEFINE_UNQUOTED(RUBY_JMP_BUF, ${setjmp_sigmask+${setjmp_prefix}}jmp_buf)
+
AC_ARG_ENABLE(setreuid,
[ --enable-setreuid use setreuid()/setregid() according to need even if obsolete.],
Index: stable/node.h
===================================================================
--- stable/node.h (revision 15830)
+++ stable/node.h (working copy)
@@ -388,5 +388,5 @@ typedef struct {
} rb_jmpbuf_t[1];
#else
-typedef jmp_buf rb_jmpbuf_t;
+typedef RUBY_JMP_BUF rb_jmpbuf_t;
#endif
Index: stable/eval.c
===================================================================
--- stable/eval.c (revision 15830)
+++ stable/eval.c (working copy)
@@ -194,12 +194,9 @@ static int volatile freebsd_clear_carry_
(j)->status)
#else
-# if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
-# define ruby_setjmp(just_before_setjmp, env) \
- ((just_before_setjmp), _setjmp(env))
-# define ruby_longjmp(env,val) _longjmp(env,val)
-# else
-# define ruby_setjmp(just_before_setjmp, env) \
- ((just_before_setjmp), setjmp(env))
-# define ruby_longjmp(env,val) longjmp(env,val)
+# define ruby_setjmp(just_before_setjmp, env) \
+ ((just_before_setjmp), RUBY_SETJMP(env))
+# define ruby_longjmp(env,val) RUBY_LONGJMP(env,val)
+# ifdef __CYGWIN__
+int _setjmp(), _longjmp();
# endif
#endif
Index: stable/gc.c
===================================================================
--- stable/gc.c (revision 15830)
+++ stable/gc.c (working copy)
@@ -38,6 +38,8 @@
void rb_io_fptr_finalize _((struct OpenFile*));
-#if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
-#define setjmp(env) _setjmp(env)
+#define rb_setjmp(env) RUBY_SETJMP(env)
+#define rb_jmp_buf rb_jmpbuf_t
+#ifdef __CYGWIN__
+int _setjmp(), _longjmp();
#endif
@@ -1284,4 +1286,6 @@ rb_gc_mark_frame(frame)
#ifdef __GNUC__
#if defined(__human68k__) || defined(DJGPP)
+#undef rb_setjmp
+#undef rb_jmp_buf
#if defined(__human68k__)
typedef unsigned long rb_jmp_buf[8];
@@ -1292,7 +1296,4 @@ _rb_setjmp:\n\
moveq.l #0,d0\n\
rts");
-#ifdef setjmp
-#undef setjmp
-#endif
#else
#if defined(DJGPP)
@@ -1315,6 +1316,4 @@ _rb_setjmp:\n\
#endif
int rb_setjmp (rb_jmp_buf);
-#define jmp_buf rb_jmp_buf
-#define setjmp rb_setjmp
#endif /* __human68k__ or DJGPP */
#endif /* __GNUC__ */
@@ -1365,5 +1364,5 @@ garbage_collect()
FLUSH_REGISTER_WINDOWS;
/* This assumes that all registers are saved into the jmp_buf (and stack) */
- setjmp(save_regs_gc_mark);
+ rb_setjmp(save_regs_gc_mark);
mark_locations_array((VALUE*)save_regs_gc_mark, sizeof(save_regs_gc_mark) / sizeof(VALUE *));
#if STACK_GROW_DIRECTION < 0
Index: trunk/configure.in
===================================================================
--- trunk/configure.in (revision 15830)
+++ trunk/configure.in (working copy)
@@ -707,4 +707,41 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid
setuid setgid daemon select_large_fdset setenv unsetenv\
mktime timegm clock_gettime gettimeofday)
+
+AC_MSG_CHECKING(for setjmp type)
+AC_ARG_WITH(setjmp-type,
+ [ --with-setjmp-type select setjmp type], [
+ case $withval in
+ _setjmp) setjmp_prefix=_;;
+ sigsetjmp) setjmp_prefix=sig;;
+ setjmp) setjmp_prefix=;;
+ '') unset setjmp_prefix;;
+ *) AC_MSG_ERROR(invalid setjmp type: $withval);;
+ esac], [unset setjmp_prefix])
+if test ${setjmp_prefix+set}; then
+ if test "${setjmp_prefix}" && eval test '$ac_cv_'${setjmp_prefix}setjmp = no; then
+ AC_MSG_ERROR(${setjmp_prefix}setjmp is not available)
+ fi
+elif test "$ac_cv_func__setjmp" = yes; then
+ setjmp_prefix=_
+elif test "$ac_cv_func_sigsetjmp" = yes; then
+ case $target_os in
+ solaris*|cygwin*)
+ setjmp_prefix=;;
+ *)
+ setjmp_prefix=sig;;
+ esac
+else
+ setjmp_prefix=
+fi
+if test $setjmp_prefix = sig; then
+ setjmp_sigmask=yes
+else
+ unset setjmp_sigmask
+fi
+AC_MSG_RESULT(${setjmp_prefix}setjmp)
+AC_DEFINE_UNQUOTED([RUBY_SETJMP(env)], [${setjmp_prefix}setjmp(env${setjmp_sigmask+,0})])
+AC_DEFINE_UNQUOTED([RUBY_LONGJMP(env,val)], [${setjmp_prefix}longjmp(env,val)])
+AC_DEFINE_UNQUOTED(RUBY_JMP_BUF, ${setjmp_sigmask+${setjmp_prefix}}jmp_buf)
+
AC_ARG_ENABLE(setreuid,
[ --enable-setreuid use setreuid()/setregid() according to need even if obsolete.],
Index: trunk/eval_intern.h
===================================================================
--- trunk/eval_intern.h (revision 15830)
+++ trunk/eval_intern.h (working copy)
@@ -82,10 +82,8 @@ char *strrchr(const char *, const char);
#endif
-#if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
-#define ruby_setjmp(env) _setjmp(env)
-#define ruby_longjmp(env,val) _longjmp(env,val)
-#else
-#define ruby_setjmp(env) setjmp(env)
-#define ruby_longjmp(env,val) longjmp(env,val)
+#define ruby_setjmp(env) RUBY_SETJMP(env)
+#define ruby_longjmp(env,val) RUBY_LONGJMP(env,val)
+#ifdef __CYGWIN__
+int _setjmp(), _longjmp();
#endif
Index: trunk/gc.c
===================================================================
--- trunk/gc.c (revision 15830)
+++ trunk/gc.c (working copy)
@@ -53,7 +53,6 @@
int rb_io_fptr_finalize(struct rb_io_t*);
-#if !defined(setjmp) && defined(HAVE__SETJMP) && !defined(sigsetjmp) && !defined(HAVE_SIGSETJMP)
-#define setjmp(env) _setjmp(env)
-#endif
+#define rb_setjmp(env) RUBY_SETJMP(env)
Index: trunk/vm_core.h
===================================================================
--- trunk/vm_core.h (revision 15830)
+++ trunk/vm_core.h (working copy)
@@ -347,5 +347,5 @@ enum rb_thread_status {
};
-typedef jmp_buf rb_jmpbuf_t;
+typedef RUBY_JMP_BUF rb_jmpbuf_t;
struct rb_vm_tag {
--
Nobu Nakada
This seems reasonable to me.
Also, I wonder if there is any benefit to __builtin_setjmp instead of
_setjmp?
Also, just thinking, I wonder how hard it would be to make use of dwarf2
exceptions on gcc via unwind.h?
(I think the API is described here:
http://www.ucw.cz/~hubicka/papers/abi/node25.html)
Paul
At Mon, 31 Mar 2008 22:07:03 +0900,
Paul Brannan wrote in [ruby-core:16086]:
> Also, I wonder if there is any benefit to __builtin_setjmp instead of
> _setjmp?
Unfortunately,
error: '__builtin_longjmp' second argument must be 1
it seems that it cannot be passed a variable. And combining it with
_longjmp caused a SEGV.
> Also, just thinking, I wonder how hard it would be to make use of dwarf2
> exceptions on gcc via unwind.h?
How much faster is it?
> (I think the API is described here:
> http://www.ucw.cz/~hubicka/papers/abi/node25.html)
I wonder if it works on systems other than those ``provided by any AMD64
psABI-compliant system.''
--
Nobu Nakada
See attached for a workaround, but I was not able to measure any performance
benefit on gcc 3.3. :(
(but I'm also not sure how to construct a good benchmark where the
method call overhead doesn't dwarf what I'm trying to measure)
> it seems not be able to pass a variable. And combination with
> _longjmp caused SEGV.
>
> > Also, just thinking, I wonder how hard it would be to make use of dwarf2
> > exceptions on gcc via unwind.h?
>
> How faster is it?
I don't know; we'd have to try it and find out.
No one seems to have benchmarks, just blanket statements like "dwarf2
exceptions are faster than sjlj for non-exceptional cases".
Paul
At Wed, 2 Apr 2008 00:11:29 +0900,
Paul Brannan wrote in [ruby-core:16103]:
> > error: '__builtin_longjmp' second argument must be 1
>
> See attached for a workaround, but I was not able to measure any performance
> benefit on gcc 3.3. :(
Nothing seems to be attached for the workaround. :)
--
Nobu Nakada