[I should take a week to read this more carefully, but I'll ask now anyway ...]
Can you explain (again) the difference between aclock and sclock? Does
`box` increases aclock but don't increase sclock? Does `set-box!`
increase both? Is it possible to increase sclock without increasing
aclock?
Gustavo
On Sun, Sep 13, 2015 at 4:28 PM, <
mfl...@racket-lang.org> wrote:
> mflatt has updated `master' from 5ae7e54dac to ab2aaff6be.
>
http://git.racket-lang.org/plt/5ae7e54dac..ab2aaff6be
>
> =====[ One Commit ]=====================================================
> Directory summary:
> 11.0% pkgs/racket-test-core/tests/racket/
> 88.9% racket/src/racket/src/
>
> ~~~~~~~~~~
>
> ab2aaff Matthew Flatt <
mfl...@racket-lang.org> 2015-09-13 08:24
> :
> | optimizer: fix `let-values` splitting and allocation reordering
> |
> | First bug:
> |
> | When the optimize converts
> |
> | (let-values ([(X ...) (values M ...)])
> | ....)
> |
> | to
> |
> | (let ([X M] ...)
> | ....)
> |
> | it incorrectly attached a virtual timestamp to each "[X M]" binding
> | that corresponds to the timestamp after the whole `(values M ...)`.
> |
> | The solution is to approximate tracking the timestamp for invidual
> | expressions.
> |
> | Second bug:
> |
> | The compiler could reorder a continuation-capturing expression past
> | an allocation.
> |
> | The solution is to track allocations with a new virtual clock.
> :
> M pkgs/racket-test-core/tests/racket/optimize.rktl | 43 +++-
> M racket/src/racket/src/letrec_check.c | 2 +-
> M racket/src/racket/src/list.c | 14 +-
> M racket/src/racket/src/optimize.c | 266 +++++++++++++++++---
> M racket/src/racket/src/schpriv.h | 35 +--
> M racket/src/racket/src/vector.c | 4 +-
>
> =====[ Overall Diff ]===================================================
>
> pkgs/racket-test-core/tests/racket/optimize.rktl
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> --- OLD/pkgs/racket-test-core/tests/racket/optimize.rktl
> +++ NEW/pkgs/racket-test-core/tests/racket/optimize.rktl
> @@ -2004,19 +2004,52 @@
> (test '((1) (2)) f (lambda (n) (set! v n) n))
> (test 2 values v)))
>
> +;; Make sure `values` splitting doesn't use wrong clock values
> +;; leading to reordering:
> +(test-comp '(lambda (p)
> + (define-values (x y) (values (car p) (cdr p)))
> + (values y x))
> + '(lambda (p)
> + (values (#%unsafe-cdr p) (car p)))
> + #f)
> +(test-comp '(lambda (p)
> + (define-values (x y) (values (car p) (cdr p)))
> + (values y x))
> + '(lambda (p)
> + (let ([x (car p)])
> + (values (unsafe-cdr p) x))))
> +
> (test-comp '(lambda (z)
> - ;; Moving `(list z)` before `(list (z 2))`
> - ;; would reorder, which is not allowed, so check
> - ;; that the optimizer can keep track:
> + ;; Moving `(list z)` after `(list (z 2))` is not allowed
> + ;; in case `(z 2)` captures a continuation:
> (let-values ([(a b) (values (list z) (list (z 2)))])
> - (list a b)))
> + (list b a)))
> + '(lambda (z)
> + (list (list (z 2)) (list z)))
> + #f)
> +(test-comp '(lambda (z)
> + (let-values ([(a b) (values (list (z 2)) (list z))])
> + (list a a b)))
> '(lambda (z)
> - (list (list z) (list (z 2)))))
> + (let ([a (list (z 2))])
> + (list a a (list z)))))
> +
> +;; It would be nice if the optimizer could do these two, but because it
> +;; involves temporarily reordering `(list z)` and `(list (z 2))`
> +;; (which is not allowed in case `(z 2)` captures a continuation),
> +;; the optimizer currently cannot manage it:
> +#;
> (test-comp '(lambda (z)
> (let-values ([(a b) (values (list (z 2)) (list z))])
> (list a b)))
> '(lambda (z)
> (list (list (z 2)) (list z))))
> +#;
> +(test-comp '(lambda (z)
> + (let-values ([(a b) (values (list z) (list (z 2)))])
> + (list a b)))
> + '(lambda (z)
> + (list (list z) (list (z 2)))))
>
> (test-comp '(module m racket/base
> ;; Reference to a ready module-level variable shouldn't
>
> racket/src/racket/src/letrec_check.c
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> --- OLD/racket/src/racket/src/letrec_check.c
> +++ NEW/racket/src/racket/src/letrec_check.c
> @@ -457,7 +457,7 @@ static Scheme_Object *letrec_check_local(Scheme_Object *o, Letrec_Check_Frame *f
> static int is_effect_free_prim(Scheme_Object *rator)
> {
> if (SCHEME_PRIMP(rator)
> - && (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & SCHEME_PRIM_IS_OMITABLE))
> + && (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & SCHEME_PRIM_IS_OMITABLE_ANY))
> return 1;
>
> return 0;
>
> racket/src/racket/src/list.c
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> --- OLD/racket/src/racket/src/list.c
> +++ NEW/racket/src/racket/src/list.c
> @@ -209,7 +209,7 @@ scheme_init_list (Scheme_Env *env)
> p = scheme_make_immed_prim(cons_prim, "cons", 2, 2);
> scheme_cons_proc = p;
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant ("cons", p, env);
>
> p = scheme_make_folding_prim(scheme_checked_car, "car", 1, 1, 1);
> @@ -224,7 +224,7 @@ scheme_init_list (Scheme_Env *env)
> p = scheme_make_immed_prim(mcons_prim, "mcons", 2, 2);
> scheme_mcons_proc = p;
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant ("mcons", p, env);
>
> p = scheme_make_immed_prim(scheme_checked_mcar, "mcar", 1, 1);
> @@ -263,7 +263,7 @@ scheme_init_list (Scheme_Env *env)
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
> | SCHEME_PRIM_IS_BINARY_INLINED
> | SCHEME_PRIM_IS_NARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant ("list", p, env);
>
> REGISTER_SO(scheme_list_star_proc);
> @@ -272,7 +272,7 @@ scheme_init_list (Scheme_Env *env)
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
> | SCHEME_PRIM_IS_BINARY_INLINED
> | SCHEME_PRIM_IS_NARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant ("list*", p, env);
>
> p = scheme_make_folding_prim(immutablep, "immutable?", 1, 1, 1);
> @@ -434,13 +434,13 @@ scheme_init_list (Scheme_Env *env)
> p = scheme_make_immed_prim(box, BOX, 1, 1);
> scheme_box_proc = p;
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant(BOX, p, env);
>
> REGISTER_SO(scheme_box_immutable_proc);
> p = scheme_make_immed_prim(immutable_box, "box-immutable", 1, 1);
> scheme_box_immutable_proc = p;
> - SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_OMITABLE);
> + SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant("box-immutable", p, env);
>
> REGISTER_SO(scheme_box_p_proc);
> @@ -765,7 +765,7 @@ scheme_init_unsafe_list (Scheme_Env *env)
> REGISTER_SO(scheme_unsafe_cons_list_proc);
> p = scheme_make_immed_prim(unsafe_cons_list, "unsafe-cons-list", 2, 2);
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_BINARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant ("unsafe-cons-list", p, env);
> scheme_unsafe_cons_list_proc = p;
>
>
> racket/src/racket/src/optimize.c
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> --- OLD/racket/src/racket/src/optimize.c
> +++ NEW/racket/src/racket/src/optimize.c
> @@ -57,18 +57,26 @@ struct Optimize_Info
>
> /* Propagated up and down the chain: */
> int size;
> - int vclock; /* virtual clock that ticks for a side effect or branch;
> + int vclock; /* virtual clock that ticks for a side effect, a branch,
> + or a dependency on an earlier side-effect (such as a
> + previous guard on an unsafe operation's argument);
> the clock is only compared between binding sites and
> uses, so we can rewind the clock at a join after an
> increment that models a branch (if the branch is not
> taken or doesn't increment the clock) */
> - int kclock; /* virtual clock that ticks for a potential continuation capture */
> + int aclock; /* virtual clock that ticks for allocation without side effects,
> + for constraining the reordering of operations that might
> + capture a continuation */
> + int kclock; /* virtual clock that ticks for a potential continuation capture,
> + for constraining the movement of allocation operations */
> int sclock; /* virtual clock that ticks when space consumption is potentially observed */
> int psize;
> short inline_fuel, shift_fuel;
> char letrec_not_twice, enforce_const, use_psize, has_nonleaf;
> Scheme_Hash_Table *top_level_consts;
>
> + int maybe_values_argument; /* triggers an approximation for clock increments */
> +
> /* Set by expression optimization: */
> int single_result, preserves_marks; /* negative means "tentative", due to fixpoint in progress */
> int escapes; /* flag to signal that the expression allways escapes. When escapes is 1, it's assumed
> @@ -174,8 +182,10 @@ typedef struct Scheme_Once_Used {
> Scheme_Object *expr;
> int pos;
> int vclock;
> + int aclock;
> int kclock;
> int sclock;
> + int spans_k; /* potentially captures a continuation */
>
> int used;
> int delta;
> @@ -186,7 +196,7 @@ typedef struct Scheme_Once_Used {
> } Scheme_Once_Used;
>
> static Scheme_Once_Used *make_once_used(Scheme_Object *val, int pos,
> - int vclock, int kclock, int sclock,
> + int vclock, int aclock, int kclock, int sclock, int spans_k,
> Scheme_Once_Used *prev);
>
> #ifdef MZ_PRECISE_GC
> @@ -208,7 +218,7 @@ int scheme_is_functional_nonfailing_primitive(Scheme_Object *rator, int num_args
> /* return 2 => results are a constant when arguments are constants */
> {
> if (SCHEME_PRIMP(rator)
> - && (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & (SCHEME_PRIM_IS_OMITABLE | SCHEME_PRIM_IS_UNSAFE_NONMUTATING))
> + && (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & (SCHEME_PRIM_IS_OMITABLE_ANY | SCHEME_PRIM_IS_UNSAFE_NONMUTATING))
> && (num_args >= ((Scheme_Primitive_Proc *)rator)->mina)
> && (num_args <= ((Scheme_Primitive_Proc *)rator)->mu.maxa)
> && ((expected_vals < 0)
> @@ -2236,10 +2246,13 @@ static Scheme_Object *check_app_let_rator(Scheme_Object *app, Scheme_Object *rat
> return scheme_optimize_expr(orig_rator, info, context);
> }
>
> -static int is_nonmutating_primitive(Scheme_Object *rator, int n)
> +static int is_nonmutating_nondependant_primitive(Scheme_Object *rator, int n)
> +/* Does not include SCHEME_PRIM_IS_UNSAFE_OMITABLE, because those can
> + depend on earlier tests (explicit or implicit) for whether the
> + unsafe operation is defined */
> {
> if (SCHEME_PRIMP(rator)
> - && (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & (SCHEME_PRIM_IS_OMITABLE))
> + && (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & (SCHEME_PRIM_IS_OMITABLE | SCHEME_PRIM_IS_OMITABLE_ALLOCATION))
> && (n >= ((Scheme_Primitive_Proc *)rator)->mina)
> && (n <= ((Scheme_Primitive_Proc *)rator)->mu.maxa))
> return 1;
> @@ -2247,6 +2260,14 @@ static int is_nonmutating_primitive(Scheme_Object *rator, int n)
> return 0;
> }
>
> +static int is_primitive_allocating(Scheme_Object *rator, int n)
> +{
> + if (SCHEME_PRIM_PROC_OPT_FLAGS(rator) & (SCHEME_PRIM_IS_OMITABLE_ALLOCATION))
> + return 1;
> +
> + return 0;
> +}
> +
> static int is_noncapturing_primitive(Scheme_Object *rator, int n)
> {
> if (SCHEME_PRIMP(rator)) {
> @@ -2781,6 +2802,9 @@ static Scheme_Object *optimize_application(Scheme_Object *o, Optimize_Info *info
> le = optimize_for_inline(info, app->args[i], n - 1, app, NULL, NULL, &rator_flags, context, 1, 0);
> if (le)
> return le;
> + if (SAME_TYPE(app->args[0], scheme_values_func)
> + || SAME_TYPE(app->args[0], scheme_apply_proc))
> + info->maybe_values_argument = 1;
> rator_apply_escapes = info->escapes;
> }
> }
> @@ -2921,6 +2945,44 @@ static Scheme_Object *finish_optimize_any_application(Scheme_Object *app, Scheme
> return app;
> }
>
> +static void increment_clock_counts_for_application(GC_CAN_IGNORE int *_vclock,
> + GC_CAN_IGNORE int *_aclock,
> + GC_CAN_IGNORE int *_kclock,
> + GC_CAN_IGNORE int *_sclock,
> + Scheme_Object *rator,
> + int argc)
> +{
> + if (!is_nonmutating_nondependant_primitive(rator, argc))
> + *_vclock += 1;
> + else if (is_primitive_allocating(rator, argc))
> + *_aclock += 1;
> +
> + if (!is_noncapturing_primitive(rator, argc))
> + *_kclock += 1;
> +
> + if (!is_nonsaving_primitive(rator, argc))
> + *_sclock += 1;
> +}
> +
> +static void increment_clocks_for_application(Optimize_Info *info,
> + Scheme_Object *rator,
> + int argc)
> +{
> + int v, a, k, s;
> +
> + v = info->vclock;
> + a = info->aclock;
> + k = info->kclock;
> + s = info->sclock;
> +
> + increment_clock_counts_for_application(&v, &a, &k, &s, rator, argc);
> +
> + info->vclock = v;
> + info->aclock = a;
> + info->kclock = k;
> + info->sclock = s;
> +}
> +
> static Scheme_Object *finish_optimize_application(Scheme_App_Rec *app, Optimize_Info *info, int context, int rator_flags)
> {
> Scheme_Object *le;
> @@ -2932,13 +2994,8 @@ static Scheme_Object *finish_optimize_application(Scheme_App_Rec *app, Optimize_
> }
>
> info->size += 1;
> - if (!is_nonmutating_primitive(app->args[0], app->num_args))
> - info->vclock += 1;
> - if (!is_noncapturing_primitive(app->args[0], app->num_args))
> - info->kclock += 1;
> - if (!is_nonsaving_primitive(app->args[0], app->num_args))
> - info->sclock += 1;
> -
> + increment_clocks_for_application(info, app->args[0], app->num_args);
> +
> if (all_vals) {
> le = try_optimize_fold(app->args[0], NULL, (Scheme_Object *)app, info);
> if (le)
> @@ -3214,12 +3271,7 @@ static Scheme_Object *finish_optimize_application2(Scheme_App2_Rec *app, Optimiz
> return replace_tail_inside(le, inside, app->rand);
> }
>
> - if (!is_nonmutating_primitive(rator, 1))
> - info->vclock += 1;
> - if (!is_noncapturing_primitive(rator, 1))
> - info->kclock += 1;
> - if (!is_nonsaving_primitive(rator, 1))
> - info->sclock += 1;
> + increment_clocks_for_application(info, rator, 1);
>
> info->preserves_marks = !!(rator_flags & CLOS_PRESERVES_MARKS);
> info->single_result = !!(rator_flags & CLOS_SINGLE_RESULT);
> @@ -3476,6 +3528,10 @@ static Scheme_Object *optimize_application3(Scheme_Object *o, Optimize_Info *inf
> rator_apply_escapes = info->escapes;
> }
>
> + if (SAME_TYPE(app->rator, scheme_values_func)
> + || SAME_TYPE(app->rator, scheme_apply_proc))
> + info->maybe_values_argument = 1;
> +
> /* 1st arg */
>
> ty = wants_local_type_arguments(app->rator, 0);
> @@ -3548,12 +3604,7 @@ static Scheme_Object *finish_optimize_application3(Scheme_App3_Rec *app, Optimiz
> return le;
> }
>
> - if (!is_nonmutating_primitive(app->rator, 2))
> - info->vclock += 1;
> - if (!is_noncapturing_primitive(app->rator, 2))
> - info->kclock += 1;
> - if (!is_nonsaving_primitive(app->rator, 2))
> - info->sclock += 1;
> + increment_clocks_for_application(info, app->rator, 2);
>
> /* Check for (call-with-values (lambda () M) N): */
> if (SAME_OBJ(app->rator, scheme_call_with_values_proc)) {
> @@ -4218,9 +4269,9 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
> Scheme_Branch_Rec *b;
> Scheme_Object *t, *tb, *fb;
> Scheme_Hash_Tree *init_types, *then_types;
> - int init_vclock, init_kclock, init_sclock;
> + int init_vclock, init_aclock, init_kclock, init_sclock;
> int then_escapes, then_preserves_marks, then_single_result;
> - int then_vclock, then_kclock, then_sclock;
> + int then_vclock, then_aclock, then_kclock, then_sclock;
> Optimize_Info_Sequence info_seq;
> Scheme_Object *pred;
>
> @@ -4333,6 +4384,7 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
> info->vclock += 1; /* model branch as clock increment */
>
> init_vclock = info->vclock;
> + init_aclock = info->aclock;
> init_kclock = info->kclock;
> init_sclock = info->sclock;
> init_types = info->types;
> @@ -4346,11 +4398,13 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
> then_single_result = info->single_result;
> then_escapes = info->escapes;
> then_vclock = info->vclock;
> + then_aclock = info->aclock;
> then_kclock = info->kclock;
> then_sclock = info->sclock;
>
> info->types = init_types;
> info->vclock = init_vclock;
> + info->aclock = init_aclock;
> info->kclock = init_kclock;
> info->sclock = init_sclock;
>
> @@ -4390,6 +4444,8 @@ static Scheme_Object *optimize_branch(Scheme_Object *o, Optimize_Info *info, int
>
> if (then_sclock > info->sclock)
> info->sclock = then_sclock;
> + if (then_aclock > info->aclock)
> + info->aclock = then_aclock;
>
> if ((init_vclock == then_vclock) && (init_vclock == info->vclock)) {
> /* we can rewind the vclock to just after the test, because the
> @@ -5644,6 +5700,74 @@ int scheme_might_invoke_call_cc(Scheme_Object *value)
> return !scheme_is_liftable(value, -1, 10, 0, 1);
> }
>
> +#define ADVANCE_CLOCKS_INIT_FUEL 3
> +
> +void advance_clocks_for_optimized(Scheme_Object *o,
> + GC_CAN_IGNORE int *_vclock,
> + GC_CAN_IGNORE int *_aclock,
> + GC_CAN_IGNORE int *_kclock,
> + GC_CAN_IGNORE int *_sclock,
> + Optimize_Info *info,
> + int fuel)
> +/* It's ok for this function to advance clocks *less* than
> + acurrately, but not more than acurrately */
> +{
> + Scheme_Object *rator = NULL;
> + int argc = 0;
> +
> + if (!fuel) return;
> +
> + switch (SCHEME_TYPE(o)) {
> + case scheme_application_type:
> + {
> + Scheme_App_Rec *app = (Scheme_App_Rec *)o;
> + int i;
> + for (i = 0; i < app->num_args; i++) {
> + advance_clocks_for_optimized(app->args[i+1],
> + _vclock, _aclock, _kclock, _sclock,
> + info, fuel - 1);
> + }
> + rator = app->args[0];
> + argc = app->num_args;
> + }
> + break;
> + case scheme_application2_type:
> + {
> + Scheme_App2_Rec *app = (Scheme_App2_Rec *)o;
> + advance_clocks_for_optimized(app->rand,
> + _vclock, _aclock, _kclock, _sclock,
> + info, fuel - 1);
> + rator = app->rator;
> + argc = 1;
> + break;
> + }
> + case scheme_application3_type:
> + {
> + Scheme_App3_Rec *app = (Scheme_App3_Rec *)o;
> + advance_clocks_for_optimized(app->rand1,
> + _vclock, _aclock, _kclock, _sclock,
> + info, fuel - 1);
> + advance_clocks_for_optimized(app->rand2,
> + _vclock, _aclock, _kclock, _sclock,
> + info, fuel - 1);
> + rator = app->rator;
> + argc = 2;
> + }
> + break;
> + default:
> + break;
> + }
> +
> + if (rator)
> + increment_clock_counts_for_application(_vclock, _aclock, _kclock, _sclock, rator, argc);
> +
> + if ((*_vclock > info->vclock)
> + || (*_aclock > info->aclock)
> + || (*_kclock > info->kclock)
> + || (*_sclock > info->sclock))
> + scheme_signal_error("internal error: optimizer clock tracking has gone wrong");
> +}
> +
> static int worth_lifting(Scheme_Object *v)
> {
> Scheme_Type lhs;
> @@ -5671,6 +5795,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> int did_set_value, checked_once, skip_depth, unused_clauses, found_escapes;
> int remove_last_one = 0, inline_fuel, rev_bind_order;
> int post_bind = !(SCHEME_LET_FLAGS(head) & (SCHEME_LET_RECURSIVE | SCHEME_LET_STAR));
> + int pre_vclock, pre_aclock, pre_kclock, pre_sclock, increments_kclock;
> + int once_vclock, once_aclock, once_kclock, once_sclock, once_increments_kclock;
>
> # define pos_EARLIER(a, b) (rev_bind_order ? ((a) > (b)) : ((a) < (b)))
>
> @@ -5958,6 +6084,10 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> }
>
> if (!skip_opts) {
> + pre_vclock = rhs_info->vclock;
> + pre_aclock = rhs_info->aclock;
> + pre_kclock = rhs_info->kclock;
> + pre_sclock = rhs_info->sclock;
> if (!found_escapes) {
> optimize_info_seq_step(rhs_info, &info_seq);
> value = scheme_optimize_expr(pre_body->value, rhs_info,
> @@ -5976,9 +6106,41 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> body_info->escapes = 1;
> body_info->size++;
> }
> + once_vclock = rhs_info->vclock;
> + once_aclock = rhs_info->aclock;
> + once_kclock = rhs_info->kclock;
> + once_sclock = rhs_info->sclock;
> + increments_kclock = (once_kclock > pre_kclock);
> + once_increments_kclock = increments_kclock;
> } else {
> value = pre_body->value;
> --skip_opts;
> + if (skip_opts) {
> + /* when a `values` group is split, we've lost track of the
> + clock values for points between the `values` arguments;
> + we can conservatively assume the clock before the whole group
> + for the purpose of registering once-used variables,
> + but we can also conservatively advance the clock: */
> + advance_clocks_for_optimized(value,
> + &pre_vclock, &pre_aclock, &pre_kclock, &pre_sclock,
> + rhs_info,
> + ADVANCE_CLOCKS_INIT_FUEL);
> + once_vclock = pre_vclock;
> + once_aclock = pre_aclock;
> + once_kclock = pre_kclock;
> + once_sclock = pre_sclock;
> + } else {
> + /* end of split group, so rhs_info clock is right */
> + once_vclock = rhs_info->vclock;
> + once_aclock = rhs_info->aclock;
> + once_kclock = rhs_info->kclock;
> + once_sclock = rhs_info->sclock;
> + }
> + if (increments_kclock) {
> + /* note that we conservatively assume that a member of a split
> + advance the kclock, unless we can easily show otherwise */
> + once_increments_kclock = 1;
> + }
> }
>
> if (undiscourage) {
> @@ -6030,7 +6192,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> int *new_flags;
> int cnt;
>
> - /* This conversion may reorder the expressions. */
> + /* This conversion reorders the expressions if rev_bind_order. */
> if (pre_body->count) {
> if (rev_bind_order)
> cnt = 0;
> @@ -6089,6 +6251,18 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> body = (Scheme_Object *)naya;
> value = pre_body->value;
> pos = pre_body->position;
> +
> + if (skip_opts) {
> + /* Use "pre" clocks: */
> + advance_clocks_for_optimized(value,
> + &pre_vclock, &pre_aclock, &pre_kclock, &pre_sclock,
> + rhs_info,
> + ADVANCE_CLOCKS_INIT_FUEL);
> + once_vclock = pre_vclock;
> + once_aclock = pre_aclock;
> + once_kclock = pre_kclock;
> + once_sclock = pre_sclock;
> + }
> } else {
> /* We've dropped this clause entirely. */
> i++;
> @@ -6193,7 +6367,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> /* used only once; we may be able to shift the expression to the use
> site, instead of binding to a temporary */
> once_used = make_once_used(value, pos,
> - rhs_info->vclock, rhs_info->kclock, rhs_info->sclock,
> + once_vclock, once_aclock, once_kclock, once_sclock,
> + once_increments_kclock,
> NULL);
> if (!last_once_used)
> first_once_used = once_used;
> @@ -6215,7 +6390,8 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> if (cnt == 1) {
> /* Need to register as once-used, in case of copy propagation */
> once_used = make_once_used(NULL, pos+i,
> - rhs_info->vclock, rhs_info->kclock, rhs_info->sclock,
> + once_vclock, once_aclock, once_kclock, once_sclock,
> + once_increments_kclock,
> NULL);
> if (!last_once_used)
> first_once_used = once_used;
> @@ -6424,6 +6600,7 @@ scheme_optimize_lets(Scheme_Object *form, Optimize_Info *info, int for_inline, i
> info->single_result = body_info->single_result;
> info->preserves_marks = body_info->preserves_marks;
> info->vclock = body_info->vclock;
> + info->aclock = body_info->aclock;
> info->kclock = body_info->kclock;
> info->sclock = body_info->sclock;
>
> @@ -6593,7 +6770,7 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
> Scheme_Object *code, *ctx;
> Closure_Info *cl;
> mzshort dcs, *dcm;
> - int i, cnt, init_vclock, init_kclock, init_sclock;
> + int i, cnt, init_vclock, init_aclock, init_kclock, init_sclock;
> Scheme_Once_Used *first_once_used = NULL, *last_once_used = NULL;
>
> data = (Scheme_Closure_Data *)_data;
> @@ -6605,6 +6782,7 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
> SCHEME_LAMBDA_FRAME);
>
> init_vclock = info->vclock;
> + init_aclock = info->aclock;
> init_kclock = info->kclock;
> init_sclock = info->sclock;
>
> @@ -6630,7 +6808,7 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
> cnt = ((cl->local_flags[i] & SCHEME_USE_COUNT_MASK) >> SCHEME_USE_COUNT_SHIFT);
> if (cnt == 1) {
> last_once_used = make_once_used(NULL, i,
> - info->vclock, info->kclock, info->sclock,
> + info->vclock, info->aclock, info->kclock, info->sclock, 0,
> last_once_used);
> if (!first_once_used) first_once_used = last_once_used;
> optimize_propagate(info, i, (Scheme_Object *)last_once_used, 1);
> @@ -6684,6 +6862,7 @@ optimize_closure_compilation(Scheme_Object *_data, Optimize_Info *info, int cont
>
> /* closure itself is not an effect */
> info->vclock = init_vclock;
> + info->aclock = init_aclock;
> info->kclock = init_kclock;
> info->sclock = init_sclock;
> info->escapes = 0;
> @@ -7608,6 +7787,8 @@ Scheme_Object *scheme_optimize_expr(Scheme_Object *expr, Optimize_Info *info, in
> if (SAME_TYPE(SCHEME_TYPE(val), scheme_once_used_type)) {
> Scheme_Once_Used *o = (Scheme_Once_Used *)val;
> if (((o->vclock == info->vclock)
> + && ((o->aclock == info->aclock)
> + || !o->spans_k)
> && ((context & OPT_CONTEXT_SINGLED)
> || single_valued_noncm_expression(o->expr, 5)))
> || movable_expression(o->expr, info, o->delta, o->cross_lambda,
> @@ -7617,20 +7798,32 @@ Scheme_Object *scheme_optimize_expr(Scheme_Object *expr, Optimize_Info *info, in
> val = optimize_clone(1, o->expr, info, o->delta, 0);
> if (val) {
> int save_fuel = info->inline_fuel, save_no_types = info->no_types;
> - int save_vclock, save_kclock, save_sclock;
> + int save_vclock, save_aclock, save_kclock, save_sclock;
> info->size -= 1;
> o->used = 1;
> info->inline_fuel = 0; /* no more inlining; o->expr was already optimized */
> info->no_types = 1; /* cannot used inferred types, in case `val' inferred them */
> save_vclock = info->vclock; /* allowed to move => no change to clocks */
> + save_aclock = info->aclock;
> save_kclock = info->kclock;
> save_sclock = info->sclock;
>
> val = scheme_optimize_expr(val, info, context);
>
> + if (info->maybe_values_argument) {
> + /* Although `val` could be counted as taking 0 time, we advance
> + the clock conservatively to be consistent with `values`
> + splitting. */
> + advance_clocks_for_optimized(val,
> + &save_vclock, &save_aclock, &save_kclock, &save_sclock,
> + info,
> + ADVANCE_CLOCKS_INIT_FUEL);
> + }
> +
> info->inline_fuel = save_fuel;
> info->no_types = save_no_types;
> info->vclock = save_vclock;
> + info->aclock = save_aclock;
> info->kclock = save_kclock;
> info->sclock = save_sclock;
> return val;
> @@ -8468,7 +8661,7 @@ static void optimize_propagate(Optimize_Info *info, int pos, Scheme_Object *valu
> }
>
> static Scheme_Once_Used *make_once_used(Scheme_Object *val, int pos,
> - int vclock, int kclock, int sclock,
> + int vclock, int aclock, int kclock, int sclock, int spans_k,
> Scheme_Once_Used *prev)
> {
> Scheme_Once_Used *o;
> @@ -8479,8 +8672,10 @@ static Scheme_Once_Used *make_once_used(Scheme_Object *val, int pos,
> o->expr = val;
> o->pos = pos;
> o->vclock = vclock;
> + o->aclock = aclock;
> o->kclock = kclock;
> o->sclock = sclock;
> + o->spans_k = spans_k;
>
> if (prev)
> prev->next = o;
> @@ -8853,10 +9048,12 @@ static Optimize_Info *optimize_info_add_frame(Optimize_Info *info, int orig, int
> naya->top_level_consts = info->top_level_consts;
> naya->context = info->context;
> naya->vclock = info->vclock;
> + naya->aclock = info->aclock;
> naya->kclock = info->kclock;
> naya->sclock = info->sclock;
> naya->escapes = info->escapes;
> naya->init_kclock = info->kclock;
> + naya->maybe_values_argument = info->maybe_values_argument;
> naya->use_psize = info->use_psize;
> naya->logger = info->logger;
> naya->no_types = info->no_types;
> @@ -8888,6 +9085,7 @@ static void optimize_info_done(Optimize_Info *info, Optimize_Info *parent)
>
> parent->size += info->size;
> parent->vclock = info->vclock;
> + parent->aclock = info->aclock;
> parent->kclock = info->kclock;
> parent->sclock = info->sclock;
> parent->escapes = info->escapes;
>
> racket/src/racket/src/schpriv.h
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> --- OLD/racket/src/racket/src/schpriv.h
> +++ NEW/racket/src/racket/src/schpriv.h
> @@ -61,25 +61,28 @@
>
>
> /* We support 2^SCHEME_PRIM_OPT_INDEX_SIZE combinations of optimization flags: */
> -#define SCHEME_PRIM_IS_UNARY_INLINED 1
> -#define SCHEME_PRIM_IS_BINARY_INLINED 2
> -#define SCHEME_PRIM_IS_NARY_INLINED 4
> -#define SCHEME_PRIM_IS_UNSAFE_OMITABLE 8
> -#define SCHEME_PRIM_IS_OMITABLE 16
> -#define SCHEME_PRIM_IS_UNSAFE_FUNCTIONAL 32
> -#define SCHEME_PRIM_WANTS_FLONUM_FIRST 64
> -#define SCHEME_PRIM_WANTS_FLONUM_SECOND 128
> -#define SCHEME_PRIM_WANTS_FLONUM_THIRD 256
> -#define SCHEME_PRIM_WANTS_EXTFLONUM_FIRST 512
> -#define SCHEME_PRIM_WANTS_EXTFLONUM_SECOND 1024
> -#define SCHEME_PRIM_WANTS_EXTFLONUM_THIRD 2048
> -#define SCHEME_PRIM_IS_UNSAFE_NONALLOCATE 4096
> -#define SCHEME_PRIM_ALWAYS_ESCAPES 8192
> -
> -#define SCHEME_PRIM_OPT_TYPE_SHIFT 14
> +#define SCHEME_PRIM_IS_UNARY_INLINED (1 << 0)
> +#define SCHEME_PRIM_IS_BINARY_INLINED (1 << 1)
> +#define SCHEME_PRIM_IS_NARY_INLINED (1 << 2)
> +#define SCHEME_PRIM_IS_UNSAFE_OMITABLE (1 << 3)
> +#define SCHEME_PRIM_IS_OMITABLE (1 << 4)
> +#define SCHEME_PRIM_IS_OMITABLE_ALLOCATION (1 << 5)
> +#define SCHEME_PRIM_IS_UNSAFE_FUNCTIONAL (1 << 6)
> +#define SCHEME_PRIM_WANTS_FLONUM_FIRST (1 << 7)
> +#define SCHEME_PRIM_WANTS_FLONUM_SECOND (1 << 8)
> +#define SCHEME_PRIM_WANTS_FLONUM_THIRD (1 << 9)
> +#define SCHEME_PRIM_WANTS_EXTFLONUM_FIRST (1 << 10)
> +#define SCHEME_PRIM_WANTS_EXTFLONUM_SECOND (1 << 11)
> +#define SCHEME_PRIM_WANTS_EXTFLONUM_THIRD (1 << 12)
> +#define SCHEME_PRIM_IS_UNSAFE_NONALLOCATE (1 << 13)
> +#define SCHEME_PRIM_ALWAYS_ESCAPES (1 << 14)
> +
> +#define SCHEME_PRIM_OPT_TYPE_SHIFT 15
> #define SCHEME_PRIM_OPT_TYPE_MASK (SCHEME_MAX_LOCAL_TYPE_MASK << SCHEME_PRIM_OPT_TYPE_SHIFT)
> #define SCHEME_PRIM_OPT_TYPE(x) ((x & SCHEME_PRIM_OPT_TYPE_MASK) >> SCHEME_PRIM_OPT_TYPE_SHIFT)
>
> +#define SCHEME_PRIM_IS_OMITABLE_ANY (SCHEME_PRIM_IS_OMITABLE | SCHEME_PRIM_IS_OMITABLE_ALLOCATION | SCHEME_PRIM_IS_UNSAFE_OMITABLE)
> +
> #define SCHEME_PRIM_PRODUCES_FLONUM (SCHEME_LOCAL_TYPE_FLONUM << SCHEME_PRIM_OPT_TYPE_SHIFT)
> #define SCHEME_PRIM_PRODUCES_FIXNUM (SCHEME_LOCAL_TYPE_FIXNUM << SCHEME_PRIM_OPT_TYPE_SHIFT)
>
>
> racket/src/racket/src/vector.c
> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> --- OLD/racket/src/racket/src/vector.c
> +++ NEW/racket/src/racket/src/vector.c
> @@ -91,7 +91,7 @@ scheme_init_vector (Scheme_Env *env)
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
> | SCHEME_PRIM_IS_BINARY_INLINED
> | SCHEME_PRIM_IS_NARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant("vector", p, env);
>
> REGISTER_SO(scheme_vector_immutable_proc);
> @@ -100,7 +100,7 @@ scheme_init_vector (Scheme_Env *env)
> SCHEME_PRIM_PROC_FLAGS(p) |= scheme_intern_prim_opt_flags(SCHEME_PRIM_IS_UNARY_INLINED
> | SCHEME_PRIM_IS_BINARY_INLINED
> | SCHEME_PRIM_IS_NARY_INLINED
> - | SCHEME_PRIM_IS_OMITABLE);
> + | SCHEME_PRIM_IS_OMITABLE_ALLOCATION);
> scheme_add_global_constant("vector-immutable", p, env);
>
> p = scheme_make_folding_prim(vector_length, "vector-length", 1, 1, 1);