We have a function:
void fdouble (double one, ...)
We generate RTL that looks like
;; Save R3 into the varargs area.
(insn 4 2 6 (set (mem:SI (reg/f:SI 77 virtual-incoming-args) 2)
(reg:SI 3 r3)) -1 (nil)
(nil))
...
;; Load `one' from the saved value.
(insn 20 18 21 (set (reg/v:DF 82)
(mem/f:DF (reg/f:SI 77 virtual-incoming-args) 3)) -1 (nil)
(nil))
This is bogus because the two MEMs are in different alias sets. So,
the scheduler reorders the load and store.
The bug comes from this hunk in rs6000.c:setup_incoming_varargs:
set = get_varargs_alias_set ();
if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG)
{
mem = gen_rtx_MEM (BLKmode,
plus_constant (save_area,
first_reg_offset * reg_size)),
MEM_ALIAS_SET (mem) = set;
move_block_from_reg
(GP_ARG_MIN_REG + first_reg_offset, mem,
GP_ARG_NUM_REG - first_reg_offset,
(GP_ARG_NUM_REG - first_reg_offset) * UNITS_PER_WORD);
The first thing we're storing is the last named parameter.
Certainly, one safe way to fix the bug is to not use the alias set
here. If I don't hear a better solution, I'll do that on the release
branch at least.
I *think* that what we want to do is store only the last named
parameter in alias set zero. If so, then something like the attached
patch should be right. (It needs cleaning up; for one thing, it's
appalling that we don't have FN_TYPE_STDARGS_P and FN_TYPE_VARARGS_P
macros.)
This patch does seem to fix David's test-case.
I would appreciate two things:
- David, would you mind firing off an AIX bootstrap to test
the patch?
- Richard, since you seem to have introduced the alias set stuff
(in revision 1.88), would you care to comment?
Thanks in advance to both,
--
Mark Mitchell ma...@codesourcery.com
CodeSourcery, LLC http://www.codesourcery.com
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.167
diff -c -p -r1.167 rs6000.c
*** rs6000.c 2001/02/09 03:15:56 1.167
--- rs6000.c 2001/04/25 18:45:17
*************** setup_incoming_varargs (cum, mode, type,
*** 2208,2230 ****
int reg_size = TARGET_32BIT ? 4 : 8;
rtx save_area, mem;
int first_reg_offset, set;
if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
{
- tree fntype;
- int stdarg_p;
-
- fntype = TREE_TYPE (current_function_decl);
- stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
- && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
- != void_type_node));
-
- /* For varargs, we do not want to skip the dummy va_dcl argument.
- For stdargs, we do want to skip the last named argument. */
- next_cum = *cum;
- if (stdarg_p)
- function_arg_advance (&next_cum, mode, type, 1);
-
/* Indicate to allocate space on the stack for varargs save area. */
/* ??? Does this really have to be located at a magic spot on the
stack, or can we allocate this with assign_stack_local instead. */
--- 2208,2229 ----
int reg_size = TARGET_32BIT ? 4 : 8;
rtx save_area, mem;
int first_reg_offset, set;
+ tree fntype;
+ int stdarg_p;
+ fntype = TREE_TYPE (current_function_decl);
+ stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+ != void_type_node));
+
+ /* For varargs, we do not want to skip the dummy va_dcl argument.
+ For stdargs, we do want to skip the last named argument. */
+ next_cum = *cum;
+ if (stdarg_p)
+ function_arg_advance (&next_cum, mode, type, 1);
+
if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
{
/* Indicate to allocate space on the stack for varargs save area. */
/* ??? Does this really have to be located at a magic spot on the
stack, or can we allocate this with assign_stack_local instead. */
*************** setup_incoming_varargs (cum, mode, type,
*** 2239,2248 ****
{
save_area = virtual_incoming_args_rtx;
cfun->machine->sysv_varargs_p = 0;
! first_reg_offset = cum->words;
! if (MUST_PASS_IN_STACK (mode, type))
! first_reg_offset += RS6000_ARG_SIZE (TYPE_MODE (type), type, 1);
}
set = get_varargs_alias_set ();
--- 2238,2256 ----
{
save_area = virtual_incoming_args_rtx;
cfun->machine->sysv_varargs_p = 0;
+
+ first_reg_offset = next_cum.words;
! if (next_cum.words != cum->words)
! {
! mem = gen_rtx_MEM (BLKmode,
! plus_constant (save_area,
! cum->words * reg_size));
! move_block_from_reg
! (GP_ARG_MIN_REG + cum->words, mem,
! first_reg_offset - cum->words,
! (first_reg_offset - cum->words) * UNITS_PER_WORD);
! }
}
set = get_varargs_alias_set ();
Richard> I havn't figured out why this is happening yet; I'll keep
Richard> poking.
Thanks!
> On Wed, Apr 25, 2001 at 11:54:28AM -0700, Mark Mitchell wrote:
> > The first thing we're storing is the last named parameter.
>
> I'm pretty sure this is the actual bug. Only the actual variadic
> arguments should be treated with the varargs alias set. Moreover,
> why in the world would we bother dumping named arguments to the
> stack like this only to read them back in?
>
> I havn't figured out why this is happening yet; I'll keep poking.
It's probably because of STRICT_ARGUMENT_NAMING. This is a
compatibility hack that causes the last argument to a varargs function
to be treated as unnamed, because that's how GCC used to do it and
some backends rely on it (it could change the ABI).
--
- Geoffrey Keating <geo...@geoffk.org>
Our discussion is up to:
> From: Dale Johannesen <da...@apple.com>
> All right, thanks for looking. I believe STRICT_ARGUMENT_NAMING
> should be used throughout, and I could write it that way. However,
> I am not in a position to test it on anything but MacOS. Is that
> what you want?
So perhaps if someone could help Dale with testing his patch on AIX
(he can of course build a cross-compiler to test it on ELF, assuming
that the code he's touching can even be run under ELF) he could
make more progress?
I've set up unnamed parameters (those matching ...) to be passed only in
Rn, not in Fn as well. This works with the usual receiving mechanism in
the callee, which stores all Rn to memory at the beginning. If somebody
needs them passed in Fn as well for some reason, there are (I think) 2
places you have to change, which are indicated in comments.
Index: rs6000.c
===================================================================
RCS file: /cvs/repository/CoreTools/gcc3/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.18
diff -u -d -b -w -p -r1.18 rs6000.c
--- rs6000.c 2001/04/17 02:14:18 1.18
+++ rs6000.c 2001/04/25 23:51:26
@@ -2004,12 +2004,10 @@ function_arg_advance (cum, mode, type, n
&& function_arg_boundary (mode, type) == 64) ? 1 : 0;
cum->words += align;
- if (named)
- {
- cum->words += RS6000_ARG_SIZE (mode, type, named);
- if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_HARD_FLOAT)
+ cum->words += RS6000_ARG_SIZE (mode, type, 1);
+ /* Unnamed params are not passed in FP regs. */
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_HARD_FLOAT &&
named)
cum->fregno++;
- }
if (TARGET_DEBUG_ARG)
{
@@ -2113,13 +2111,11 @@ function_arg (cum, mode, type, named)
&& function_arg_boundary (mode, type) == 64) ? 1 : 0;
int align_words = cum->words + align;
- if (! named)
- return NULL_RTX;
-
if (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return NULL_RTX;
- if (USE_FP_FOR_ARG_P (*cum, mode, type))
+ /* unnamed args should be passed in Rn regardless of type. */
+ if (USE_FP_FOR_ARG_P (*cum, mode, type) && named )
{
if (! type
|| ((cum->nargs_prototype > 0)
@@ -2169,20 +2165,17 @@ function_arg_partial_nregs (cum, mode, t
tree type;
int named;
{
- if (! named)
- return 0;
-
if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
return 0;
- if (USE_FP_FOR_ARG_P (*cum, mode, type))
+ if (USE_FP_FOR_ARG_P (*cum, mode, type) && named )
{
if (cum->nargs_prototype >= 0)
return 0;
}
if (cum->words < GP_ARG_NUM_REG
- && GP_ARG_NUM_REG < (cum->words + RS6000_ARG_SIZE (mode, type,
named)))
+ && GP_ARG_NUM_REG < (cum->words + RS6000_ARG_SIZE (mode, type,
1)))
{
int ret = GP_ARG_NUM_REG - cum->words;
if (ret && TARGET_DEBUG_ARG)
@@ -2249,9 +2242,6 @@ setup_incoming_varargs (cum, mode, type,
int reg_size = TARGET_32BIT ? 4 : 8;
rtx save_area = NULL_RTX, mem;
int first_reg_offset, set;
-
- if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
- {
tree fntype;
int stdarg_p;
@@ -2266,6 +2256,8 @@ setup_incoming_varargs (cum, mode, type,
if (stdarg_p)
function_arg_advance (&next_cum, mode, type, 1);
+ if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
+ {
/* Indicate to allocate space on the stack for varargs save
area. */
/* ??? Does this really have to be located at a magic spot on the
stack, or can we allocate this with assign_stack_local
instead. */
@@ -2278,10 +2270,10 @@ setup_incoming_varargs (cum, mode, type,
}
else
{
+ first_reg_offset = next_cum.words;
save_area = virtual_incoming_args_rtx;
cfun->machine->sysv_varargs_p = 0;
- first_reg_offset = cum->words;
if (MUST_PASS_IN_STACK (mode, type))
first_reg_offset += RS6000_ARG_SIZE (TYPE_MODE (type), type, 1);
}
Index: rs6000.h
===================================================================
RCS file: /cvs/repository/CoreTools/gcc3/gcc/config/rs6000/rs6000.h,v
retrieving revision 1.6
diff -u -d -b -w -p -r1.6 rs6000.h
--- rs6000.h 2001/04/03 01:53:17 1.6
+++ rs6000.h 2001/04/25 23:51:26
@@ -205,6 +205,12 @@ extern int target_flags;
/* Nonzero if we need to schedule the prolog and epilog. */
#define MASK_SCHED_PROLOG 0x00040000
+/* This causes the last named parameter to a varargs function to
+ be treated as an ordinary parameter, not as part of the varargs
+ area. Slightly improves generated code, and fixes a bug that
+ shows up in 980205.c. Does not affect SVR4 or Solaris. */
+#define STRICT_ARGUMENT_NAMING 1
+
#define TARGET_POWER (target_flags & MASK_POWER)
#define TARGET_POWER2 (target_flags & MASK_POWER2)
#define TARGET_POWERPC (target_flags & MASK_POWERPC)
I'm pretty sure this is the actual bug. Only the actual variadic
arguments should be treated with the varargs alias set. Moreover,
why in the world would we bother dumping named arguments to the
stack like this only to read them back in?
I havn't figured out why this is happening yet; I'll keep poking.
r~
>>>>>> Richard Henderson writes:
>
> Richard> Where is "one" placed when calling the function
>
> Richard> void foo(double one, ...);
>
> FP arguments are passed in both the FPR argument registers and
> GPR argument registers. "one" is passed in fpr1 and gpr3/gpr4 pair.
> "two" is passed in fpr2 and gpr5/gpr6 pair (for 32-bit mode code, one
> GPR
> for 64-bit code).
Does anyone know of a good reason to put "two" in fpr2? The callee code
does not look there for it.
Richard> Ug. It does indeed change the abi. What a crock. Well, any
Richard> idea how xlc treats this situation? I suspect they don't load
Richard> do this sort of thing, at which point we can make this change
Richard> as a compatability bug fix.
What specific informatin do you want/need about IBM's Visual Age
compilers for AIX?
David
The test case from the PR should be ok. Just need to check
where the native compiler has main place the first argument
to fdouble.
r~
I'm not sure. If you can think of a testcase I can try it on bluey
and see what the native compiler there thinks.
--
- Geoffrey Keating <geo...@geoffk.org>
Indeed it is.
> This is a compatibility hack that causes the last argument to a
> varargs function to be treated as unnamed, because that's how GCC
> used to do it and some backends rely on it (it could change the ABI).
Ug. It does indeed change the abi. What a crock. Well, any
idea how xlc treats this situation? I suspect they don't load
do this sort of thing, at which point we can make this change
as a compatability bug fix.
r~
Where is "one" placed when calling the function
void foo(double one, ...);
r~
In theory we can assemble enough pieces to get a full eabi cross
toolchain (and take care of the few remaining "what's Darwin?" reactions
from retro-conf packages), and it would be handy to have for this
kind of testing.
Stan
Here's what it does for this code:
void v(double a, ...);
void foo(void)
{
v (1.0, 2.0);
}
#comments are mine:
l r31,T.16.NO_SYMBOL(RTOC)
cau r3,r0,0x3ff0 # load r3/r4 with 1.0
cal r6,0(r0)
oril r4,r6,0x0000
cau r5,r0,0x4000 # load r5/r6 with 2.0
lfs fp1,0(r31) # load fp1 with 1.0
lfs fp2,4(r31) # load fp2 with 2.0
bl .v{PR} # make the call
That is, it seems to place all values in both FPRs and GPRs.
David
This seems OK---in any case, if we added such an optimisation, it
should be a separate patch, not as part of a bug fix. So could you
look at Dale's second patch? It should generate equivalent code in
all situations but for fixing the aliasing bug.
>
> On Wednesday, April 25, 2001, at 06:30 PM, David Edelsohn wrote:
>
>> This does not seem correct to me. The question is not where the
>> receiver looks for the arguments, but what the ABI says. The ABI (for
>> AIX
>> at least) says that it must be placed in both locations.
>
> True, but I don't think that's necessarily an overriding consideration,
> IF no runtime compatability problems can arise. According to its
> docs, anyway, gcc has been willing to tolerate some actual runtime
> incompatiblities (see the chapter "Interfacing to GCC Output" for
> examples). If that's acceptable, this difference [which is harmless,
> and results in more efficient code generation in a common case, namely
> printf("%f")] should be also, IMHO. To be sure, my main job at the
> moment is improving the quality of the generated code, which may bias
> me a bit.:)
>
> However, I am not the final arbiter. So I will also do it in full
> pedantic ABI conformance, and whoever has authority can take their pick.
And here it is. Bootstrap & check pass on Darwin. Same caveats as
previous attempt.
2001-04-26 Dale Johannesen <da...@apple.com>
* config/rs6000/rs6000.h: enable STRICT_ARGUMENT_NAMING
* config/rs6000/rs6000.c (function_arg_advance, function_arg,
function_arg_partial_nregs, setup_incoming_varargs): change
code generation around call to be correct for varargs when
STRICT_ARGUMENT_NAMING is on
Index: rs6000.c
===================================================================
RCS file: /cvs/repository/CoreTools/gcc3/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.18
diff -u -d -b -w -r1.18 rs6000.c
--- rs6000.c 2001/04/17 02:14:18 1.18
+++ rs6000.c 2001/04/26 20:38:15
@@ -2004,12 +2004,9 @@
&& function_arg_boundary (mode, type) == 64) ? 1 : 0;
cum->words += align;
- if (named)
- {
- cum->words += RS6000_ARG_SIZE (mode, type, named);
+ cum->words += RS6000_ARG_SIZE (mode, type, 1);
if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_HARD_FLOAT)
cum->fregno++;
- }
if (TARGET_DEBUG_ARG)
{
@@ -2113,9 +2110,6 @@
&& function_arg_boundary (mode, type) == 64) ? 1 : 0;
int align_words = cum->words + align;
- if (! named)
- return NULL_RTX;
-
if (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return NULL_RTX;
@@ -2169,9 +2163,6 @@
tree type;
int named;
{
- if (! named)
- return 0;
-
if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
return 0;
@@ -2182,7 +2173,7 @@
}
if (cum->words < GP_ARG_NUM_REG
- && GP_ARG_NUM_REG < (cum->words + RS6000_ARG_SIZE (mode, type,
named)))
+ && GP_ARG_NUM_REG < (cum->words + RS6000_ARG_SIZE (mode, type,
1)))
{
int ret = GP_ARG_NUM_REG - cum->words;
if (ret && TARGET_DEBUG_ARG)
@@ -2249,9 +2240,6 @@
int reg_size = TARGET_32BIT ? 4 : 8;
rtx save_area = NULL_RTX, mem;
int first_reg_offset, set;
-
- if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
- {
tree fntype;
int stdarg_p;
@@ -2266,6 +2254,8 @@
if (stdarg_p)
function_arg_advance (&next_cum, mode, type, 1);
+ if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
+ {
/* Indicate to allocate space on the stack for varargs save
area. */
/* ??? Does this really have to be located at a magic spot on the
stack, or can we allocate this with assign_stack_local
instead. */
@@ -2278,10 +2268,10 @@
}
else
{
+ first_reg_offset = next_cum.words;
save_area = virtual_incoming_args_rtx;
cfun->machine->sysv_varargs_p = 0;
- first_reg_offset = cum->words;
if (MUST_PASS_IN_STACK (mode, type))
first_reg_offset += RS6000_ARG_SIZE (TYPE_MODE (type), type, 1);
}
Index: rs6000.h
===================================================================
RCS file: /cvs/repository/CoreTools/gcc3/gcc/config/rs6000/rs6000.h,v
retrieving revision 1.6
diff -u -d -b -w -r1.6 rs6000.h
--- rs6000.h 2001/04/03 01:53:17 1.6
+++ rs6000.h 2001/04/26 20:38:15
@@ -205,6 +205,12 @@
I will check into this further next week, but I do not believe
that the compiler has any leeway in the implementation. Improving code
quality is an optimization. One should not hard-code optimizations like
this; the compiler must deduce the optimization when the information
allows it to do so.
David