Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)

21 views
Skip to first unread message

shd...@gmail.com

unread,
Oct 19, 2012, 1:20:59 AM10/19/12
to yan...@chromium.org, sven...@chromium.org, v8-...@googlegroups.com, zheng...@intel.com
Reviewers: Yang, Sven Panne,

Message:
PTAL

Zheng Liu
zheng...@intel.com

Description:
Improve ClampDoubleToUint8 on ia32/x64.
It's measured faster when:
a) clamp never happens;
b) clamp random happens ([-128,384], pseudo random).

Please review this at https://codereview.chromium.org/11190049/

SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/

Affected files:
M src/ia32/lithium-ia32.cc
M src/ia32/macro-assembler-ia32.cc
M src/x64/lithium-codegen-x64.cc
M src/x64/lithium-x64.h
M src/x64/lithium-x64.cc
M src/x64/macro-assembler-x64.h
M src/x64/macro-assembler-x64.cc


Index: src/ia32/lithium-ia32.cc
===================================================================
--- src/ia32/lithium-ia32.cc (revision 12749)
+++ src/ia32/lithium-ia32.cc (working copy)
@@ -1779,7 +1779,7 @@
Representation input_rep = value->representation();
if (input_rep.IsDouble()) {
LOperand* reg = UseRegister(value);
- return DefineAsRegister(new(zone()) LClampDToUint8(reg));
+ return DefineFixed(new(zone()) LClampDToUint8(reg), eax);
} else if (input_rep.IsInteger32()) {
LOperand* reg = UseFixed(value, eax);
return DefineFixed(new(zone()) LClampIToUint8(reg), eax);
Index: src/ia32/macro-assembler-ia32.cc
===================================================================
--- src/ia32/macro-assembler-ia32.cc (revision 12749)
+++ src/ia32/macro-assembler-ia32.cc (working copy)
@@ -129,14 +129,22 @@
XMMRegister scratch_reg,
Register result_reg) {
Label done;
- ExternalReference zero_ref = ExternalReference::address_of_zero();
- movdbl(scratch_reg, Operand::StaticVariable(zero_ref));
+ Label conv_failure;
+ pxor(scratch_reg, scratch_reg);
+ cvtsd2si(result_reg, input_reg);
+ test(result_reg, Immediate(0xFFFFFF00));
+ j(zero, &done, Label::kNear);
+ cmp(result_reg, Immediate(0x80000000));
+ j(equal, &conv_failure, Label::kNear);
+ mov(result_reg, Immediate(0));
+ setcc(above, result_reg);
+ sub(result_reg, Immediate(1));
+ and_(result_reg, Immediate(255));
+ jmp(&done, Label::kNear);
+ bind(&conv_failure);
Set(result_reg, Immediate(0));
ucomisd(input_reg, scratch_reg);
j(below, &done, Label::kNear);
- cvtsd2si(result_reg, input_reg);
- test(result_reg, Immediate(0xFFFFFF00));
- j(zero, &done, Label::kNear);
Set(result_reg, Immediate(255));
bind(&done);
}
Index: src/x64/lithium-codegen-x64.cc
===================================================================
--- src/x64/lithium-codegen-x64.cc (revision 12749)
+++ src/x64/lithium-codegen-x64.cc (working copy)
@@ -4490,8 +4490,7 @@
void LCodeGen::DoClampDToUint8(LClampDToUint8* instr) {
XMMRegister value_reg = ToDoubleRegister(instr->unclamped());
Register result_reg = ToRegister(instr->result());
- Register temp_reg = ToRegister(instr->temp());
- __ ClampDoubleToUint8(value_reg, xmm0, result_reg, temp_reg);
+ __ ClampDoubleToUint8(value_reg, xmm0, result_reg);
}


@@ -4505,8 +4504,7 @@
void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) {
ASSERT(instr->unclamped()->Equals(instr->result()));
Register input_reg = ToRegister(instr->unclamped());
- Register temp_reg = ToRegister(instr->temp());
- XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp2());
+ XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp_xmm());
Label is_smi, done, heap_number;

__ JumpIfSmi(input_reg, &is_smi);
@@ -4526,7 +4524,7 @@
// Heap number
__ bind(&heap_number);
__ movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
- __ ClampDoubleToUint8(xmm0, temp_xmm_reg, input_reg, temp_reg);
+ __ ClampDoubleToUint8(xmm0, temp_xmm_reg, input_reg);
__ jmp(&done, Label::kNear);

// smi
Index: src/x64/lithium-x64.cc
===================================================================
--- src/x64/lithium-x64.cc (revision 12749)
+++ src/x64/lithium-x64.cc (working copy)
@@ -1697,8 +1697,7 @@
Representation input_rep = value->representation();
LOperand* reg = UseRegister(value);
if (input_rep.IsDouble()) {
- return DefineAsRegister(new(zone()) LClampDToUint8(reg,
- TempRegister()));
+ return DefineAsRegister(new(zone()) LClampDToUint8(reg));
} else if (input_rep.IsInteger32()) {
return DefineSameAsFirst(new(zone()) LClampIToUint8(reg));
} else {
@@ -1706,7 +1705,6 @@
// Register allocator doesn't (yet) support allocation of double
// temps. Reserve xmm1 explicitly.
LClampTToUint8* result = new(zone()) LClampTToUint8(reg,
- TempRegister(),
FixedTemp(xmm1));
return AssignEnvironment(DefineSameAsFirst(result));
}
Index: src/x64/lithium-x64.h
===================================================================
--- src/x64/lithium-x64.h (revision 12749)
+++ src/x64/lithium-x64.h (working copy)
@@ -2138,15 +2138,13 @@
};


-class LClampDToUint8: public LTemplateInstruction<1, 1, 1> {
+class LClampDToUint8: public LTemplateInstruction<1, 1, 0> {
public:
- LClampDToUint8(LOperand* unclamped, LOperand* temp) {
+ explicit LClampDToUint8(LOperand* unclamped) {
inputs_[0] = unclamped;
- temps_[0] = temp;
}

LOperand* unclamped() { return inputs_[0]; }
- LOperand* temp() { return temps_[0]; }

DECLARE_CONCRETE_INSTRUCTION(ClampDToUint8, "clamp-d-to-uint8")
};
@@ -2164,19 +2162,16 @@
};


-class LClampTToUint8: public LTemplateInstruction<1, 1, 2> {
+class LClampTToUint8: public LTemplateInstruction<1, 1, 1> {
public:
LClampTToUint8(LOperand* unclamped,
- LOperand* temp,
- LOperand* temp2) {
+ LOperand* temp_xmm) {
inputs_[0] = unclamped;
- temps_[0] = temp;
- temps_[1] = temp2;
+ temps_[0] = temp_xmm;
}

LOperand* unclamped() { return inputs_[0]; }
- LOperand* temp() { return temps_[0]; }
- LOperand* temp2() { return temps_[1]; }
+ LOperand* temp_xmm() { return temps_[0]; }

DECLARE_CONCRETE_INSTRUCTION(ClampTToUint8, "clamp-t-to-uint8")
};
Index: src/x64/macro-assembler-x64.cc
===================================================================
--- src/x64/macro-assembler-x64.cc (revision 12749)
+++ src/x64/macro-assembler-x64.cc (working copy)
@@ -2868,16 +2868,24 @@

void MacroAssembler::ClampDoubleToUint8(XMMRegister input_reg,
XMMRegister temp_xmm_reg,
- Register result_reg,
- Register temp_reg) {
+ Register result_reg) {
Label done;
- Set(result_reg, 0);
+ Label conv_failure;
xorps(temp_xmm_reg, temp_xmm_reg);
- ucomisd(input_reg, temp_xmm_reg);
- j(below, &done, Label::kNear);
cvtsd2si(result_reg, input_reg);
testl(result_reg, Immediate(0xFFFFFF00));
j(zero, &done, Label::kNear);
+ cmpl(result_reg, Immediate(0x80000000));
+ j(equal, &conv_failure, Label::kNear);
+ movl(result_reg, Immediate(0));
+ setcc(above, result_reg);
+ subl(result_reg, Immediate(1));
+ andl(result_reg, Immediate(255));
+ jmp(&done, Label::kNear);
+ bind(&conv_failure);
+ Set(result_reg, 0);
+ ucomisd(input_reg, temp_xmm_reg);
+ j(below, &done, Label::kNear);
Set(result_reg, 255);
bind(&done);
}
Index: src/x64/macro-assembler-x64.h
===================================================================
--- src/x64/macro-assembler-x64.h (revision 12749)
+++ src/x64/macro-assembler-x64.h (working copy)
@@ -942,8 +942,7 @@

void ClampDoubleToUint8(XMMRegister input_reg,
XMMRegister temp_xmm_reg,
- Register result_reg,
- Register temp_reg);
+ Register result_reg);

void LoadUint32(XMMRegister dst, Register src, XMMRegister scratch);



yan...@chromium.org

unread,
Oct 19, 2012, 5:29:37 AM10/19/12
to shd...@gmail.com, sven...@chromium.org, v8-...@googlegroups.com, zheng...@intel.com
On 2012/10/19 05:20:58, Zheng Liu wrote:
> PTAL

> Zheng Liu
> mailto:zheng...@intel.com

LGTM so far, but could you please provide a benchmark that shows the
improvement?

https://codereview.chromium.org/11190049/

yan...@chromium.org

unread,
Oct 19, 2012, 5:29:45 AM10/19/12
to shd...@gmail.com, sven...@chromium.org, v8-...@googlegroups.com, zheng...@intel.com

https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32.cc
File src/ia32/lithium-ia32.cc (right):

https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32.cc#newcode1782
src/ia32/lithium-ia32.cc:1782: return DefineFixed(new(zone())
LClampDToUint8(reg), eax);
I assume that you have to use EAX because SETcc needs to access the
least significant byte, ruling out ESI and EDI?

https://codereview.chromium.org/11190049/

shd...@gmail.com

unread,
Oct 19, 2012, 6:25:48 AM10/19/12
to yan...@chromium.org, sven...@chromium.org, v8-...@googlegroups.com, zheng...@intel.com
Yes.
And I sent a micro benchmark to you (with a 2MiB attachment).

http://codereview.chromium.org/11190049/

yan...@chromium.org

unread,
Oct 19, 2012, 9:21:12 AM10/19/12
to shd...@gmail.com, sven...@chromium.org, v8-...@googlegroups.com, zheng...@intel.com
Thanks for this patch! I landed this as r12777.

http://codereview.chromium.org/11190049/
Reply all
Reply to author
Forward
0 new messages