Reviewers: Yang, Sven Panne,
Message:
PTAL
Zheng Liu
zheng...@intel.com
Description:
Improve ClampDoubleToUint8 on ia32/x64.
It's measured faster when:
a) clamp never happens;
b) clamp random happens ([-128,384], pseudo random).
Please review this at
https://codereview.chromium.org/11190049/
SVN Base:
http://v8.googlecode.com/svn/branches/bleeding_edge/
Affected files:
M src/ia32/lithium-ia32.cc
M src/ia32/macro-assembler-ia32.cc
M src/x64/lithium-codegen-x64.cc
M src/x64/lithium-x64.h
M src/x64/lithium-x64.cc
M src/x64/macro-assembler-x64.h
M src/x64/macro-assembler-x64.cc
Index: src/ia32/lithium-ia32.cc
===================================================================
--- src/ia32/lithium-ia32.cc (revision 12749)
+++ src/ia32/lithium-ia32.cc (working copy)
@@ -1779,7 +1779,7 @@
Representation input_rep = value->representation();
if (input_rep.IsDouble()) {
LOperand* reg = UseRegister(value);
- return DefineAsRegister(new(zone()) LClampDToUint8(reg));
+ return DefineFixed(new(zone()) LClampDToUint8(reg), eax);
} else if (input_rep.IsInteger32()) {
LOperand* reg = UseFixed(value, eax);
return DefineFixed(new(zone()) LClampIToUint8(reg), eax);
Index: src/ia32/macro-assembler-ia32.cc
===================================================================
--- src/ia32/macro-assembler-ia32.cc (revision 12749)
+++ src/ia32/macro-assembler-ia32.cc (working copy)
@@ -129,14 +129,22 @@
XMMRegister scratch_reg,
Register result_reg) {
Label done;
- ExternalReference zero_ref = ExternalReference::address_of_zero();
- movdbl(scratch_reg, Operand::StaticVariable(zero_ref));
+ Label conv_failure;
+ pxor(scratch_reg, scratch_reg);
+ cvtsd2si(result_reg, input_reg);
+ test(result_reg, Immediate(0xFFFFFF00));
+ j(zero, &done, Label::kNear);
+ cmp(result_reg, Immediate(0x80000000));
+ j(equal, &conv_failure, Label::kNear);
+ mov(result_reg, Immediate(0));
+ setcc(above, result_reg);
+ sub(result_reg, Immediate(1));
+ and_(result_reg, Immediate(255));
+ jmp(&done, Label::kNear);
+ bind(&conv_failure);
Set(result_reg, Immediate(0));
ucomisd(input_reg, scratch_reg);
j(below, &done, Label::kNear);
- cvtsd2si(result_reg, input_reg);
- test(result_reg, Immediate(0xFFFFFF00));
- j(zero, &done, Label::kNear);
Set(result_reg, Immediate(255));
bind(&done);
}
Index: src/x64/lithium-codegen-x64.cc
===================================================================
--- src/x64/lithium-codegen-x64.cc (revision 12749)
+++ src/x64/lithium-codegen-x64.cc (working copy)
@@ -4490,8 +4490,7 @@
void LCodeGen::DoClampDToUint8(LClampDToUint8* instr) {
XMMRegister value_reg = ToDoubleRegister(instr->unclamped());
Register result_reg = ToRegister(instr->result());
- Register temp_reg = ToRegister(instr->temp());
- __ ClampDoubleToUint8(value_reg, xmm0, result_reg, temp_reg);
+ __ ClampDoubleToUint8(value_reg, xmm0, result_reg);
}
@@ -4505,8 +4504,7 @@
void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) {
ASSERT(instr->unclamped()->Equals(instr->result()));
Register input_reg = ToRegister(instr->unclamped());
- Register temp_reg = ToRegister(instr->temp());
- XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp2());
+ XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp_xmm());
Label is_smi, done, heap_number;
__ JumpIfSmi(input_reg, &is_smi);
@@ -4526,7 +4524,7 @@
// Heap number
__ bind(&heap_number);
__ movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
- __ ClampDoubleToUint8(xmm0, temp_xmm_reg, input_reg, temp_reg);
+ __ ClampDoubleToUint8(xmm0, temp_xmm_reg, input_reg);
__ jmp(&done, Label::kNear);
// smi
Index: src/x64/lithium-x64.cc
===================================================================
--- src/x64/lithium-x64.cc (revision 12749)
+++ src/x64/lithium-x64.cc (working copy)
@@ -1697,8 +1697,7 @@
Representation input_rep = value->representation();
LOperand* reg = UseRegister(value);
if (input_rep.IsDouble()) {
- return DefineAsRegister(new(zone()) LClampDToUint8(reg,
- TempRegister()));
+ return DefineAsRegister(new(zone()) LClampDToUint8(reg));
} else if (input_rep.IsInteger32()) {
return DefineSameAsFirst(new(zone()) LClampIToUint8(reg));
} else {
@@ -1706,7 +1705,6 @@
// Register allocator doesn't (yet) support allocation of double
// temps. Reserve xmm1 explicitly.
LClampTToUint8* result = new(zone()) LClampTToUint8(reg,
- TempRegister(),
FixedTemp(xmm1));
return AssignEnvironment(DefineSameAsFirst(result));
}
Index: src/x64/lithium-x64.h
===================================================================
--- src/x64/lithium-x64.h (revision 12749)
+++ src/x64/lithium-x64.h (working copy)
@@ -2138,15 +2138,13 @@
};
-class LClampDToUint8: public LTemplateInstruction<1, 1, 1> {
+class LClampDToUint8: public LTemplateInstruction<1, 1, 0> {
public:
- LClampDToUint8(LOperand* unclamped, LOperand* temp) {
+ explicit LClampDToUint8(LOperand* unclamped) {
inputs_[0] = unclamped;
- temps_[0] = temp;
}
LOperand* unclamped() { return inputs_[0]; }
- LOperand* temp() { return temps_[0]; }
DECLARE_CONCRETE_INSTRUCTION(ClampDToUint8, "clamp-d-to-uint8")
};
@@ -2164,19 +2162,16 @@
};
-class LClampTToUint8: public LTemplateInstruction<1, 1, 2> {
+class LClampTToUint8: public LTemplateInstruction<1, 1, 1> {
public:
LClampTToUint8(LOperand* unclamped,
- LOperand* temp,
- LOperand* temp2) {
+ LOperand* temp_xmm) {
inputs_[0] = unclamped;
- temps_[0] = temp;
- temps_[1] = temp2;
+ temps_[0] = temp_xmm;
}
LOperand* unclamped() { return inputs_[0]; }
- LOperand* temp() { return temps_[0]; }
- LOperand* temp2() { return temps_[1]; }
+ LOperand* temp_xmm() { return temps_[0]; }
DECLARE_CONCRETE_INSTRUCTION(ClampTToUint8, "clamp-t-to-uint8")
};
Index: src/x64/macro-assembler-x64.cc
===================================================================
--- src/x64/macro-assembler-x64.cc (revision 12749)
+++ src/x64/macro-assembler-x64.cc (working copy)
@@ -2868,16 +2868,24 @@
void MacroAssembler::ClampDoubleToUint8(XMMRegister input_reg,
XMMRegister temp_xmm_reg,
- Register result_reg,
- Register temp_reg) {
+ Register result_reg) {
Label done;
- Set(result_reg, 0);
+ Label conv_failure;
xorps(temp_xmm_reg, temp_xmm_reg);
- ucomisd(input_reg, temp_xmm_reg);
- j(below, &done, Label::kNear);
cvtsd2si(result_reg, input_reg);
testl(result_reg, Immediate(0xFFFFFF00));
j(zero, &done, Label::kNear);
+ cmpl(result_reg, Immediate(0x80000000));
+ j(equal, &conv_failure, Label::kNear);
+ movl(result_reg, Immediate(0));
+ setcc(above, result_reg);
+ subl(result_reg, Immediate(1));
+ andl(result_reg, Immediate(255));
+ jmp(&done, Label::kNear);
+ bind(&conv_failure);
+ Set(result_reg, 0);
+ ucomisd(input_reg, temp_xmm_reg);
+ j(below, &done, Label::kNear);
Set(result_reg, 255);
bind(&done);
}
Index: src/x64/macro-assembler-x64.h
===================================================================
--- src/x64/macro-assembler-x64.h (revision 12749)
+++ src/x64/macro-assembler-x64.h (working copy)
@@ -942,8 +942,7 @@
void ClampDoubleToUint8(XMMRegister input_reg,
XMMRegister temp_xmm_reg,
- Register result_reg,
- Register temp_reg);
+ Register result_reg);
void LoadUint32(XMMRegister dst, Register src, XMMRegister scratch);