Account Options

  1. Sign in
The old Google Groups will be going away soon, but your browser is incompatible with the new version.
Google Groups Home
« Groups Home
Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)
There are currently too many topics in this group that display first. To make this topic appear first, remove this option from another topic.
There was an error processing your request. Please try again.
flag
  5 messages - Collapse all  -  Translate all to Translated (View all originals)
The group you are posting to is a Usenet group. Messages posted to this group will make your email address visible to anyone on the Internet.
Your reply message has not been sent.
Your post was successful
 
From:
To:
Cc:
Followup To:
Add Cc | Add Followup-to | Edit Subject
Subject:
Validation:
For verification purposes please type the characters you see in the picture below or the numbers you hear by clicking the accessibility icon. Listen and type the numbers you hear
 
shdw...@gmail.com  
View profile  
 More options Oct 19 2012, 1:21 am
From: shdw...@gmail.com
Date: Fri, 19 Oct 2012 05:20:59 +0000
Local: Fri, Oct 19 2012 1:20 am
Subject: Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)
Reviewers: Yang, Sven Panne,

Message:
PTAL

Zheng Liu
zheng.z....@intel.com

Description:
Improve ClampDoubleToUint8 on ia32/x64.
It's measured faster when:
a) clamp never happens;
b) clamp random happens ([-128,384], pseudo random).

Please review this at https://codereview.chromium.org/11190049/

SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/

Affected files:
   M     src/ia32/lithium-ia32.cc
   M     src/ia32/macro-assembler-ia32.cc
   M     src/x64/lithium-codegen-x64.cc
   M     src/x64/lithium-x64.h
   M     src/x64/lithium-x64.cc
   M     src/x64/macro-assembler-x64.h
   M     src/x64/macro-assembler-x64.cc

Index: src/ia32/lithium-ia32.cc
===================================================================
--- src/ia32/lithium-ia32.cc    (revision 12749)
+++ src/ia32/lithium-ia32.cc    (working copy)
@@ -1779,7 +1779,7 @@
    Representation input_rep = value->representation();
    if (input_rep.IsDouble()) {
      LOperand* reg = UseRegister(value);
-    return DefineAsRegister(new(zone()) LClampDToUint8(reg));
+    return DefineFixed(new(zone()) LClampDToUint8(reg), eax);
    } else if (input_rep.IsInteger32()) {
      LOperand* reg = UseFixed(value, eax);
      return DefineFixed(new(zone()) LClampIToUint8(reg), eax);
Index: src/ia32/macro-assembler-ia32.cc
===================================================================
--- src/ia32/macro-assembler-ia32.cc    (revision 12749)
+++ src/ia32/macro-assembler-ia32.cc    (working copy)
@@ -129,14 +129,22 @@
                                          XMMRegister scratch_reg,
                                          Register result_reg) {
    Label done;
-  ExternalReference zero_ref = ExternalReference::address_of_zero();
-  movdbl(scratch_reg, Operand::StaticVariable(zero_ref));
+  Label conv_failure;
+  pxor(scratch_reg, scratch_reg);
+  cvtsd2si(result_reg, input_reg);
+  test(result_reg, Immediate(0xFFFFFF00));
+  j(zero, &done, Label::kNear);
+  cmp(result_reg, Immediate(0x80000000));
+  j(equal, &conv_failure, Label::kNear);
+  mov(result_reg, Immediate(0));
+  setcc(above, result_reg);
+  sub(result_reg, Immediate(1));
+  and_(result_reg, Immediate(255));
+  jmp(&done, Label::kNear);
+  bind(&conv_failure);
    Set(result_reg, Immediate(0));
    ucomisd(input_reg, scratch_reg);
    j(below, &done, Label::kNear);
-  cvtsd2si(result_reg, input_reg);
-  test(result_reg, Immediate(0xFFFFFF00));
-  j(zero, &done, Label::kNear);
    Set(result_reg, Immediate(255));
    bind(&done);
  }
Index: src/x64/lithium-codegen-x64.cc
===================================================================
--- src/x64/lithium-codegen-x64.cc      (revision 12749)
+++ src/x64/lithium-codegen-x64.cc      (working copy)
@@ -4490,8 +4490,7 @@
  void LCodeGen::DoClampDToUint8(LClampDToUint8* instr) {
    XMMRegister value_reg = ToDoubleRegister(instr->unclamped());
    Register result_reg = ToRegister(instr->result());
-  Register temp_reg = ToRegister(instr->temp());
-  __ ClampDoubleToUint8(value_reg, xmm0, result_reg, temp_reg);
+  __ ClampDoubleToUint8(value_reg, xmm0, result_reg);
  }

@@ -4505,8 +4504,7 @@
  void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) {
    ASSERT(instr->unclamped()->Equals(instr->result()));
    Register input_reg = ToRegister(instr->unclamped());
-  Register temp_reg = ToRegister(instr->temp());
-  XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp2());
+  XMMRegister temp_xmm_reg = ToDoubleRegister(instr->temp_xmm());
    Label is_smi, done, heap_number;

    __ JumpIfSmi(input_reg, &is_smi);
@@ -4526,7 +4524,7 @@
    // Heap number
    __ bind(&heap_number);
    __ movsd(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset));
-  __ ClampDoubleToUint8(xmm0, temp_xmm_reg, input_reg, temp_reg);
+  __ ClampDoubleToUint8(xmm0, temp_xmm_reg, input_reg);
    __ jmp(&done, Label::kNear);

    // smi
Index: src/x64/lithium-x64.cc
===================================================================
--- src/x64/lithium-x64.cc      (revision 12749)
+++ src/x64/lithium-x64.cc      (working copy)
@@ -1697,8 +1697,7 @@
    Representation input_rep = value->representation();
    LOperand* reg = UseRegister(value);
    if (input_rep.IsDouble()) {
-    return DefineAsRegister(new(zone()) LClampDToUint8(reg,
-                                               TempRegister()));
+    return DefineAsRegister(new(zone()) LClampDToUint8(reg));
    } else if (input_rep.IsInteger32()) {
      return DefineSameAsFirst(new(zone()) LClampIToUint8(reg));
    } else {
@@ -1706,7 +1705,6 @@
      // Register allocator doesn't (yet) support allocation of double
      // temps. Reserve xmm1 explicitly.
      LClampTToUint8* result = new(zone()) LClampTToUint8(reg,
-                                                        TempRegister(),
                                                          FixedTemp(xmm1));
      return AssignEnvironment(DefineSameAsFirst(result));
    }
Index: src/x64/lithium-x64.h
===================================================================
--- src/x64/lithium-x64.h       (revision 12749)
+++ src/x64/lithium-x64.h       (working copy)
@@ -2138,15 +2138,13 @@
  };

-class LClampDToUint8: public LTemplateInstruction<1, 1, 1> {
+class LClampDToUint8: public LTemplateInstruction<1, 1, 0> {
   public:
-  LClampDToUint8(LOperand* unclamped, LOperand* temp) {
+  explicit LClampDToUint8(LOperand* unclamped) {
      inputs_[0] = unclamped;
-    temps_[0] = temp;
    }

    LOperand* unclamped() { return inputs_[0]; }
-  LOperand* temp() { return temps_[0]; }

    DECLARE_CONCRETE_INSTRUCTION(ClampDToUint8, "clamp-d-to-uint8")
  };
@@ -2164,19 +2162,16 @@
  };

-class LClampTToUint8: public LTemplateInstruction<1, 1, 2> {
+class LClampTToUint8: public LTemplateInstruction<1, 1, 1> {
   public:
    LClampTToUint8(LOperand* unclamped,
-                 LOperand* temp,
-                 LOperand* temp2) {
+                 LOperand* temp_xmm) {
      inputs_[0] = unclamped;
-    temps_[0] = temp;
-    temps_[1] = temp2;
+    temps_[0] = temp_xmm;
    }

    LOperand* unclamped() { return inputs_[0]; }
-  LOperand* temp() { return temps_[0]; }
-  LOperand* temp2() { return temps_[1]; }
+  LOperand* temp_xmm() { return temps_[0]; }

    DECLARE_CONCRETE_INSTRUCTION(ClampTToUint8, "clamp-t-to-uint8")
  };
Index: src/x64/macro-assembler-x64.cc
===================================================================
--- src/x64/macro-assembler-x64.cc      (revision 12749)
+++ src/x64/macro-assembler-x64.cc      (working copy)
@@ -2868,16 +2868,24 @@

  void MacroAssembler::ClampDoubleToUint8(XMMRegister input_reg,
                                          XMMRegister temp_xmm_reg,
-                                        Register result_reg,
-                                        Register temp_reg) {
+                                        Register result_reg) {
    Label done;
-  Set(result_reg, 0);
+  Label conv_failure;
    xorps(temp_xmm_reg, temp_xmm_reg);
-  ucomisd(input_reg, temp_xmm_reg);
-  j(below, &done, Label::kNear);
    cvtsd2si(result_reg, input_reg);
    testl(result_reg, Immediate(0xFFFFFF00));
    j(zero, &done, Label::kNear);
+  cmpl(result_reg, Immediate(0x80000000));
+  j(equal, &conv_failure, Label::kNear);
+  movl(result_reg, Immediate(0));
+  setcc(above, result_reg);
+  subl(result_reg, Immediate(1));
+  andl(result_reg, Immediate(255));
+  jmp(&done, Label::kNear);
+  bind(&conv_failure);
+  Set(result_reg, 0);
+  ucomisd(input_reg, temp_xmm_reg);
+  j(below, &done, Label::kNear);
    Set(result_reg, 255);
    bind(&done);
  }
Index: src/x64/macro-assembler-x64.h
===================================================================
--- src/x64/macro-assembler-x64.h       (revision 12749)
+++ src/x64/macro-assembler-x64.h       (working copy)
@@ -942,8 +942,7 @@

    void ClampDoubleToUint8(XMMRegister input_reg,
                            XMMRegister temp_xmm_reg,
-                          Register result_reg,
-                          Register temp_reg);
+                          Register result_reg);

    void LoadUint32(XMMRegister dst, Register src, XMMRegister scratch);


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
yang...@chromium.org  
View profile  
 More options Oct 19 2012, 5:29 am
From: yang...@chromium.org
Date: Fri, 19 Oct 2012 09:29:37 +0000
Local: Fri, Oct 19 2012 5:29 am
Subject: Re: Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)
On 2012/10/19 05:20:58, Zheng Liu wrote:

> PTAL
> Zheng Liu
> mailto:zheng.z....@intel.com

LGTM so far, but could you please provide a benchmark that shows the
improvement?

https://codereview.chromium.org/11190049/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
yang...@chromium.org  
View profile  
 More options Oct 19 2012, 5:29 am
From: yang...@chromium.org
Date: Fri, 19 Oct 2012 09:29:45 +0000
Local: Fri, Oct 19 2012 5:29 am
Subject: Re: Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)

https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32.cc
File src/ia32/lithium-ia32.cc (right):

https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32...
src/ia32/lithium-ia32.cc:1782: return DefineFixed(new(zone())
LClampDToUint8(reg), eax);
I assume that you have to use EAX because SETcc needs to access the
least significant byte, ruling out ESI and EDI?

https://codereview.chromium.org/11190049/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
shdw...@gmail.com  
View profile  
 More options Oct 19 2012, 6:25 am
From: shdw...@gmail.com
Date: Fri, 19 Oct 2012 10:25:48 +0000
Local: Fri, Oct 19 2012 6:25 am
Subject: Re: Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)
On 2012/10/19 09:29:45, Yang wrote:

https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32...

> src/ia32/lithium-ia32.cc:1782: return DefineFixed(new(zone())
> LClampDToUint8(reg), eax);
> I assume that you have to use EAX because SETcc needs to access the least
> significant byte, ruling out ESI and EDI?

Yes.
And I sent a micro benchmark to you (with a 2MiB attachment).

http://codereview.chromium.org/11190049/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
yang...@chromium.org  
View profile  
 More options Oct 19 2012, 9:21 am
From: yang...@chromium.org
Date: Fri, 19 Oct 2012 13:21:12 +0000
Local: Fri, Oct 19 2012 9:21 am
Subject: Re: Improve ClampDoubleToUint8 on ia32/x64 (issue 11190049)
On 2012/10/19 10:25:48, Zheng Liu wrote:

> On 2012/10/19 09:29:45, Yang wrote:
> > https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32.cc
> > File src/ia32/lithium-ia32.cc (right):

https://codereview.chromium.org/11190049/diff/1/src/ia32/lithium-ia32...

> > src/ia32/lithium-ia32.cc:1782: return DefineFixed(new(zone())
> > LClampDToUint8(reg), eax);
> > I assume that you have to use EAX because SETcc needs to access the  
> least
> > significant byte, ruling out ESI and EDI?
> Yes.
> And I sent a micro benchmark to you (with a 2MiB attachment).

Thanks for this patch! I landed this as r12777.

http://codereview.chromium.org/11190049/


 
You must Sign in before you can post messages.
To post a message you must first join this group.
Please update your nickname on the subscription settings page before posting.
You do not have the permission required to post.
End of messages
« Back to Discussions « Newer topic     Older topic »