Comment #8 on issue 6633 by
jgr...@chromium.org: RegExp is much faster than indexOf
https://bugs.chromium.org/p/v8/issues/detail?id=6633#c8

Irregexp code is fairly large for simple regexps:
d8> /(ab)/.exec(".")
Instruction size in bytes: 725
Atom regexps only store the pattern string (possibly twice), so we're looking at a code-size overhead of several hundred times.
However, code is generated only once for each regexp literal in the source code, so simply accepting this size cost may be an option. Thoughts?
The Octane/RegExp score is unaffected by restricting Atom regexps to single-char patterns.
For reference, the disassembly of the above:
d8> /(ab)/.exec(".")
kind = REGEXP
name = IRREGEXP
compiler = unknown
Instructions (size = 725)
0x156930384060 0 e9d6000000 jmp 0x15693038413b <+0xdb>
0x156930384065 5 4883e904 REX.W subq rcx,0x4
0x156930384069 9 c70133010000 movl [rcx],0x133
0x15693038406f f 48a190889bd84d560000 REX.W movq rax,(0x564dd89b8890) ;; external reference (RegExpStack::limit_address())
0x156930384079 19 483bc8 REX.W cmpq rcx,rax
0x15693038407c 1c 0f8705000000 ja 0x156930384087 <+0x27>
0x156930384082 22 e848020000 call 0x1569303842cf <+0x26f>
0x156930384087 27 83ffff cmpl rdi,0xff
0x15693038408a 2a 0f8d27000000 jge 0x1569303840b7 <+0x57>
0x156930384090 30 0fb6543e01 movzxbl rdx,[rsi+rdi*1+0x1]
0x156930384095 35 48b8c9c792a3653c0000 REX.W movq rax,0x3c65a392c7c9 ;; object: 0x3c65a392c7c9 <ByteArray[128]>
0x15693038409f 3f 488bda REX.W movq rbx,rdx
0x1569303840a2 42 4883e37f REX.W andq rbx,0x7f
0x1569303840a6 46 807c180f00 cmpb [rax+rbx*1+0xf],0x0
0x1569303840ab 4b 0f8506000000 jnz 0x1569303840b7 <+0x57>
0x1569303840b1 51 4883c702 REX.W addq rdi,0x2
0x1569303840b5 55 ebd0 jmp 0x156930384087 <+0x27>
0x1569303840b7 57 83ffff cmpl rdi,0xff
0x1569303840ba 5a 0f8d9a010000 jge 0x15693038425a <+0x1fa>
0x1569303840c0 60 0fb7143e movzxwl rdx,[rsi+rdi*1]
0x1569303840c4 64 81fa61620000 cmpl rdx,0x6261
0x1569303840ca 6a 0f8406000000 jz 0x1569303840d6 <+0x76>
0x1569303840d0 70 4883c701 REX.W addq rdi,0x1
0x1569303840d4 74 ebb1 jmp 0x156930384087 <+0x27>
0x1569303840d6 76 4883e904 REX.W subq rcx,0x4
0x1569303840da 7a 8939 movl [rcx],rdi
0x1569303840dc 7c 48897db0 REX.W movq [rbp-0x50],rdi
0x1569303840e0 80 488d4702 REX.W leaq rax,[rdi+0x2]
0x1569303840e4 84 488945a8 REX.W movq [rbp-0x58],rax
0x1569303840e8 88 48897da0 REX.W movq [rbp-0x60],rdi
0x1569303840ec 8c 488d4702 REX.W leaq rax,[rdi+0x2]
0x1569303840f0 90 48894598 REX.W movq [rbp-0x68],rax
0x1569303840f4 94 4883c702 REX.W addq rdi,0x2
0x1569303840f8 98 4883e904 REX.W subq rcx,0x4
0x1569303840fc 9c c7011e010000 movl [rcx],0x11e
0x156930384102 a2 48a190889bd84d560000 REX.W movq rax,(0x564dd89b8890) ;; external reference (RegExpStack::limit_address())
0x15693038410c ac 483bc8 REX.W cmpq rcx,rax
0x15693038410f af 0f8705000000 ja 0x15693038411a <+0xba>
0x156930384115 b5 e8b5010000 call 0x1569303842cf <+0x26f>
0x15693038411a ba e9f1000000 jmp 0x156930384210 <+0x1b0>
0x15693038411f bf 488b45b8 REX.W movq rax,[rbp-0x48]
0x156930384123 c3 488945a0 REX.W movq [rbp-0x60],rax
0x156930384127 c7 48894598 REX.W movq [rbp-0x68],rax
0x15693038412b cb 486339 REX.W movsxlq rdi,[rcx]
0x15693038412e ce 4883c104 REX.W addq rcx,0x4
0x156930384132 d2 eb9c jmp 0x1569303840d0 <+0x70>
0x156930384134 d4 33c0 xorl rax,rax
0x156930384136 d6 e916010000 jmp 0x156930384251 <+0x1f1>
0x15693038413b db 55 push rbp
0x15693038413c dc 4889e5 REX.W movq rbp,rsp
0x15693038413f df 57 push rdi
0x156930384140 e0 56 push rsi
0x156930384141 e1 52 push rdx
0x156930384142 e2 51 push rcx
0x156930384143 e3 4150 push r8
0x156930384145 e5 4151 push r9
0x156930384147 e7 53 push rbx
0x156930384148 e8 6a00 push 0x0
0x15693038414a ea 6a00 push 0x0
0x15693038414c ec 4889e1 REX.W movq rcx,rsp
0x15693038414f ef 49baa8f197d84d560000 REX.W movq r10,0x564dd897f1a8 ;; external reference (StackGuard::address_of_jslimit())
0x156930384159 f9 492b0a REX.W subq rcx,[r10]
0x15693038415c fc 0f8616000000 jna 0x156930384178 <+0x118>
0x156930384162 102 4883f920 REX.W cmpq rcx,0x20
0x156930384166 106 0f8350000000 jnc 0x1569303841bc <+0x15c>
0x15693038416c 10c 48c7c0ffffffff REX.W movq rax,0xffffffff
0x156930384173 113 e9d9000000 jmp 0x156930384251 <+0x1f1>
0x156930384178 118 49b80140383069150000 REX.W movq r8,0x156930384001 ;; object: 0x156930384001 <Code REGEXP>
0x156930384182 122 4989e2 REX.W movq r10,rsp
0x156930384185 125 4883ec08 REX.W subq rsp,0x8
0x156930384189 129 4883e4f0 REX.W andq rsp,0xf0
0x15693038418d 12d 4c891424 REX.W movq [rsp],r10
0x156930384191 131 488bd5 REX.W movq rdx,rbp
0x156930384194 134 498bf0 REX.W movq rsi,r8
0x156930384197 137 488d7c24f8 REX.W leaq rdi,[rsp-0x8]
0x15693038419c 13c 48b8006f7c24207f0000 REX.W movq rax,0x7f20247c6f00 ;; external reference (RegExpMacroAssembler*::CheckStackGuardState())
0x1569303841a6 146 40f6c40f testb rsp,0xf
0x1569303841aa 14a 7401 jz 0x1569303841ad <+0x14d>
0x1569303841ac 14c cc int3l
0x1569303841ad 14d ffd0 call rax
0x1569303841af 14f 488b2424 REX.W movq rsp,[rsp]
0x1569303841b3 153 4885c0 REX.W testq rax,rax
0x1569303841b6 156 0f8595000000 jnz 0x156930384251 <+0x1f1>
0x1569303841bc 15c 4883ec20 REX.W subq rsp,0x20
0x1569303841c0 160 488b75e0 REX.W movq rsi,[rbp-0x20]
0x1569303841c4 164 488b7de8 REX.W movq rdi,[rbp-0x18]
0x1569303841c8 168 482bfe REX.W subq rdi,rsi
0x1569303841cb 16b 488b5df0 REX.W movq rbx,[rbp-0x10]
0x1569303841cf 16f 48f7db REX.W negq rbx
0x1569303841d2 172 488d441fff REX.W leaq rax,[rdi+rbx*1-0x1]
0x1569303841d7 177 488945b8 REX.W movq [rbp-0x48],rax
0x1569303841db 17b 49b80140383069150000 REX.W movq r8,0x156930384001 ;; object: 0x156930384001 <Code REGEXP>
0x1569303841e5 185 837df000 cmpl [rbp-0x10],0x0
0x1569303841e9 189 7507 jnz 0x1569303841f2 <+0x192>
0x1569303841eb 18b ba0a000000 movl rdx,0xa
0x1569303841f0 190 eb05 jmp 0x1569303841f7 <+0x197>
0x1569303841f2 192 0fb6543eff movzxbl rdx,[rsi+rdi*1-0x1]
0x1569303841f7 197 488945b0 REX.W movq [rbp-0x50],rax
0x1569303841fb 19b 488945a8 REX.W movq [rbp-0x58],rax
0x1569303841ff 19f 488945a0 REX.W movq [rbp-0x60],rax
0x156930384203 1a3 48894598 REX.W movq [rbp-0x68],rax
0x156930384207 1a7 488b4d10 REX.W movq rcx,[rbp+0x10]
0x15693038420b 1ab e955feffff jmp 0x156930384065 <+0x5>
0x156930384210 1b0 488b55f0 REX.W movq rdx,[rbp-0x10]
0x156930384214 1b4 488b5dd8 REX.W movq rbx,[rbp-0x28]
0x156930384218 1b8 488b4de0 REX.W movq rcx,[rbp-0x20]
0x15693038421c 1bc 482b4de8 REX.W subq rcx,[rbp-0x18]
0x156930384220 1c0 4803ca REX.W addq rcx,rdx
0x156930384223 1c3 488b45b0 REX.W movq rax,[rbp-0x50]
0x156930384227 1c7 4803c1 REX.W addq rax,rcx
0x15693038422a 1ca 8903 movl [rbx],rax
0x15693038422c 1cc 488b45a8 REX.W movq rax,[rbp-0x58]
0x156930384230 1d0 4803c1 REX.W addq rax,rcx
0x156930384233 1d3 894304 movl [rbx+0x4],rax
0x156930384236 1d6 488b45a0 REX.W movq rax,[rbp-0x60]
0x15693038423a 1da 4803c1 REX.W addq rax,rcx
0x15693038423d 1dd 894308 movl [rbx+0x8],rax
0x156930384240 1e0 488b4598 REX.W movq rax,[rbp-0x68]
0x156930384244 1e4 4803c1 REX.W addq rax,rcx
0x156930384247 1e7 89430c movl [rbx+0xc],rax
0x15693038424a 1ea 48c7c001000000 REX.W movq rax,0x1
0x156930384251 1f1 488b5dc8 REX.W movq rbx,[rbp-0x38]
0x156930384255 1f5 488be5 REX.W movq rsp,rbp
0x156930384258 1f8 5d pop rbp
0x156930384259 1f9 c3 retl
0x15693038425a 1fa 48a1a8f197d84d560000 REX.W movq rax,(0x564dd897f1a8) ;; external reference (StackGuard::address_of_jslimit())
0x156930384264 204 483be0 REX.W cmpq rsp,rax
0x156930384267 207 0f8705000000 ja 0x156930384272 <+0x212>
0x15693038426d 20d e80c000000 call 0x15693038427e <+0x21e>
0x156930384272 212 486319 REX.W movsxlq rbx,[rcx]
0x156930384275 215 4883c104 REX.W addq rcx,0x4
0x156930384279 219 4903d8 REX.W addq rbx,r8
0x15693038427c 21c ffe3 jmp rbx
0x15693038427e 21e 4c290424 REX.W subq [rsp],r8
0x156930384282 222 51 push rcx
0x156930384283 223 57 push rdi
0x156930384284 224 4989e2 REX.W movq r10,rsp
0x156930384287 227 4883ec08 REX.W subq rsp,0x8
0x15693038428b 22b 4883e4f0 REX.W andq rsp,0xf0
0x15693038428f 22f 4c891424 REX.W movq [rsp],r10
0x156930384293 233 488bd5 REX.W movq rdx,rbp
0x156930384296 236 498bf0 REX.W movq rsi,r8
0x156930384299 239 488d7c24f8 REX.W leaq rdi,[rsp-0x8]
0x15693038429e 23e 48b8006f7c24207f0000 REX.W movq rax,0x7f20247c6f00 ;; external reference (RegExpMacroAssembler*::CheckStackGuardState())
0x1569303842a8 248 40f6c40f testb rsp,0xf
0x1569303842ac 24c 7401 jz 0x1569303842af <+0x24f>
0x1569303842ae 24e cc int3l
0x1569303842af 24f ffd0 call rax
0x1569303842b1 251 488b2424 REX.W movq rsp,[rsp]
0x1569303842b5 255 4885c0 REX.W testq rax,rax
0x1569303842b8 258 7597 jnz 0x156930384251 <+0x1f1>
0x1569303842ba 25a 49b80140383069150000 REX.W movq r8,0x156930384001 ;; object: 0x156930384001 <Code REGEXP>
0x1569303842c4 264 5f pop rdi
0x1569303842c5 265 59 pop rcx
0x1569303842c6 266 488b75e0 REX.W movq rsi,[rbp-0x20]
0x1569303842ca 26a 4c010424 REX.W addq [rsp],r8
0x1569303842ce 26e c3 retl
0x1569303842cf 26f 4c290424 REX.W subq [rsp],r8
0x1569303842d3 273 56 push rsi
0x1569303842d4 274 57 push rdi
0x1569303842d5 275 4989e2 REX.W movq r10,rsp
0x1569303842d8 278 4883ec08 REX.W subq rsp,0x8
0x1569303842dc 27c 4883e4f0 REX.W andq rsp,0xf0
0x1569303842e0 280 4c891424 REX.W movq [rsp],r10
0x1569303842e4 284 488bf9 REX.W movq rdi,rcx
0x1569303842e7 287 488d7510 REX.W leaq rsi,[rbp+0x10]
0x1569303842eb 28b 48bac0d897d84d560000 REX.W movq rdx,0x564dd897d8c0 ;; external reference (isolate)
0x1569303842f5 295 48b820aa4f24207f0000 REX.W movq rax,0x7f20244faa20 ;; external reference (NativeRegExpMacroAssembler::GrowStack())
0x1569303842ff 29f 40f6c40f testb rsp,0xf
0x156930384303 2a3 7401 jz 0x156930384306 <+0x2a6>
0x156930384305 2a5 cc int3l
0x156930384306 2a6 ffd0 call rax
0x156930384308 2a8 488b2424 REX.W movq rsp,[rsp]
0x15693038430c 2ac 4885c0 REX.W testq rax,rax
0x15693038430f 2af 0f8414000000 jz 0x156930384329 <+0x2c9>
0x156930384315 2b5 488bc8 REX.W movq rcx,rax
0x156930384318 2b8 49b80140383069150000 REX.W movq r8,0x156930384001 ;; object: 0x156930384001 <Code REGEXP>
0x156930384322 2c2 5f pop rdi
0x156930384323 2c3 5e pop rsi
0x156930384324 2c4 4c010424 REX.W addq [rsp],r8
0x156930384328 2c8 c3 retl
0x156930384329 2c9 48c7c0ffffffff REX.W movq rax,0xffffffff
0x156930384330 2d0 e91cffffff jmp 0x156930384251 <+0x1f1>