Message from discussion
Constant strings - again
Newsgroups: perl.perl6.internals
Path: archiver1.google.com!news2.google.com!news.maxwell.syr.edu!newsfeed.stanford.edu!nntp.perl.org
Return-Path: <jcli...@mac.com>
Mailing-List: contact perl6-internals-h...@perl.org; run by ezmlm
Delivered-To: mailing list perl6-intern...@perl.org
Received: (qmail 60808 invoked by uid 76); 19 Apr 2004 08:22:21 -0000
Received: from x1.develooper.com (HELO x1.develooper.com) (63.251.223.170)
by onion.perl.org (qpsmtpd/0.27.1) with SMTP; Mon, 19 Apr 2004 01:22:21 -0700
Received: (qmail 9999 invoked by uid 225); 19 Apr 2004 08:22:15 -0000
Delivered-To: perl6-intern...@perl.org
Received: (qmail 9989 invoked by alias); 19 Apr 2004 08:22:14 -0000
X-Spam-Status: No, hits=0.0 required=7.0
tests=
X-Spam-Check-By: la.mx.develooper.com
Received: from A17-250-248-89.apple.com (HELO smtpout.mac.com) (17.250.248.89)
by la.mx.develooper.com (qpsmtpd/0.27.1) with ESMTP; Mon, 19 Apr 2004 01:22:13 -0700
Received: from mac.com (smtpin08-en2 [10.13.10.153])
by smtpout.mac.com (Xserve/MantshX 2.0) with ESMTP id i3J8M32L023894;
Mon, 19 Apr 2004 01:22:03 -0700 (PDT)
Received: from [192.168.0.60] (adsl-209-204-178-23.sonic.net [209.204.178.23])
(authenticated bits=0)
by mac.com (Xserve/smtpin08/MantshX 3.0) with ESMTP id i3J8M1TK006189;
Mon, 19 Apr 2004 01:22:02 -0700 (PDT)
In-Reply-To: <200404181506.i3IF6oo30330@thu8.leo.home>
References: <407FB1AF.2060003@toetsch.at> <200404181506.i3IF6oo30330@thu8.leo.home>
Mime-Version: 1.0 (Apple Message framework v609)
Content-Type: multipart/mixed; boundary=Apple-Mail-4-976505177
Message-ID: <A14A6816-91DA-11D8-8CC6-000393A6B9DA@mac.com>
Cc: perl6-intern...@perl.org
Subject: Re: Constant strings - again
Date: Mon, 19 Apr 2004 01:22:00 -0700
To: l...@toetsch.at
X-Mailer: Apple Mail (2.609)
X-Virus-Checked: Checked
Approved: n...@nntp.perl.org
From: jcli...@mac.com (Jeff Clites)
--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
On Apr 18, 2004, at 8:06 AM, Leopold Toetsch wrote:
> Leopold Toetsch <l...@toetsch.at> wrote:
>
> [ initial proposal ]
>
> I've now checked in a working version.
> * c2str.pl generates a .str header from a .c file
> * c2str.pl --all generates $(INC)/string_private_cstring.h
> * this us used in string_init() to finally generate entries
> in the interpreter's constant string table
>
> * to add new files makefiles/root.in or such has to be edited
> see objects.str as a template
>
> Using multiple files is only slightly tested though.
To handle multiple files, we'll probably need to generate a .c to hold
the C strings (instead of the .h), and have an extern declaration in
the .h (since it will be included in multiple files). That's assuming
they'll all be aggregated into a single file (which makes sense).
Here is a related patch, to cause us to cache the hash values of all
strings (on demand). The important part is that the cached value is
cleared out in unmake_COW, which is called any time we might mutate the
string (and thus, invalidate the cached value). This will have the
side-effect of allowing c2str.pl to be slightly simpler, since it won't
need to pre-calculate the hash value (since const strings are the same
as any others, and their hash value will be calculated and cached if it
is ever needed).
This change speeds up the attached benchmark by a factor of 1.86 in the
optimize case (via --optimize, so -Os), or 3.73 in the unoptimized case
(on Mac OS X):
# without the patch, optimized build
% ./parrot hash-timing.pasm
679 hash entries
128 characters per test key
1000000 lookups each
rep 1: 1.093889 sec
rep 2: 1.109484 sec
rep 4: 1.095041 sec
# with the patch, optimized build
% ./parrot hash-timing.pasm
679 hash entries
128 characters per test key
1000000 lookups each
rep 1: 0.608547 sec
rep 2: 0.586352 sec
rep 4: 0.575159 sec
JEff
--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
x-unix-mode=0644;
name="hash-caching.patch"
Content-Disposition: attachment;
filename=hash-caching.patch
Index: include/parrot/string_funcs.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
retrieving revision 1.38
diff -u -b -r1.38 string_funcs.h
--- include/parrot/string_funcs.h 18 Apr 2004 15:10:12 -0000 1.38
+++ include/parrot/string_funcs.h 19 Apr 2004 07:46:30 -0000
@@ -84,7 +84,7 @@
/*void string_iterator_init(struct string_iterator_t *i, const STRING *s);*/
UINTVAL string_decode_and_advance(struct string_iterator_t *i);
-size_t string_hash(Interp *interpreter, Hash *hash, STRING *s);
+size_t string_hash(Interp *interpreter, STRING *s);
STRING * string_unescape_cstring(struct Parrot_Interp *, char *cstring,
char delimiter);
Index: src/hash.c
===================================================================
RCS file: /cvs/public/parrot/src/hash.c,v
retrieving revision 1.79
diff -u -b -r1.79 hash.c
--- src/hash.c 17 Apr 2004 06:22:55 -0000 1.79
+++ src/hash.c 19 Apr 2004 07:46:35 -0000
@@ -147,18 +147,13 @@
*/
/* see also string.c */
-#define USE_HASH_VAL 1
static size_t
key_hash_STRING(Interp *interpreter, Hash *hash, void *value)
{
STRING *s = value;
-#if USE_HASH_VAL
- if (PObj_constant_TEST(s) && s->hashval) {
- return s->hashval ^ hash->seed;
- }
-#endif
- return string_hash(interpreter, hash, s);
+
+ return string_hash(interpreter, s) ^ hash->seed;
}
/*
Index: src/string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.194
diff -u -b -r1.194 string.c
--- src/string.c 18 Apr 2004 15:10:56 -0000 1.194
+++ src/string.c 19 Apr 2004 07:46:41 -0000
@@ -27,8 +27,6 @@
#include "parrot/parrot.h"
#include <assert.h>
-#define USE_HASH_VAL 1
-
/*
* this extra size is in the hope, that some concat ops might
* follow in a sequence.
@@ -106,6 +104,8 @@
/* COW_FLAG | external_FLAG | bufstart_external_FLAG immobile_FLAG */
PObj_is_external_CLEARALL(s);
}
+
+ s->hashval = 0;
}
/*
@@ -753,13 +753,7 @@
PObj_bufstart(s) = s->strstart = const_cast(buffer);
PObj_buflen(s) = s->strlen = s->bufused = len;
PObj_bufstart_external_SET(s);
-#if USE_HASH_VAL
- if (flags & PObj_constant_FLAG) {
- Hash hash;
- hash.seed = 0;
- s->hashval = string_hash(interpreter, &hash, s);
- }
-#endif
+
return s;
}
}
@@ -784,13 +778,6 @@
else {
s->strlen = s->bufused = 0;
}
-#if USE_HASH_VAL
- if (flags & PObj_constant_FLAG) {
- Hash hash;
- hash.seed = 0;
- s->hashval = string_hash(interpreter, &hash, s);
- }
-#endif
}
else {
string_fill_from_buffer(interpreter, buffer, len, encoding_name, s);
@@ -2838,29 +2825,27 @@
=item C<size_t
string_hash(struct Parrot_Interp * interpreter, Hash *hash, STRING *s)>
-Returns a hash value for the specified Parrot string.
-
-TODO - Cache this value in C<s->hashval>? Make the seed global, but
-random at start time?
+Returns the hash value for the specified Parrot string, caching it in
+C<s->hashval>.
=cut
*/
size_t
-string_hash(struct Parrot_Interp * interpreter, Hash *hash, STRING *s)
+string_hash(struct Parrot_Interp * interpreter, STRING *s)
{
-#if USE_HASH_VAL
register size_t h = 0;
-#else
- register size_t h = hash->seed;
-#endif
UNUSED(interpreter);
if (!s)
return 0;
+ if (s->hashval) {
+ return s->hashval;
+ }
+
switch (s->representation) {
case enum_stringrep_one:
HASH_STRING(Parrot_UInt1, s, h);
@@ -2876,11 +2861,9 @@
break;
}
-#if USE_HASH_VAL
- return h ^ hash->seed;
-#else
+ s->hashval = h;
+
return h;
-#endif
}
@@ -3035,13 +3018,7 @@
}
result->strlen = d;
result->bufused = string_max_bytes(interpreter, result, d);
-#if USE_HASH_VAL
- {
- Hash hash;
- hash.seed = 0;
- result->hashval = string_hash(interpreter, &hash, result);
- }
-#endif
+
return result;
}
Index: t/pmc/perlhash.t
===================================================================
RCS file: /cvs/public/parrot/t/pmc/perlhash.t,v
retrieving revision 1.43
diff -u -b -r1.43 perlhash.t
--- t/pmc/perlhash.t 9 Apr 2004 20:32:54 -0000 1.43
+++ t/pmc/perlhash.t 19 Apr 2004 07:46:43 -0000
@@ -19,7 +19,7 @@
=cut
-use Parrot::Test tests => 33;
+use Parrot::Test tests => 34;
use Test::More;
output_is(<<CODE, <<OUTPUT, "Initial PerlHash tests");
@@ -1142,6 +1142,34 @@
-134.134000
135.135000
-135.135000
+OUTPUT
+
+output_is(<< 'CODE', << 'OUTPUT', "mutating the lookup string");
+ new P0, .PerlHash
+ set P0["a"], "one"
+ set P0["ab"], "two"
+ set P0["abc"], "three"
+
+ set S0, "a"
+ set S1, P0[S0]
+ print S1
+ print "\n"
+
+ concat S0, "b"
+ set S1, P0[S0]
+ print S1
+ print "\n"
+
+ concat S0, "c"
+ set S1, P0[S0]
+ print S1
+ print "\n"
+
+ end
+CODE
+one
+two
+three
OUTPUT
1;
--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: application/text;
x-mac-type=54455854;
x-unix-mode=0644;
name="hash-timing.pasm"
Content-Disposition: attachment;
filename=hash-timing.pasm
new P1, .PerlHash
# add 26*26 = 676 entries
set I0, 65
lp0:
set I1, 65
lp1:
chr S0, I0
chr S1, I1
concat S0, S1
set P1[S0], I0
inc I1
le I1, 90, lp1
inc I0
le I0, 90, lp0
set S6, "AA"
concat S6, S6
concat S6, S6
concat S6, S6
concat S6, S6
concat S6, S6
concat S6, S6
set S7, "\u1234\u1234"
concat S7, S7
concat S7, S7
concat S7, S7
concat S7, S7
concat S7, S7
concat S7, S7
chr S8, 0x1043F
concat S8, S8
concat S8, S8
concat S8, S8
concat S8, S8
concat S8, S8
concat S8, S8
concat S8, S8
set P1[S6], 601
set P1[S7], 602
set P1[S8], 603
set I8, P1[S6]
ne I8, 601, error
set I8, P1[S7]
ne I8, 602, error
set I8, P1[S8]
ne I8, 603, error
set I0, P1
print I0
print " hash entries\n"
length I0, S6
print I0
print " characters per test key\n"
# time 1e6 lookups
set I0, 1000000
print I0
print " lookups each\n"
time N1
lp2:
set I1, P1[S6]
dec I0
if I0, lp2
time N0
sub N0, N1
print "rep 1: "
print N0
print " sec\n"
# time 1e6 lookups
set I0, 1000000
time N1
lp3:
set I1, P1[S7]
dec I0
if I0, lp3
time N0
sub N0, N1
print "rep 2: "
print N0
print " sec\n"
# time 1e6 lookups
set I0, 1000000
time N1
lp4:
set I1, P1[S8]
dec I0
if I0, lp4
time N0
sub N0, N1
print "rep 4: "
print N0
print " sec\n"
end
error:
print "error\n"
end
--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
--Apple-Mail-4-976505177--