Account Options

  1. Sign in
The old Google Groups will be going away soon, but your browser is incompatible with the new version.
Google Groups Home
« Groups Home
Message from discussion Constant strings - again

Newsgroups: perl.perl6.internals
Path: archiver1.google.com!news2.google.com!news.maxwell.syr.edu!newsfeed.stanford.edu!nntp.perl.org
Return-Path: <jcli...@mac.com>
Mailing-List: contact perl6-internals-h...@perl.org; run by ezmlm
Delivered-To: mailing list perl6-intern...@perl.org
Received: (qmail 60808 invoked by uid 76); 19 Apr 2004 08:22:21 -0000
Received: from x1.develooper.com (HELO x1.develooper.com) (63.251.223.170)
  by onion.perl.org (qpsmtpd/0.27.1) with SMTP; Mon, 19 Apr 2004 01:22:21 -0700
Received: (qmail 9999 invoked by uid 225); 19 Apr 2004 08:22:15 -0000
Delivered-To: perl6-intern...@perl.org
Received: (qmail 9989 invoked by alias); 19 Apr 2004 08:22:14 -0000
X-Spam-Status: No, hits=0.0 required=7.0
	tests=
X-Spam-Check-By: la.mx.develooper.com
Received: from A17-250-248-89.apple.com (HELO smtpout.mac.com) (17.250.248.89)
  by la.mx.develooper.com (qpsmtpd/0.27.1) with ESMTP; Mon, 19 Apr 2004 01:22:13 -0700
Received: from mac.com (smtpin08-en2 [10.13.10.153])
	by smtpout.mac.com (Xserve/MantshX 2.0) with ESMTP id i3J8M32L023894;
	Mon, 19 Apr 2004 01:22:03 -0700 (PDT)
Received: from [192.168.0.60] (adsl-209-204-178-23.sonic.net [209.204.178.23])
	(authenticated bits=0)
	by mac.com (Xserve/smtpin08/MantshX 3.0) with ESMTP id i3J8M1TK006189;
	Mon, 19 Apr 2004 01:22:02 -0700 (PDT)
In-Reply-To: <200404181506.i3IF6oo30330@thu8.leo.home>
References: <407FB1AF.2060003@toetsch.at> <200404181506.i3IF6oo30330@thu8.leo.home>
Mime-Version: 1.0 (Apple Message framework v609)
Content-Type: multipart/mixed; boundary=Apple-Mail-4-976505177
Message-ID: <A14A6816-91DA-11D8-8CC6-000393A6B9DA@mac.com>
Cc: perl6-intern...@perl.org
Subject: Re: Constant strings - again
Date: Mon, 19 Apr 2004 01:22:00 -0700
To: l...@toetsch.at
X-Mailer: Apple Mail (2.609)
X-Virus-Checked: Checked
Approved: n...@nntp.perl.org
From: jcli...@mac.com (Jeff Clites)

--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed

On Apr 18, 2004, at 8:06 AM, Leopold Toetsch wrote:

> Leopold Toetsch <l...@toetsch.at> wrote:
>
> [ initial proposal ]
>
> I've now checked in a working version.
> * c2str.pl generates a  .str header from a .c file
> * c2str.pl --all generates $(INC)/string_private_cstring.h
> * this us used in string_init() to finally generate entries
>   in the interpreter's constant string table
>
> * to add new files makefiles/root.in or such has to be edited
>   see objects.str as a template
>
> Using multiple files is only slightly tested though.

To handle multiple files, we'll probably need to generate a .c to hold 
the C strings (instead of the .h), and have an extern declaration in 
the .h (since it will be included in multiple files). That's assuming 
they'll all be aggregated into a single file (which makes sense).

Here is a related patch, to cause us to cache the hash values of all 
strings (on demand). The important part is that the cached value is 
cleared out in unmake_COW, which is called any time we might mutate the 
string (and thus, invalidate the cached value). This will have the 
side-effect of allowing c2str.pl to be slightly simpler, since it won't 
need to pre-calculate the hash value (since const strings are the same 
as any others, and their hash value will be calculated and cached if it 
is ever needed).

This change speeds up the attached benchmark by a factor of 1.86 in the 
optimize case (via --optimize, so -Os), or 3.73 in the unoptimized case 
(on Mac OS X):

# without the patch, optimized build
% ./parrot hash-timing.pasm
679 hash entries
128 characters per test key
1000000 lookups each
rep 1:  1.093889 sec
rep 2:  1.109484 sec
rep 4:  1.095041 sec

# with the patch, optimized build
% ./parrot hash-timing.pasm
679 hash entries
128 characters per test key
1000000 lookups each
rep 1:  0.608547 sec
rep 2:  0.586352 sec
rep 4:  0.575159 sec

JEff


--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
	x-unix-mode=0644;
	name="hash-caching.patch"
Content-Disposition: attachment;
	filename=hash-caching.patch

Index: include/parrot/string_funcs.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
retrieving revision 1.38
diff -u -b -r1.38 string_funcs.h
--- include/parrot/string_funcs.h	18 Apr 2004 15:10:12 -0000	1.38
+++ include/parrot/string_funcs.h	19 Apr 2004 07:46:30 -0000
@@ -84,7 +84,7 @@
 /*void string_iterator_init(struct string_iterator_t *i, const STRING *s);*/
 UINTVAL string_decode_and_advance(struct string_iterator_t *i);
 
-size_t string_hash(Interp *interpreter, Hash *hash, STRING *s);
+size_t string_hash(Interp *interpreter, STRING *s);
 STRING * string_unescape_cstring(struct Parrot_Interp *, char *cstring,
 									char delimiter);
 
Index: src/hash.c
===================================================================
RCS file: /cvs/public/parrot/src/hash.c,v
retrieving revision 1.79
diff -u -b -r1.79 hash.c
--- src/hash.c	17 Apr 2004 06:22:55 -0000	1.79
+++ src/hash.c	19 Apr 2004 07:46:35 -0000
@@ -147,18 +147,13 @@
 */
 
 /* see also string.c */
-#define  USE_HASH_VAL 1
 
 static size_t
 key_hash_STRING(Interp *interpreter, Hash *hash, void *value)
 {
     STRING *s = value;
-#if USE_HASH_VAL
-    if (PObj_constant_TEST(s) && s->hashval) {
-        return s->hashval ^ hash->seed;
-    }
-#endif
-    return string_hash(interpreter, hash, s);
+
+    return string_hash(interpreter, s) ^ hash->seed;
 }
 
 /*
Index: src/string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.194
diff -u -b -r1.194 string.c
--- src/string.c	18 Apr 2004 15:10:56 -0000	1.194
+++ src/string.c	19 Apr 2004 07:46:41 -0000
@@ -27,8 +27,6 @@
 #include "parrot/parrot.h"
 #include <assert.h>
 
-#define USE_HASH_VAL 1
-
 /*
  * this extra size is in the hope, that some concat ops might
  * follow in a sequence.
@@ -106,6 +104,8 @@
         /* COW_FLAG | external_FLAG | bufstart_external_FLAG immobile_FLAG */
         PObj_is_external_CLEARALL(s);
     }
+    
+	s->hashval = 0;
 }
 
 /*
@@ -753,13 +753,7 @@
                 PObj_bufstart(s) = s->strstart = const_cast(buffer);
                 PObj_buflen(s)   = s->strlen = s->bufused = len;
                 PObj_bufstart_external_SET(s);
-#if USE_HASH_VAL
-                if (flags & PObj_constant_FLAG) {
-                    Hash hash;
-                    hash.seed = 0;
-                    s->hashval = string_hash(interpreter, &hash, s);
-                }
-#endif
+
                 return s;
             }
         }
@@ -784,13 +778,6 @@
             else {
                 s->strlen = s->bufused = 0;
             }
-#if USE_HASH_VAL
-                if (flags & PObj_constant_FLAG) {
-                    Hash hash;
-                    hash.seed = 0;
-                    s->hashval = string_hash(interpreter, &hash, s);
-                }
-#endif
         }
         else {
             string_fill_from_buffer(interpreter, buffer, len, encoding_name, s);
@@ -2838,29 +2825,27 @@
 =item C<size_t
 string_hash(struct Parrot_Interp * interpreter, Hash *hash, STRING *s)>
 
-Returns a hash value for the specified Parrot string.
-
-TODO - Cache this value in C<s->hashval>? Make the seed global, but
-random at start time?
+Returns the hash value for the specified Parrot string, caching it in
+C<s->hashval>.
 
 =cut
 
 */
 
 size_t
-string_hash(struct Parrot_Interp * interpreter, Hash *hash, STRING *s)
+string_hash(struct Parrot_Interp * interpreter, STRING *s)
 {
-#if USE_HASH_VAL
     register size_t h = 0;
-#else
-    register size_t h = hash->seed;
-#endif
 
     UNUSED(interpreter);
 
     if (!s)
         return 0;
 
+    if (s->hashval) {
+        return s->hashval;
+    }
+
     switch (s->representation) {
         case enum_stringrep_one:
             HASH_STRING(Parrot_UInt1, s, h);
@@ -2876,11 +2861,9 @@
             break;
     }
 
-#if USE_HASH_VAL
-    return h ^ hash->seed;
-#else
+	s->hashval = h;
+
     return h;
-#endif
 }
 
 
@@ -3035,13 +3018,7 @@
     }
     result->strlen = d;
     result->bufused = string_max_bytes(interpreter, result, d);
-#if USE_HASH_VAL
-    {
-        Hash hash;
-        hash.seed = 0;
-        result->hashval = string_hash(interpreter, &hash, result);
-    }
-#endif
+
     return result;
 }
 
Index: t/pmc/perlhash.t
===================================================================
RCS file: /cvs/public/parrot/t/pmc/perlhash.t,v
retrieving revision 1.43
diff -u -b -r1.43 perlhash.t
--- t/pmc/perlhash.t	9 Apr 2004 20:32:54 -0000	1.43
+++ t/pmc/perlhash.t	19 Apr 2004 07:46:43 -0000
@@ -19,7 +19,7 @@
 
 =cut
 
-use Parrot::Test tests => 33;
+use Parrot::Test tests => 34;
 use Test::More;
 
 output_is(<<CODE, <<OUTPUT, "Initial PerlHash tests");
@@ -1142,6 +1142,34 @@
 -134.134000
 135.135000
 -135.135000
+OUTPUT
+
+output_is(<< 'CODE', << 'OUTPUT', "mutating the lookup string");
+    new P0, .PerlHash
+    set P0["a"], "one"
+    set P0["ab"], "two"
+    set P0["abc"], "three"
+
+	set S0, "a"
+    set S1, P0[S0]
+	print S1
+	print "\n"
+
+	concat S0, "b"
+    set S1, P0[S0]
+	print S1
+	print "\n"
+
+	concat S0, "c"
+    set S1, P0[S0]
+	print S1
+	print "\n"
+
+	end
+CODE
+one
+two
+three
 OUTPUT
 
 1;

--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: application/text;
	x-mac-type=54455854;
	x-unix-mode=0644;
	name="hash-timing.pasm"
Content-Disposition: attachment;
	filename=hash-timing.pasm

    new P1, .PerlHash

    # add 26*26 = 676 entries
    set I0, 65
lp0:
    set I1, 65
lp1:
    chr S0, I0
    chr S1, I1
    concat S0, S1
    set P1[S0], I0
    inc I1
    le I1, 90, lp1
    inc I0
    le I0, 90, lp0

	set S6, "AA"
    concat S6, S6
    concat S6, S6
    concat S6, S6
    concat S6, S6
    concat S6, S6
    concat S6, S6

	set S7, "\u1234\u1234"
    concat S7, S7
    concat S7, S7
    concat S7, S7
    concat S7, S7
    concat S7, S7
    concat S7, S7
    
    chr S8, 0x1043F
	concat S8, S8

    concat S8, S8
    concat S8, S8
    concat S8, S8
    concat S8, S8
    concat S8, S8
    concat S8, S8

    set P1[S6], 601
    set P1[S7], 602
    set P1[S8], 603

	set I8, P1[S6]
    ne I8, 601, error
	set I8, P1[S7]
    ne I8, 602, error
	set I8, P1[S8]
    ne I8, 603, error

    set I0, P1
    print I0
    print " hash entries\n"
    
    length I0, S6
    print I0
    print " characters per test key\n"

    # time 1e6 lookups
    set I0, 1000000
    print I0
    print " lookups each\n"
    
    time N1
lp2:
    set I1, P1[S6]
    dec I0
    if I0, lp2
    time N0
    sub N0, N1
    print "rep 1:  "
    print N0
    print " sec\n"

    # time 1e6 lookups
    set I0, 1000000
    time N1
lp3:
    set I1, P1[S7]
    dec I0
    if I0, lp3
    time N0
    sub N0, N1
    print "rep 2:  "
    print N0
    print " sec\n"

    # time 1e6 lookups
    set I0, 1000000
    time N1
lp4:
    set I1, P1[S8]
    dec I0
    if I0, lp4
    time N0
    sub N0, N1
    print "rep 4:  "
    print N0
    print " sec\n"

    end

error:
    print "error\n"
    end


--Apple-Mail-4-976505177
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed



--Apple-Mail-4-976505177--