Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Performance : assignment and memcpy for structure

0 views
Skip to first unread message

Alex Vinokur

unread,
Oct 24, 2000, 3:00:00 AM10/24/00
to
Hi,

Performance of assignment and memcpy for structure was measured.

Average time-cost of execution was obtained.

Main conclusion (for pSOS) : assignment is much faster than memcpy.

=======================
Alex Vinokur
http://go.to/alexv_math
=======================

P.S. The results of the experiment are shown below.

==============================
pRISM+ for pSOSystem/PowerPC
pRISM+ 1.2.3 • pSOSystem 2.2.2
Diab C Compiler
==============================

//================= C-code : BEGIN =================

# include <types.h>
# include <prepc.h>
# include <coder.h>
# include <psos.h>
# include <pna.h>
# include <phile.h>

//---------------------
// Number of copies performed inside each timed loop in test1; the measured
// time-base delta is divided by this value to obtain a per-copy average.
# define ITERATIONS 2000

//---------------------
// Brace-enclosed member list shared by every struct variant below, so that
// the variants differ only in the packing pragma in effect at their
// declaration: two long arrays separated by a single char.
# define STRUCT_BODY {long a1 [345]; char a2; long a3 [124];}

// Same members, declared under five different `#pragma pack` settings.
// The sizes reported by the posted run are 1877, 1878, 1880, 1880 and 1880
// bytes respectively.
#pragma pack(1)
struct s_pack1 STRUCT_BODY;

#pragma pack(2)
struct s_pack2 STRUCT_BODY;

#pragma pack(4)
struct s_pack4 STRUCT_BODY;

#pragma pack(8)
struct s_pack8 STRUCT_BODY;

// NOTE(review): pack(0) presumably restores the compiler's default packing
// (the "_u" variant) -- confirm against the Diab compiler documentation.
// The setting stays in effect for everything declared after this point.
#pragma pack(0)
struct s_pack_u STRUCT_BODY;

//---------------------
// This function has been received from Hanoch Magal
//
// Executes the PowerPC `mftb` (move from time base) instruction with r3 as
// the destination. get_hr_time uses the result as the low 32 bits of its
// 64-bit timestamp.
// NOTE(review): relies on r3 being the register in which a compiler-specific
// `asm` function hands back its unsigned long result -- confirm against the
// Diab compiler / PowerPC EABI documentation.
asm unsigned long GetTBL (void)
{
mftb r3
}

//---------------------
// This function has been received from Hanoch Magal
//
// Executes the PowerPC `mftbu` (move from time base upper) instruction with
// r3 as the destination. get_hr_time uses the result as the high 32 bits of
// its 64-bit timestamp.
// NOTE(review): relies on r3 being the register in which a compiler-specific
// `asm` function hands back its unsigned long result -- confirm against the
// Diab compiler / PowerPC EABI documentation.
asm unsigned long GetTBU (void)
{
mftbu r3
}

//---------------------
// This function has been received from Hanoch Magal
// (read sequence reworked so that a carry from the low word into the high
// word between the two reads can no longer produce a torn timestamp).
//
// LONG_SIZE is the number of bits contributed by each half of the time base;
// the code assumes GetTBL and GetTBU each return 32 significant bits.
# define LONG_SIZE 32

// Returns the current time base as one 64-bit tick count:
// (GetTBU () << LONG_SIZE) combined with GetTBL ().
//
// The two halves cannot be read in a single step here, so the upper half is
// sampled before and after the lower half. If it changed, the lower half
// wrapped in between and the pair is inconsistent; the read is repeated.
unsigned long long get_hr_time (void)
{
    unsigned long upper, lower;

    do
    {
        upper = GetTBU () ;
        lower = GetTBL () ;
    } while (GetTBU () != upper);

    return ((((unsigned long long) upper) << LONG_SIZE) + lower) ;
}

//---------------------
// Prints one result line of the form
//     <text_i> <tab>: hr_time = <high>.<low>
// where <high> is time0 shifted right by LONG_SIZE bits and <low> is the
// remaining low LONG_SIZE bits, zero-padded to 12 decimal digits.
//
// Note that the '.' merely separates the two words of the raw tick count;
// the printed value is not a decimal fraction of any time unit.
void show_hr_time (unsigned long long time0, char* text_i)
{
    const unsigned long long low_mask =
        (((unsigned long long) 1) << LONG_SIZE) - 1;
    unsigned long high_word = (unsigned long) (time0 >> LONG_SIZE);
    unsigned long low_word = (unsigned long) (time0 & low_mask);

    printf ("%s \t: hr_time = %lu.%012lu\n", text_i, high_word, low_word);
}

//---------------------
// Reports one timed run: the time-base delta between time_start and time_end
// divided by ITERATIONS (i.e. the average cost of one copy), printed by
// show_hr_time under the given label.
static void report_average (unsigned long long time_start,
                            unsigned long long time_end,
                            char* label)
{
    show_hr_time ((time_end - time_start)/ITERATIONS, label);
}

// Runs the complete measurement for one struct variant:
//   1. prints sizeof (struct tag) and checks that src and dst have that size;
//   2. times ITERATIONS struct assignments  dst = src;
//   3. times ITERATIONS memcpy calls copying src into dst.
// `tag` is the bare struct tag; it is also stringized to build the output
// labels, so the printed text is identical to the hand-written version.
// sizeof is cast to unsigned long and printed with %lu because passing a
// size_t to %d is a format/argument mismatch.
// (No comments inside the macro body: a // comment would swallow the
// line continuations.)
# define MEASURE_STRUCT_COPY(tag, src, dst)                                  \
    do                                                                       \
    {                                                                        \
        unsigned long long time_start, time_end;                             \
        int i;                                                               \
                                                                             \
        printf ("\tsizeof (" #tag ") = %lu\n",                               \
                (unsigned long) sizeof (struct tag));                        \
        assert (sizeof (src) == sizeof (dst));                               \
        assert (sizeof (src) == sizeof (struct tag));                        \
                                                                             \
        time_start = get_hr_time ();                                         \
        for (i = 0; i < ITERATIONS; i++)                                     \
        {                                                                    \
            dst = src;                                                       \
        }                                                                    \
        time_end = get_hr_time ();                                           \
        report_average (time_start, time_end, #tag " -> assignment");        \
                                                                             \
        time_start = get_hr_time ();                                         \
        for (i = 0; i < ITERATIONS; i++)                                     \
        {                                                                    \
            memcpy (&dst, &src, sizeof (struct tag));                        \
        }                                                                    \
        time_end = get_hr_time ();                                           \
        report_average (time_start, time_end, #tag " -> memcpy");            \
    } while (0)

//---------------------
// Measures, for each of the five packing variants of the test structure, the
// average time of a struct assignment and of an equivalent memcpy, and prints
// the results.
//
// The source objects are left uninitialized as in the original experiment:
// their bytes are only copied, never interpreted.
void test1 (void)
{
    struct s_pack1 source1, target1;
    struct s_pack2 source2, target2;
    struct s_pack4 source4, target4;
    struct s_pack8 source8, target8;
    struct s_pack_u source_u, target_u;

    //=============== s_pack1 ================
    MEASURE_STRUCT_COPY (s_pack1, source1, target1);

    //=============== s_pack2 ================
    MEASURE_STRUCT_COPY (s_pack2, source2, target2);

    //=============== s_pack4 ================
    MEASURE_STRUCT_COPY (s_pack4, source4, target4);

    //=============== s_pack8 ================
    MEASURE_STRUCT_COPY (s_pack8, source8, target8);

    //=============== s_pack_u ================
    MEASURE_STRUCT_COPY (s_pack_u, source_u, target_u);

} // void test1 (void)

# undef MEASURE_STRUCT_COPY


//=====================================================
// Skeleton of the program's entry routine as posted: parameters, local
// declarations and surrounding setup/teardown were elided by the author
// ("Stuff"). The visible part runs the whole test1 measurement five times,
// printing a numbered banner before each run.
// NOTE(review): `the_i` is not declared in the visible code; presumably it is
// an int declared in the elided section (it is printed with %d) -- confirm.
void root ( /* Stuff */)
{
// Stuff

// Repeat the full experiment 5 times so run-to-run variation can be compared.
for (the_i = 1; the_i <= 5; the_i++)
{
printf ("\n====== Test#%d (struct : assignment & memcpy) ======\n",
the_i);
test1 ();
}

// Stuff

}
//================= C-code : END ===================


//######## Results of the Running : BEGIN ##########

====== Test#1 (struct : assignment & memcpy) ======
sizeof (s_pack1) = 1877
s_pack1 -> assignment : hr_time = 0.000000000225
s_pack1 -> memcpy : hr_time = 0.000000001803
sizeof (s_pack2) = 1878
s_pack2 -> assignment : hr_time = 0.000000000226
s_pack2 -> memcpy : hr_time = 0.000000001804
sizeof (s_pack4) = 1880
s_pack4 -> assignment : hr_time = 0.000000000224
s_pack4 -> memcpy : hr_time = 0.000000001809
sizeof (s_pack8) = 1880
s_pack8 -> assignment : hr_time = 0.000000000225
s_pack8 -> memcpy : hr_time = 0.000000001803
sizeof (s_pack_u) = 1880
s_pack_u -> assignment : hr_time = 0.000000000225
s_pack_u -> memcpy : hr_time = 0.000000001807

====== Test#2 (struct : assignment & memcpy) ======
sizeof (s_pack1) = 1877
s_pack1 -> assignment : hr_time = 0.000000000226
s_pack1 -> memcpy : hr_time = 0.000000001802
sizeof (s_pack2) = 1878
s_pack2 -> assignment : hr_time = 0.000000000227
s_pack2 -> memcpy : hr_time = 0.000000001803
sizeof (s_pack4) = 1880
s_pack4 -> assignment : hr_time = 0.000000000226
s_pack4 -> memcpy : hr_time = 0.000000001808
sizeof (s_pack8) = 1880
s_pack8 -> assignment : hr_time = 0.000000000225
s_pack8 -> memcpy : hr_time = 0.000000001805
sizeof (s_pack_u) = 1880
s_pack_u -> assignment : hr_time = 0.000000000226
s_pack_u -> memcpy : hr_time = 0.000000001805

====== Test#3 (struct : assignment & memcpy) ======
sizeof (s_pack1) = 1877
s_pack1 -> assignment : hr_time = 0.000000000225
s_pack1 -> memcpy : hr_time = 0.000000001802
sizeof (s_pack2) = 1878
s_pack2 -> assignment : hr_time = 0.000000000226
s_pack2 -> memcpy : hr_time = 0.000000001802
sizeof (s_pack4) = 1880
s_pack4 -> assignment : hr_time = 0.000000000224
s_pack4 -> memcpy : hr_time = 0.000000001807
sizeof (s_pack8) = 1880
s_pack8 -> assignment : hr_time = 0.000000000226
s_pack8 -> memcpy : hr_time = 0.000000001803
sizeof (s_pack_u) = 1880
s_pack_u -> assignment : hr_time = 0.000000000225
s_pack_u -> memcpy : hr_time = 0.000000001803

====== Test#4 (struct : assignment & memcpy) ======
sizeof (s_pack1) = 1877
s_pack1 -> assignment : hr_time = 0.000000000225
s_pack1 -> memcpy : hr_time = 0.000000001803
sizeof (s_pack2) = 1878
s_pack2 -> assignment : hr_time = 0.000000000226
s_pack2 -> memcpy : hr_time = 0.000000001802
sizeof (s_pack4) = 1880
s_pack4 -> assignment : hr_time = 0.000000000225
s_pack4 -> memcpy : hr_time = 0.000000001804
sizeof (s_pack8) = 1880
s_pack8 -> assignment : hr_time = 0.000000000226
s_pack8 -> memcpy : hr_time = 0.000000001804
sizeof (s_pack_u) = 1880
s_pack_u -> assignment : hr_time = 0.000000000225
s_pack_u -> memcpy : hr_time = 0.000000001808

====== Test#5 (struct : assignment & memcpy) ======
sizeof (s_pack1) = 1877
s_pack1 -> assignment : hr_time = 0.000000000225
s_pack1 -> memcpy : hr_time = 0.000000001802
sizeof (s_pack2) = 1878
s_pack2 -> assignment : hr_time = 0.000000000226
s_pack2 -> memcpy : hr_time = 0.000000001801
sizeof (s_pack4) = 1880
s_pack4 -> assignment : hr_time = 0.000000000225
s_pack4 -> memcpy : hr_time = 0.000000001805
sizeof (s_pack8) = 1880
s_pack8 -> assignment : hr_time = 0.000000000226
s_pack8 -> memcpy : hr_time = 0.000000001806
sizeof (s_pack_u) = 1880
s_pack_u -> assignment : hr_time = 0.000000000226
s_pack_u -> memcpy : hr_time = 0.000000001803

//######## Results of the Running : END ############


Sent via Deja.com http://www.deja.com/
Before you buy.

Alf Katz

unread,
Oct 24, 2000, 3:00:00 AM10/24/00
to
This should be compiler, rather than operating system, dependent. If you
generate the intermediate assembler, you might find the reason.

--
Regards,
Alf Katz
alf...@ieee.org

"Alex Vinokur" <ale...@bigfoot.com> wrote in message
news:8t3jmg$mvj$1...@nnrp1.deja.com...


> Hi,
>
> Performance of assignment and memcpy for structure was measured.
>
> Average time-cost of execution was obtained.
>
> Main conclusion (for pSOS) : assignment is much faster than memcpy.
>
> =======================
> Alex Vinokur
> http://go.to/alexv_math
> =======================
>
> P.S. The results of the experiment are shown below.
>
> ==============================
> pRISM+ for pSOSystem/PowerPC

> pRISM+ 1.2.3 . pSOSystem 2.2.2

Alex Vinokur

unread,
Oct 24, 2000, 3:00:00 AM10/24/00
to
In article <isdJ5.7190$Xx3....@news1.eburwd1.vic.optushome.com.au>,

"Alf Katz" <alf...@optushome.com.au> wrote:
> This should be compiler, rather than operating system, dependent. If you
> generate the intermediate assembler, you might find the reason.
>
> --
> Regards,
> Alf Katz
> alf...@ieee.org

Here is a relevant piece of assembler code

---------------------------------------------------------------
[snip]
---------------------------------------------------------------
571 # for (i = 0; i < ITERATIONS; i++)
572 rlwinm r27,r27,31,1,31
573 .L33:
574 # {
575 # target_u = source_u;
576 addi r11,r1,16924
577 addi r12,r1,15044
578 addi r10,r0,235
579 mtspr ctr,r10
580 .L54:
581 lwzu r9,4(r12)
582 lwzu r10,4(r12)
583 stwu r9,4(r11)
584 stwu r10,4(r11)
585 bc 16,0,.L54
586 addi r27,r27,-1
587 cmpi 0,0,r27,0
588 bc 4,2,.L33 # ne
589 # }
---------------------------------------------------------------
[snip]
---------------------------------------------------------------
621 .L36:
622 # for (i = 0; i < ITERATIONS; i++)
623 # {
624 # memcpy (&target_u, &source_u, sizeof (struct s_pack_u));
625 addi r3,r1,16928
626 addi r4,r1,15048
627 addi r5,r0,1880
628 #$$fn 0x1ff8 0x0
629 #$$tl 0x38 0x0
630 bl memcpy
631 addi r27,r27,-1
632 cmpi 0,0,r27,0
633 bc 4,2,.L36 # ne
634 # }
---------------------------------------------------------------
[snip]
---------------------------------------------------------------

--


=======================
Alex Vinokur
http://go.to/alexv_math
=======================

Dan Pop

unread,
Oct 24, 2000, 3:00:00 AM10/24/00
to
In <8t3jmg$mvj$1...@nnrp1.deja.com> Alex Vinokur <ale...@bigfoot.com> writes:

>Performance of assignment and memcpy for structure was measured.
>
>Average time-cost of execution was obtained.
>
>Main conclusion (for pSOS) : assignment is much faster than memcpy.

This only shows that your compiler doesn't inline memcpy calls.

Regardless of any performance issues, if you want to copy a structure,
assignment is the natural way to do it.

Dan
--
Dan Pop
CERN, IT Division
Email: Dan...@cern.ch
Mail: CERN - IT, Bat. 31 1-014, CH-1211 Geneve 23, Switzerland

0 new messages