Robert AH Prins
May 19, 2012, 11:52:04 AM
The compilers used for the samples in this post are
5655-H31 IBM(R) Enterprise PL/I for z/OS V3.R9.M0 (Built:20100923)
and
5724-B67 IBM(R) PL/I for Windows 8.0 (Built:20110825)
It may be that the newer versions of Enterprise PL/I (i.e. V4.1 and V4.2) have
already addressed issues such as the one described here.
While looking at some code, I realized that some very frequently executed loops
were comparing "FIXED DEC (3)" and "FIXED DEC (5)" variables against "PIC
'9999'" ones (To satisfy the c.l.pl1 PITA: the FD5 contains a year, so will
never have more than 4 digits, and of course this should never have happened in
the first place). The code:
5980 select;
5981 when (@trip)
5982 do wait_ptr = wait_top repeat wait_nxt
5983 while(wait_ptr ^= sysnull())
5984 until(wait_list.trip = trip_save);
5985 end;
5986
5987 when (@country)
5988 do wait_ptr = wait_top repeat wait_nxt
5989 while(wait_ptr ^= sysnull())
5990 until(wait_list.country = country_list.country);
5991 end;
5992
5993 when (@year)
5994 do wait_ptr = wait_top repeat wait_nxt
5995 while(wait_ptr ^= sysnull())
5996 until(wait_list.year = year_save);
5997 end;
5998
5999 when (@summary)
6000 wait_ptr = wait_top;
6001 end;
where wait_list.trip is the FIXED DEC(3) and trip_save is the PIC '9999' (the
"year" attributes are FIXED DEC(5) and again PIC '9999').
What does EPLI V3R9 make of this?
It knows that "trip_save" is invariant, so one would expect it to do the
conversion from PIC to FIXED once, outside the loop, and use the converted
value, just a half-word, so easy to keep in a register, in the loop, and one has
to be blind not to see that all "when" statements have something very much in
common, namely "wait_ptr = wait_top", so that is a prime candidate to be
commoned. Finally country is a CHAR(4) in both lists.
Well, let's see what it really does, when compiled OPT(3):
005981 | TMY @TRIP(r5,9658),128
005981 | JE @58L3132
005985 | LGR r1,r5
005982 Q LGF r14,WAIT_TOP(,r5,8324)
005982 Q STY r14,WAIT_PTR(,r5,8320)
005985 | AHI r1,H'22368'
005982 | LARL r11,F'22442'
005982 | @58L3104 DS 0H
005982 | CIJE r14,H'0',@58L3145
005985 X PACK #pd20124_58(3,r13,636),_shadow10(4,r1,0)
005985 X MVC #pd5394_58(3,r13,640),#pd20124_58(r13,636)
005985 X MVN #pd5394_58(1,r13,642),+CONSTANT_AREA(r11,3220)
005985 | CP _shadow17(2,r14,8),#pd5394_58(3,r13,640)
005985 | JE @58L3106
005985 | LGF r14,_shadow16(,r14,0)
005985 | STY r14,WAIT_PTR(,r5,8320)
005985 | J @58L3104
005985 | @58L3132 DS 0H
005987 | TMY @COUNTRY(r5,9660),128
005987 | JE @58L3136
005988 Q LGF r14,WAIT_TOP(,r5,8324)
005988 Q STY r14,WAIT_PTR(,r5,8320)
005988 | LGHI r1,H'0'
005988 | CIJE r14,H'0',@58L3107
005988 | @58L10101DS 0H
005988 | LGHI r1,H'1'
005988 | @58L3107 DS 0H
005988 | CIJE r1,H'0',@58L3106
005991 | LY r1,COUNTRY_PTR(,r5,8040)
005991 | CLC _shadow19(4,r14,12),_shadow19(r1,4)
005991 | JE @58L3106
005991 | LGF r14,_shadow16(,r14,0)
005991 | STY r14,WAIT_PTR(,r5,8320)
005988 | LGHI r1,H'0'
005988 | CIJE r14,H'0',@58L3107
005988 | J @58L10101
005991 | @58L3136 DS 0H
005993 | TMY @YEAR(r5,9663),128
005993 | JE @58L3140
005994 Q LGF r14,WAIT_TOP(,r5,8324)
005994 Q STY r14,WAIT_PTR(,r5,8320)
005994 | LGHI r1,H'0'
005994 | CIJE r14,H'0',@58L3110
005994 | @58L10100DS 0H
005994 | LGHI r1,H'1'
005994 | @58L3110 DS 0H
005994 | CIJE r1,H'0',@58L3106
005997 | CP _shadow12(3,r14,16),YEAR_SAVE(3,r6,1584)
005997 | JE @58L3106
005997 | LGF r14,_shadow16(,r14,0)
005997 | STY r14,WAIT_PTR(,r5,8320)
005994 | LGHI r1,H'0'
005994 | CIJE r14,H'0',@58L3110
005994 | J @58L10100
005997 | @58L3140 DS 0H
005999 | TMY @SUMMARY(r5,9657),128
005999 | JE @58L3144
006000 Q LGF r14,WAIT_TOP(,r5,8324)
006000 Q STY r14,WAIT_PTR(,r5,8320)
006000 | J @58L3106
006000 | @58L3144 DS 0H
006001 | LA r0,+CONSTANT_AREA(,r10,2936)
006001 | MVHI #MX_TEMP58(r13,228),H'12'
006001 | LGF r15,=V(IBMQERNP)(,r3,16)
006001 | ST r0,#MX_TEMP58(,r13,224)
006001 | LA r1,#MX_TEMP58(,r13,224)
006001 | BASR r14,r15
000000 | LGF r14,WAIT_PTR(,r5,8320)
005985 | @58L3106 DS 0H
Note that the select is "OTHERWISE"-less, and this is done on purpose,
as there should never be an "OTHERWISE" situation.
As for the optimizer? It misses the obvious fact that the three "X"-labeled
statements can be moved out of the loop, and repeating the four pairs of
"Q"-labeled statements also strikes me as somewhat of an oversight, to express
things politely. Of course these are only assignments, so the fourfold
repetition only serves to "bloat" the resulting load module, whereas the
"X"-ed statements are executed at every iteration! Boo! Hiss!
As for the Windows compiler? I'll only include the "fixed dec" v "pic" compare
for the trips:
; 5989 when (@trip)
test byte ptr [ebx-0320ah],080h; LIFT_WORK
je @BLBL1619
; 5990 do wait_ptr = wait_top repeat wait_nxt
mov ecx,[ebx-03748h]; LIFT_WORK
mov [ebx-0374ch],ecx; LIFT_WORK
mov edi,dword ptr __imp__IBMPCMPD
align 010h
@BLBL1621:
mov ebx,[ebp-0d4h]; @CBE570
cmp dword ptr [ebx-0374ch],0h; LIFT_WORK
je @BLBL1626
; 5993 end;
push offset FLAT:@CBE214
lea esi,[ebp-068h]; _temp448
mov ecx,esi
mov edx,offset FLAT:@CBE273
lea eax,[ebx-03eedh]; LIFT_WORK
sub esp,0ch
call edi
add esp,010h
mov [ebp-088h],esi; _temp464
mov ecx,[ebx-0374ch]; LIFT_WORK
mov ax,[ebp-068h]; _temp448
xchg al,ah
movzx eax,ax
shl eax,08h
movzx edx,byte ptr [ebp-066h]; _temp448
or edx,eax
shr edx,04h
mov al,[ebp-066h]; _temp448
and al,0fh
movsx eax,al
movsx esi,byte ptr [eax+ @CBE191]
imul esi,edx
mov dl,[ecx+09h]
and dl,0fh
movsx edx,dl
movsx edx,byte ptr [edx+ @CBE191]
mov ax,[ecx+08h]
xchg al,ah
movzx eax,ax
shr eax,04h
imul edx,eax
cmp edx,esi
je @BLBL1626
mov ecx,[ecx]
mov [ebx-0374ch],ecx; LIFT_WORK
jmp @BLBL1621
align 010h
@BLBL1619:
For crying out loud... Sure, the Windows compiler stores the data
in z/OS format, but again doing this for every iteration of the loop?
Please pass me a bucket, things can't really get any worse!
Well, actually they can, at least as far as the Windows compiler is
concerned....
Take this:
do trip_ptr = trip_top repeat trip_list.trip_nxt
while(trip_ptr ^= sysnull())
until(trip_list.trip = lift_list.trip);
end;
Here both XXXX_list.trip variables are FIXED DEC(3), in other words two bytes,
and they are compared for *equality*, let me repeat, EQUALITY! From what I wrote
earlier, you may guess that the Windows compiler does not really subscribe to
that point of view:
; 4236 do trip_ptr = trip_top repeat trip_list.trip_nxt
mov eax,esi
mov ecx,[eax-03898h]; LIFT_WORK
mov [eax-0389ch],ecx; LIFT_WORK
align 010h
@BLBL849:
mov ecx,[ebp-018h]; @CBE671
mov edx,[ecx-0389ch]; LIFT_WORK
test edx,edx
je @BLBL850
; 4239 end;
mov eax,[ecx-038b8h]; LIFT_WORK
mov bl,[eax+0dh]
and bl,0fh
movsx ebx,bl
movsx ebx,byte ptr [ebx+ @CBE191]
mov ax,[eax+0ch]
xchg al,ah
movzx eax,ax
shr eax,04h
imul ebx,eax
mov al,[edx+05h]
and al,0fh
movsx eax,al
movsx esi,byte ptr [eax+ @CBE191]
mov ax,[edx+04h]
xchg al,ah
xchg ebx,eax
movzx ebx,bx
shr ebx,04h
imul esi,ebx
cmp esi,eax
je @BLBL850
mov edx,[edx]
mov [ecx-0389ch],edx; LIFT_WORK
jmp @BLBL849
align 010h
@BLBL850:
OMG! WTF? HS! Of course, Windows runs on your desktop and nobody will be charged
for cycles used, so who cares? Then again, given that both compilers are based
on the same codebase, these Window-icities may very well creep into the real
thing...
As for how it could be done? I don't really think you can do much better than
this code:
; Walk the trip list until an entry's trip equals lift_list.trip, or the
; list ends; trip_ptr receives the matching node or 0 (sysnull).
; Both trip fields are FIXED DEC(3), i.e. two bytes, so they are loaded and
; compared as 16-bit words; a dword compare would also compare the two
; bytes that follow each field.
; NOTE(review): a raw byte compare assumes both fields carry the same sign
; nibble for equal values; the compiler-generated code maps the low nibble
; through a table instead -- confirm before relying on this.
        mov     eax, lift_ptr
        movzx   eax, word ptr [eax + offset lift_list.trip] ; eax = key (2 bytes)
        mov     edx, trip_top                   ; edx = current node
@01:
        test    edx, edx
        jz      @02                             ; end of list: no match
        cmp     ax, word ptr [edx + offset trip_list.trip]
        je      @02                             ; match found
        mov     edx, [edx]                      ; next-node pointer at offset 0
        jmp     @01
@02:
        mov     trip_ptr, edx
Nuff said for now, I'll let you digest this...
Robert
--
Robert AH Prins
robert(a)prino(d)org