Register cross-reference for Virtual Pascal programs with in-line assembler

9 views
Skip to first unread message

Robert Prins

unread,
Feb 11, 2021, 1:44:39 PM2/11/21
to
Unlikely to be of any interest, but the following (Regina) REXX exec will
produce an output file with all registers (x86/mmx/xmm/ymm/ (not ST/ST(x)!) used
in assembler procedures in Virtual Pascal.

Speed?

Processing the just under 80,000 lines of code that make up the sources for
which this routine was written takes about 4 seconds on a quad-core 2.5GHz Intel
i7-4710MQ from 2014 using rotating rust. The GPL'ed programs are available on
request.

Note that some lines below wrapped!

/* REXX exec to extract data from all Pascal files */
/*** trace ?r ***************************************************** \| *
* (C) Copyright Robert AH Prins, 2020-2021 *
************************************************************************
* ------------------------------------------------------------------ *
* | Date | By | Remarks | *
* |------------+------+----------------------------------------------| *
* | | | | *
* |------------+------+----------------------------------------------| *
* | 2021-02-10 | RAHP | Add code to scan for MMX, XMM, and YMM regs | *
* |------------+------+----------------------------------------------| *
* | 2020-05-29 | RAHP | Initial version | *
* |------------+------+----------------------------------------------| *
************************************************************************
* HH-XREF is a REXX exec to extract data from all Pascal files that *
* make up Prino's hitchhike statistics programs. *
* *
* It currently produces one file, *
* *
* - registers.new.data *
* *
* This file contains the various x86 GP, MMX, XMM, and YMM registers *
* used by every procedure in every file (except ESP/EBP and the FPU *
* registers) *
* *
* Requirements: *
* *
* - register names must be in lowercase *
* - every procedure must end with an "end; {name-of-procedure}" *
* statement *
* - statements containing MMX, XMM, or YMM registers should contain *
* the real statement as a comment, i.e. like *
* *
* { vpxor xmm2, xmm1, [eax + edx] } db $c5,$f1,$ef,$14,$02 *
* { vpcmpistri xmm0, xmm2, 8 } db $c4,$e3,$79,$63,$c2,$08 *
************************************************************************
* Send questions, suggestions and/or bug reports to: *
* *
* robert.a.h.prins[a]gmail.you-know-the domain *
************************************************************************
* This program is free software: you can redistribute it and/or *
* modify it under the terms of the GNU General Public License as *
* published by the Free Software Foundation, either version 3 of *
* the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/> *
***********************************************************************/
address system 'dir /b /on *.pas' with output stem dir.

oreg = 0
out_reg. = ''

oproc = -1

x86 = 'ebx esi edi eax ecx edx'
x86m = 'mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7'
x86x = 'xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7'
x86y = 'ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7'

lm = 0
lx = 0
ly = 0

anum = xrange('a', 'z') || xrange('A', 'Z') || xrange('0', '9')
nul = x2c(00)

do i = 1 to dir.0
pas. = -1
address system 'type' dir.i with output stem pas.

parse value dir.i with srce '.' .

do p = 1 to pas.0
line = pas.p

if left(line, 4) = 'unit' then
do
do p = p + 1 by 1 while pos('implementation', pas.p) = 0
end

iterate p
end

regs = ''

if left(line, 9) = 'procedure' |,
left(line, 8) = 'function' then
do
if pos('assembler;', line) \= 0 then
parse value space(line) with pf ' ' proc '; assem' .
else
parse value space(line) with pf ' ' proc

pl = p

! = pos('(', proc)
if ! \= 0 then
do
parm = substr(proc, !)
proc = left(proc, ! - 1)
end
else
parm = ''

! = pos(':', proc)
if ! \= 0 then
proc = left(proc, ! - 1)

proc = strip(proc,, ';')

pf = ''
rem = ''
asm = ''
frame = ''
mode = ' '

if oproc \= proc then
do
parse value line with pf (proc) rem
if pos('assembler;', rem) \= 0 then
do
parse value line with pf (proc) rem 'assembler; {&uses ' regs
'}' frame
asm = 'assembler; {&uses' regs'}' strip(frame)
end

r = '------'
rm = '--------'
rx = '--------'
ry = '--------'

do p = p + 1 by 1 until pos('{'proc'}', pas.p) \= 0 | /* r =
'123456' | */ pas.p = -1
line = pas.p

do w = 1 to words(x86)
! = pos(word(x86, w), line)

if ! \= 0 then
if translate(substr(line, ! - 1, 1), left(nul, length(anum),
nul), anum) = nul |,
translate(substr(line, ! + 3, 1), left(nul, length(anum),
nul), anum) = nul then
! = 0

if ! \= 0 then
r = overlay(w, r, w)
end

do w = 1 to words(x86m)
! = pos(word(x86m, w), line)

if ! \= 0 then
if translate(substr(line, ! - 1, 1), left(nul, length(anum),
nul), anum) = nul |,
translate(substr(line, ! + 3, 1), left(nul, length(anum),
nul), anum) = nul then
! = 0

if ! \= 0 then
rm = overlay(w, rm, w)
end

do w = 1 to words(x86x)
! = pos(word(x86x, w), line)

if ! \= 0 then
if translate(substr(line, ! - 1, 1), left(nul, length(anum),
nul), anum) = nul |,
translate(substr(line, ! + 4, 1), left(nul, length(anum),
nul), anum) = nul then
! = 0

if ! \= 0 then
rx = overlay(w, rx, w)
end

do w = 1 to words(x86y)
! = pos(word(x86y, w), line)

if ! \= 0 then
if translate(substr(line, ! - 1, 1), left(nul, length(anum),
nul), anum) = nul |,
translate(substr(line, ! + 4, 1), left(nul, length(anum),
nul), anum) = nul then
! = 0

if ! \= 0 then
ry = overlay(w, ry, w)
end
end

do p = p by 1 while pos('{'proc'}', pas.p) = 0 & pas.p \= -1
end

if parm = '' then
proc = proc';'
else
if right(parm, 1) \= ';' then
parm = parm';'

r_used = ''
rm_used = ''
rx_used = ''
ry_used = ''

do ! = 1 to words(x86)
? = substr(r, !, 1)

if ! = ? then
r_used = r_used word(x86, !)
end

do ! = 1 to words(x86m)
? = substr(rm, !, 1)

if ! = ? then
rm_used = rm_used word(x86m, !)
end

rm_used = strip(rm_used)
lm = max(lm, length(rm_used))

do ! = 1 to words(x86x)
? = substr(rx, !, 1)

if ! = ? then
rx_used = rx_used word(x86x, !)
end

rx_used = strip(rx_used)
lx = max(lx, length(rx_used))

do ! = 1 to words(x86y)
? = substr(ry, !, 1)

if ! = ? then
ry_used = ry_used word(x86y, !)
end

ry_used = strip(ry_used)
ly = max(ly, length(ry_used))

r_used = strip(r_used)
proc = strip(proc,, ';')

if left(asm, 1) = 'a' then
mode = '*'
else
if r_used \= '' then
mode = '#'
else
mode = ' '

regs = translate(regs, ' ', ',')

rx = ''
if regs \= '' | r_used \= '' then
rx = left(regs, 11) '/' r_used

if rm_used \= '' |,
rx_used \= '' |,
ry_used \= '' then
do
rx = left(rx, 37) '/' left(rm_used, 31)
rx = rx '/' left(rx_used, 39)
rx = rx '/' left(ry_used, 39)
end

oreg = oreg + 1
out_reg.oreg = left(srce'.pas', 14) right(pl, 4) mode left(proc, 31) rx
/*--------------------------------------------------------*/

oproc = strip(proc,, ';')
end
end
end
end

/*---------------------------------------------------------------------+
| Write out the "registers.new.data" file |
+---------------------------------------------------------------------*/
file = 'registers.new.data'

"if exist" file "del" file

do i = 1 to oreg
out_reg.i = strip(substr(out_reg.i, 1, 93),
substr(out_reg.i, 95, lm) '/',
substr(out_reg.i, 128, lx) '/',
substr(out_reg.i, 171, ly))

call lineout file, out_reg.i
end

call lineout file

===============================================================================

Sample (wrapped) output:

readfile.pas 179 * utf8_length esi / esi eax edx
/ /

readfile.pas 231 * readfile none / ebx esi edi
eax ecx edx / mm0 mm1 mm5 mm6 mm7 / xmm0 / ymm0 ymm1


The first register column comes from the "{&uses ...}" directive. And yes, many
of my procedures are using the ebx, esi, and edi registers without saving them
as they are only one level below the program, and VP doesn't know anything about
inter-procedural optimisation anyway.

I use <https://defuse.ca/online-x86-assembler.htm> to assemble code to get the
required "db $xx,$yy,$zz... sequences.

And another yes, xmm and ymm registers should really be merged as the xmm's are
just aliases for the lower 16 bytes of the ymm's.

Robert
--
Robert AH Prins
robert(a)prino(d)org
The hitchhiking grandfather - https://prino.neocities.org/indez.html
Some REXX code for use on z/OS - https://prino.neocities.org/zOS/zOS-Tools.html
Reply all
Reply to author
Forward
0 new messages