Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

ARM assembler in Bash

13 views
Skip to first unread message

presidentbyamendment

unread,
Dec 1, 2019, 4:28:33 PM12/1/19
to
shasARM 2019, son of shasm, This version is public domain.
Rick presidentbyamendment Hohensee

This is 3 files cat'ed together that build a small ARM program
that does an endless loop. This therefore is an ARM assembler
written in Bash. It's basically an rpn calculator, some ARM
constants, some ARM-specific bit-shifters, and a branch
resolver. Make two passes over a source file and I imagine it
could do forward branches. I think ksh will work. I did this
recently in Termux for Android. You can chmod +x stuff in the
Termux directory on an unrooted phone/etc.

# 32 bit binary file assembler in Bash
# base file of This Complete Assembler
# Stuff for making arbitrary binary files
# rick_h...@email.com nov 2019


ab () { # assemble byte(s): this is how to bang bits in sh.
  # Usage: ab byte [byte byte...]
  # Appends every argument, as one raw byte, to the file ./output and adds 1
  # to the assembly cursor $here for each byte written. An argument may be
  # any shell arithmetic constant (decimal, 0x.., 0..) from -128 to 255;
  # negative values are stored as their two's-complement byte.
  # Returns 1, with a message on stderr, at the first value outside that
  # range. Nothing is written and $here is not advanced for it, so the cursor
  # can never drift away from the real file offset.
  # Beware: the apache package ships a command called ab too.
  local tok byte
  for tok in $*; do # unquoted on purpose: a single "1 2 3" argument still splits
    byte=$(( tok ))
    if (( byte < -128 || byte > 255 )); then
      echo "ab: byte value out of range (-128..255): $tok" >&2
      return 1
    fi
    (( byte &= 255 ))
    # %b expands the "\0nnn" escape stored in the oct table, as echo -e does
    printf '%b' "${oct[byte]}" >> output
    (( here += 1 ))
  done
  return 0
}

# Escape table used by ab: oct[N], for N = 0..255, holds a backslash, a zero
# and then N written in octal (oct[0]="\00", oct[8]="\010", oct[255]="\0377"),
# i.e. the escape that makes echo -e / printf %b emit the single byte N.
# Bash's `set` displays these values with a doubled backslash; that is only
# how it quotes them.
# The cells are assigned one at a time instead of with one big array=(...)
# statement, which the author's Termux ksh reportedly did not accept.
# The three octal digits are produced with plain shell arithmetic:
# (N>>6)*100 + ((N>>3)&7)*10 + (N&7) is a decimal number whose digits are the
# octal digits of N, without leading zeros.
_octn=0
while (( _octn < 256 )); do
  oct[_octn]="\\0$(( (_octn >> 6) * 100 + ((_octn >> 3) & 7) * 10 + (_octn & 7) ))"
  (( _octn += 1 ))
done
unset _octn



# stack internals.
# The assembler state stack is the array s; sp is the index of its top cell,
# so the top of stack is ${s[$sp]}.

s[0]=0 # cell 0 is a permanent placeholder: keeps the index like I like it
# Start with an empty stack (sp=0), the same state ws leaves behind. Without
# this, ${s[$sp]} is a bad subscript until something has been pushed. An
# already-set sp is kept, so sourcing this file again does not move it.
: "${sp:=0}"


bump () { # move the stack pointer sp one cell up (an unset sp becomes 1)
  (( sp += 1 ))
}


drop () { # move the stack pointer sp one cell down; the old cell is not erased
  (( sp -= 1 ))
}


# Now for ARM especially, with all quad instructions,
# we're going to need an endianizer

LEEQ () { # little endian quad: ab the top of stack as 4 binary bytes, then drop it
  # Bytes go out least significant first. f is both the loop variable and the
  # scratch cell: each pass starts with f holding the shift count and turns it
  # into the byte to emit, so f ends up holding the most significant byte.
  # Every byte is masked with 255 because >> is a signed shift here.
  for f in 0 8 16 24; do
    (( f = (${s[$sp]} >> f) & 255 ))
    ab $f
  done
  drop
}


# and while we're here...

BEEQ () { # big endian quad: ab the top of stack as 4 binary bytes, then drop it
  # Bytes go out most significant first. Each one is computed as a number in
  # a single arithmetic expansion and masked with 255, because >> is a signed
  # shift. (The earlier code stored the text "N>>16" in f for the second byte
  # and only worked because of operator precedence on the following line.)
  # f is left holding the last byte emitted, as before.
  local bits
  for bits in 24 16 8 0; do
    f=$(( (s[sp] >> bits) & 255 ))
    ab $f
  done
  drop
}


# LEEQ blows up if ${s[sp]} is null.
# Ain't worth a if.

# testing typing savers, might have un-sh dependencies
# the od-less version is LAAETTR


LEED () { # little endian dual: ab the low 16 bits of the top of stack, then drop it
  # Two bytes go out, low byte first. f starts each pass as the shift count
  # and becomes the byte to emit, so it ends up holding the second byte.
  for f in 0 8; do
    (( f = (${s[$sp]} >> f) & 255 ))
    ab $f
  done
  drop
}



s. () { # show the stack: first the pointer, then every cell of s from index 0 up
  # Cells above sp are printed too; drop does not erase them, so the second
  # line can be longer than the live stack.
  printf '%s\n' "sp= ${sp:+ $sp}"
  echo ${s[*]}
}


ho () { # hexdump output: print the cursor $here, then dump the file ./output
  # od options: one hex byte per column with the printable characters shown
  # alongside (-t x1z), offsets in hex (-A x).
  echo '$here is ' ${here}
  od -A x -t x1z output
}


push () { # push every argument onto the stack, left to right
  # Each word is evaluated as shell arithmetic before it is stored, so 0x10
  # lands as 16. $* is left unquoted so that one "1 2 3" argument splits into
  # three pushes. The loop variable i is a global, as it always was.
  for i in $*; do
    bump
    (( s[$sp] = $i ))
  done
}


ws () { # wipe stack: throw every cell away, go back to the empty state, show it
  unset s
  s[0]=0 # the placeholder cell under the stack
  sp=0
  s.
}



# Wanna bang bits, you need bits.
# Defines one variable per bit of a 32 bit word: b0=1, b1=2, b2=4 ... up to
# b31=2147483648. Bash arithmetic is 64 bit, so b31 stays positive.
# let is used for the assignment because it accepts a computed variable name
# on the left-hand side, eval-style.

i=0
while (( i < 32 )); do
  (( j = i + 1 )) # not read by anything visible in this file; ends up as 32
  let "b${i} = 1 << ${i}"
  (( i += 1 ))
done



ELF () { # this has a rm output in it.

  # Start ./output afresh and lay down a 32 bit little-endian ELF executable
  # header for ARM, followed by one program header: 52 + 32 = 84 (0x54) bytes.
  # Needs ab. On return $here is 84, the file offset of the first byte after
  # the headers, which is also the low byte of the entry point written below.
  # Lines marked * are the fields the author found to be required, per Brian
  # Raiter; ARM needs more of them than x86 did.

  # I'm not in love with ELF. What's simple? .EXE? a.out is 8 cells.

  # Apparently, on ARM Linux you need to leave room in virtual low
  # memory for a virtual Commodore 64. Which is why I'm writing an
  # assembler in sh.

  # That was not the case on x86 10 years ago. But I'm not doing
  # CPU/bla/ELF thank you very much.

  rm -f output # -f: a missing file (first run) is not an error
  here=0

  # MAGIC: 0x7f 'E' 'L' 'F'                            *
  # All four bytes go through ab so that $here counts them. Writing the
  # letters with echo left $here three bytes behind the real file offset.
  ab 0x7f 0x45 0x4c 0x46
  ab 1 # 32 bit # spam, but 'file' reads it
  ab 1 # little endian
  ab 1 # ELF version
  ab 3 # GNU/Linux
  ab 5 # ABI supposedly.

  # pad, up to 16 identification bytes
  ab 0 0 0 0 0 0 0

  # executable type object                             *
  ab 2 0

  # machine; ARM                                       *
  ab 40 0

  # version
  ab 1 0 0 0

  # entry point, 0x00010054: load address + 0x54       *
  ab 0x54 0 1 0 # high

  # program header offset: straight after this header  *
  ab 0x34 0 0 0

  # section header offset: none
  ab 0 0 0 0

  # flags, value 0x05000200
  # NOTE(review): presumably EABI version 5 plus soft-float; confirm against
  # the ARM ELF supplement.
  ab 0 2 0 5

  # size of this ELF header (52 bytes)
  ab 52 0

  # program header entry size                          *
  ab 32 0

  # program header entry number
  ab 1 0

  # section header entry size
  ab 40 0

  # section header number
  ab 0 0

  # section string index loc or something
  ab 0 0

  ##############################################
  ### program header

  # type, exec                                         *
  ab 1 0 0 0

  # o/s to program segment                             *
  ab 0 0 0 0 # ???

  # virt load address, 0x00010000                      *
  ab 0 0 1 0

  # phys. addr
  ab 0 0 0 0

  # file size: hard-coded 100 bytes                    *
  ab 100 0 0 0

  # memsize
  ab 100 0 0 0

  # perms
  ab 7 0 0 0 # RWE

  # alignment, 0x00010000
  ab 0 0 1 0

}



rpn () { # RPN calc using stack s[$sp]
  # Usage: rpn token [token ...]
  # Tokens are handled left to right. Anything that is not one of the words
  # below is pushed as a number. Binary operators take the second-from-top
  # cell as left operand and the top cell as right operand, and leave the
  # result in their place (so "10 3 -" leaves 7).
  #   .               print the top of stack, then drop it
  #   + - % /         add, subtract, modulo, divide
  #   eor XOR ^       exclusive or        | or    inclusive or
  #   & and           bitwise and         NOT not !   one's complement of top
  #   << ups*         shift left          >> downs*   shift right
  #   AD ad           emit top as 16 bits little endian (LEED)
  #   L               emit top as 32 bits little endian (LEEQ)
  #   bp              bit print the top of stack
  #   bla             prints a remark
  # Quote or escape | & < > ! ^ on the command line so the shell passes them in.
  local t
  for t in $*; do # unquoted on purpose: "2 3 +" given as one argument still splits
    case $t in

      bla)
        echo "Yeah, me too."
        ;;

      .) # the 4th be with u: print first, pop afterwards
        echo "${s[sp]}"
        drop
        ;;

      AD|ad) # duals. 16 bits.
        LEED
        ;;

      +)
        drop
        # the right operand is the cell just dropped, at sp+1
        s[sp]=$(( s[sp] + s[sp+1] ))
        ;;

      -)
        drop
        s[sp]=$(( s[sp] - s[sp+1] ))
        ;;

      %) # modulo
        drop
        s[sp]=$(( s[sp] % s[sp+1] ))
        ;;

      eor|XOR|'^')
        drop
        s[sp]=$(( s[sp] ^ s[sp+1] ))
        ;;

      /)
        drop
        s[sp]=$(( s[sp] / s[sp+1] ))
        ;;

      '|'|or) # two alternatives: the bar itself, or the word
        drop
        s[sp]=$(( s[sp] | s[sp+1] ))
        ;;

      '&'|and)
        drop
        s[sp]=$(( s[sp] & s[sp+1] ))
        ;;

      '<<'|ups*)
        drop
        s[sp]=$(( s[sp] << s[sp+1] ))
        ;;

      NOT|not|\!)
        s[sp]=$(( ~s[sp] ))
        ;;

      '>>'|downs*)
        drop
        s[sp]=$(( s[sp] >> s[sp+1] ))
        ;;

      L)
        LEEQ >> output
        # HARDWARE ASSUMPTION: words are written little endian
        ;;

      bp) # bit print
        bp
        ;;

      *) # default is numbers to push
        push $t
        ;;
    esac
  done
}


align () { # like e.g. .align =shasm
  # Usage: align N   pad the output up to the next multiple of N
  #        align h   print a usage warning
  # When $here is not already a multiple of N, the missing byte count is
  # handed to allot. The remainder and the pad size are left in the globals
  # lop and pad. Nothing is done while $here is empty or 0.
  if [ "$1" = "h" ]; then
    echo -e "\n\n\n\n
Don't use this until \$here has been bumped above 0."
    return
  fi ####################

  if [ -z "$here" ] || [ "$here" = "0" ]; then
    echo "For simplicity, align doesn't work until
\$here has been set greater than 0."
    return
  fi

  (( lop = $here % $1 ))
  (( pad = $1 - $lop ))
  if [ $lop -ne 0 ]; then
    allot $pad
  fi
}


homp () { # homp chomp is homp
  # Prints $1 without its first character, followed by a newline.
  # The text is quoted and has no length limit, so strings longer than 100
  # characters, inner runs of blanks and glob characters all come out intact.
  printf '%s\n' "${1:1}"
}


chom () { # chom "chomp" returns "chom"
  # Prints $1 without its last character, followed by a newline.
  # ${1%?} strips exactly one trailing character whatever the text is, so
  # all-digit strings such as 123 work too, and an empty argument just
  # yields an empty line.
  printf '%s\n' "${1%?}"
}


bigpic () { # bit-number ruler for bp
  # Two lines that label 32 columns from bit 31 down to bit 0: the tens digit
  # of each bit number on the first line, the units digit on the second.
  printf '%s\n' \
    "3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0" \
    "1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0"
}



bp () (
  # bit print: show the top of stack in hex, then the bigpic ruler, then its
  # low 32 bits as a row of 0s and 1s with bit 31 on the left.
  # The body is a ( ) subshell, so the scratch variables stay local and the
  # stack is not touched.
  top=${s[$sp]}
  row=""
  for (( n = 0; n < 32; n++ )); do
    # prepend, so the bit taken last (bit 31) ends up leftmost
    row="$(( (top >> n) & 1 )) $row"
  done
  printf '%x\n' ${s[$sp]}
  bigpic
  echo $row
)

# ## ##### second file, ARM-specific
# shasm machine instruction calculator for ARM
# CONSTs. SO, like, don't change them, eh?
# Every name below is a plain global variable holding a field value that is
# ORed into a 32 bit instruction word on the arm/rpn stack. Bit positions
# are written out as shifts, so this section gives the same values whether
# or not the b0..b31 variables have been defined yet.

PC=15
ALWAYS=$(( 14 << 28 ))   # condition field 14, 0xe0000000
FLAGS=$(( 1 << 20 ))     # bit 20
SHIFTON=$(( 1 << 20 ))   # bit 20 too, same value as FLAGS
IMMED=$(( 1 << 25 ))     # bit 25

# ALU operations: operation number in bits 21 and up, FLAGS and ALWAYS
# already ORed in.
# write your own $ANDFOO if you want conditional execution or flags off

AND=$((  0 << 21 | FLAGS | ALWAYS ))
EOR=$((  1 << 21 | FLAGS | ALWAYS ))
SUB=$((  2 << 21 | FLAGS | ALWAYS ))
RSB=$((  3 << 21 | FLAGS | ALWAYS ))
ADD=$((  4 << 21 | FLAGS | ALWAYS ))
ADC=$((  5 << 21 | FLAGS | ALWAYS ))
SBC=$((  6 << 21 | FLAGS | ALWAYS ))
RSC=$((  7 << 21 | FLAGS | ALWAYS ))
TST=$((  8 << 21 | FLAGS | ALWAYS ))
TEQ=$((  9 << 21 | FLAGS | ALWAYS ))
CMP=$(( 10 << 21 | FLAGS | ALWAYS ))
CMN=$(( 11 << 21 | FLAGS | ALWAYS ))
ORR=$(( 12 << 21 | FLAGS | ALWAYS ))
MOV=$(( 13 << 21 | FLAGS | ALWAYS ))
BIC=$(( 14 << 21 | FLAGS | ALWAYS ))
MVN=$(( 15 << 21 | FLAGS | ALWAYS ))

SMULL=$(( 0xe0d00090 )) # always, flags on, needs 4 regs incl. hi and low

REG=$(( 1 << 4 ))        # ALU op register mode bit
REGSHIFTED=$(( 1 << 4 ))

LOAD=$(( ALWAYS | 1 << 26 | 1 << 20 ))

STORE=$(( ALWAYS | 1 << 26 ))

# Condition fields, bits 28..31. The two-letter names are short aliases:
# flag letter plus T (true) or F (false).
ZERO=0                   ZT=0
NOTZERO=$((  1 << 28 ))  ZF=$(( 1 << 28 ))
CARRY=$((    2 << 28 ))  CT=$(( 2 << 28 ))
NOTCARRY=$(( 3 << 28 ))  CF=$(( 3 << 28 ))
SIGN=$((     4 << 28 ))  NT=$(( 4 << 28 ))
NOTSIGN=$((  5 << 28 ))  NF=$(( 5 << 28 ))
OVER=$((     6 << 28 ))  VT=$(( 6 << 28 ))
NOTOVER=$((  7 << 28 ))  VF=$(( 7 << 28 ))
CARRYNOTZERO=$((            8 << 28 ))
ZEROORNOTCARRY=$((          9 << 28 ))
SIGNOVEREQUAL=$((          10 << 28 ))
SIGNOVERNOTEQUAL=$((       11 << 28 ))
OVERNOTZERO=$((            12 << 28 ))
ZEROORSIGNOVERNOTEQUAL=$(( 13 << 28 ))
# 14 << 28 is ALWAYS, defined at the top of this section
UNCONDITIONAL=$((          15 << 28 ))


B=$(( 5 << 25 ))
BRANCH=$B

LINK=$(( 1 << 24 )) # might not use this...
# looking like I won't for my stuff.
SWI=$(( 0xef000000 ))

# Block transfers: class 4 in bits 25..27. The load form adds only the load
# bit (bit 20). ORing in all of $LOAD would also set bit 26 and change the
# instruction class from 4 to 6.
STOREM=$(( 4 << 25 | ALWAYS ))

LOADEM=$(( 4 << 25 | 1 << 20 | ALWAYS ))

# [stack] [comment] --- [thing] #

# an assembly address label is a shell variable
# you set to $here, the assembly cursor, AKA . or HERE

# label=$HERE # this works and is clear. Finito.


arm () { # shasm has rpn too. Bad idea?
  # Usage: arm token [token ...]
  # Instruction calculator on the stack s[$sp]. Tokens are handled left to
  # right; anything that is not a word listed below is pushed as a number.
  # Opcodes are just numbers (usually all-caps variables expanded by the
  # caller); modifiers are actions on the top of stack.
  #   noflags            clear bit 20 (the FLAGS bit) of the top of stack
  #   resolve            address --- ((address - (here + 8)) >> 2) & 0xffffff
  #   Rs shiftamountreg  top << 8        source Rn hi   top << 16
  #   dest Rd low        top << 12       shiftamount    top << 7
  #   shifttype          top << 5        rotate         top << 8
  #   Rm                 no shift at all: the value is ORed in as it is
  #   p*                 print the top in hex plus a blank, then drop it
  #   + - % / eor XOR ^ OR or AND and ups* downs*   binary operators, left
  #                      operand below the top, right operand on top
  #   OR3                OR the top three cells into one
  #   NOT not            one's complement of the top
  #   drop swap s. bp bla   stack housekeeping and display
  #   AD ad / L          emit the top as 16 / 32 bits little endian
  # Only t and _arm_swap are local here, so plain names stay free for labels.
  local t _arm_swap
  for t in $*; do # unquoted on purpose: one "a b c" argument still splits
    case $t in

      noflags)
        # AND with the complement of bit 20; "x~y" is not valid arithmetic
        s[sp]=$(( s[sp] & ~(1 << 20) ))
        ;;

      resolve) # address --- (address-HERE-8)>>2
        # mask to 24 bits AFTER the shift, for a signed 24 bitter
        s[sp]=$(( ((s[sp] - (here + 8)) >> 2) & 0xffffff ))
        ;;

      # named shifts, but not the OR

      Rs|shiftamountreg) # shift amount register?
        s[sp]=$(( s[sp] << 8 ))
        ;;

      Rm) # Rm is an OR: the field sits at bit 0, nothing to shift
        :
        ;;

      source|Rn|hi)
        s[sp]=$(( s[sp] << 16 ))
        ;;

      dest|Rd|low)
        s[sp]=$(( s[sp] << 12 ))
        ;;

      shiftamount)
        s[sp]=$(( s[sp] << 7 ))
        ;;

      shifttype)
        s[sp]=$(( s[sp] << 5 ))
        ;;

      rotate)
        s[sp]=$(( s[sp] << 8 ))
        ;;

      s.)
        s.
        ;;

      # reuse shasm rpn stuff
      bla)
        echo "Yeah, me too."
        ;;

      p*)
        printf '%x ' ${s[$sp]} # Bash and ksh
        drop
        ;;

      +)
        drop
        s[sp]=$(( s[sp] + s[sp+1] ))
        ;;

      -)
        drop
        s[sp]=$(( s[sp] - s[sp+1] ))
        ;;

      %)
        drop
        s[sp]=$(( s[sp] % s[sp+1] ))
        ;;

      eor|XOR|'^')
        drop
        s[sp]=$(( s[sp] ^ s[sp+1] ))
        ;;

      /)
        drop
        s[sp]=$(( s[sp] / s[sp+1] ))
        ;;

      OR|or)
        drop
        s[sp]=$(( s[sp] | s[sp+1] ))
        ;;

      OR3)
        drop
        s[sp]=$(( s[sp] | s[sp+1] ))
        drop
        s[sp]=$(( s[sp] | s[sp+1] ))
        ;;

      AND|and)
        drop
        s[sp]=$(( s[sp] & s[sp+1] ))
        ;;

      ups*)
        drop
        s[sp]=$(( s[sp] << s[sp+1] ))
        ;;

      NOT|not)
        s[sp]=$(( ~s[sp] ))
        ;;

      downs*)
        drop
        s[sp]=$(( s[sp] >> s[sp+1] ))
        ;;

      drop)
        drop
        ;;

      swap) # exchange the top two cells, leaving globals such as foo alone
        _arm_swap=${s[sp]}
        s[sp]=${s[sp-1]}
        s[sp-1]=$_arm_swap
        ;;

      AD|ad)
        LEED
        ;;

      L)
        LEEQ >> output
        ;;

      bp)
        bp
        ;;

      *)
        push $t
        ;;
    esac
  done
}


bigpic () { # bit-number ruler for bp, ARM flavour
  # Three lines: the tens digits and the units digits of the bit numbers 31
  # down to 0, then a legend naming the fields of an ARM instruction word.
  local line
  for line in \
    "3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0" \
    "1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0" \
    " COND | meta| OPCODE|F| Rn | Rd | shifter operands"
  do
    echo "$line"
  done
}



bp () (
  # bit print of the top of stack: its value in hex, the bigpic ruler, then
  # the low 32 bits spelled out as 0s and 1s, highest bit first.
  # Everything happens in a ( ) subshell: local scope, stack untouched.
  word=${s[$sp]}
  bits=""
  left=32
  while (( left > 0 )); do
    bits="$(( word & 1 )) $bits" # lowest remaining bit goes in front
    (( word >>= 1 ))
    (( left -= 1 ))
  done
  printf '%x\n' ${s[$sp]}
  bigpic
  echo $bits
) # local scope

# third file, program
# shasARM program to just use a branch. Resolved perhaps.
# The assembler parts must be loaded first, e.g.:

#. ../../shasm not the paths you use eh?
#. ./shas
## branch, do an endless loop of nothing.


here=0

ELF         # headers into a fresh ./output
foo=$here   # label: the address right after the headers
echo "$foo"

# Four instruction words, each emitted little endian by L.
arm 0xe3a07001 L                      # presumably mov r7, #1 -- confirm
arm 0xe3a00000 L                      # presumably mov r0, #0 -- confirm
arm $B $ALWAYS or $foo resolve or L   # branch, always, back to foo
arm $SWI L                            # sits after the loop

I probably pasted this into a browser window, so
watch the EOLS and \s.
0 new messages