Some have expressed skepticism regarding the goal of writing an emulator and even hinted that a disassembler may be a large task.
To counter that, I'll post my current disassembler code. It is really dead simple. There are a handful of known minor issues with it, all of them easy to fix.
At some point, I'll probably want a more advanced disassembler that handles labels and can infer which parts of a binary blob might be code and which parts probably aren't. That's not a problem as I've done that before for the VAX architecture.
/*
 * Operand-layout codes, one per instruction (sub)format.
 *
 * The S/360 POP names the basic formats RR/RS/RX/SI/SS but does not properly
 * define their subformats, and the number of formats (and subformats)
 * exploded as the architecture matured.  The names used here are meant to
 * stay as stable (forward-compatible) as possible with that format zoo:
 *
 *   - a letter suffix marks a subformat that POP spells out explicitly,
 *   - a number suffix marks a subformat that is only implicit in POP,
 *   - the most common subtype of each format gets the plain, unsuffixed name.
 *
 * Numbering starts at 1; the value 0 is not assigned to any format.
 * The trailing comment on each code lists the operand fields in encoding
 * order ("--" marks a field that is not shown as an operand).
 */
enum {
    RR   = 1,   /* R1/R2 */
    RR_2 = 2,   /* M1/R2 */
    RR_3 = 3,   /* R1/-- */

    RS   = 4,   /* R1/R3, B2/D2 */
    RS_2 = 5,   /* R1/--, B2/D2 */

    RX   = 6,   /* R1/X2, B2/D2 */
    RX_b = 7,   /* M1/X2, B2/D2 */

    SI   = 8,   /* I2, B1/D1 */
    SI_2 = 9,   /* --, B1/D1 */

    SS   = 10,  /* L, B1/D1, B2/D2 */
    SS_b = 11   /* L1/L2, B1/D1, B2/D2 */
};
/*
 * Mnemonic for every defined opcode, indexed by the opcode byte.
 *
 * S/360 is simple: every opcode is a single byte (later architectures use
 * 16-bit opcodes), so one 256-entry table covers the whole instruction set.
 * The contents are taken from Appendix G of the S/360 POP.
 *
 * Each slot holds up to four characters plus the terminating NUL.  Opcodes
 * that are not listed keep the implicit all-zero initializer, i.e. they read
 * back as the empty string.
 *
 * Entries are listed in opcode order, one group per high nibble.
 */
static const char iname[256][5] = {
    /* 0x0_ */
    [0x04] = "SPM",  [0x05] = "BALR", [0x06] = "BCTR", [0x07] = "BCR",
    [0x08] = "SSK",  [0x09] = "ISK",  [0x0A] = "SVC",

    /* 0x1_ */
    [0x10] = "LPR",  [0x11] = "LNR",  [0x12] = "LTR",  [0x13] = "LCR",
    [0x14] = "NR",   [0x15] = "CLR",  [0x16] = "OR",   [0x17] = "XR",
    [0x18] = "LR",   [0x19] = "CR",   [0x1A] = "AR",   [0x1B] = "SR",
    [0x1C] = "MR",   [0x1D] = "DR",   [0x1E] = "ALR",  [0x1F] = "SLR",

    /* 0x2_ */
    [0x20] = "LPDR", [0x21] = "LNDR", [0x22] = "LTDR", [0x23] = "LCDR",
    [0x24] = "HDR",
    [0x28] = "LDR",  [0x29] = "CDR",  [0x2A] = "ADR",  [0x2B] = "SDR",
    [0x2C] = "MDR",  [0x2D] = "DDR",  [0x2E] = "AWR",  [0x2F] = "SWR",

    /* 0x3_ */
    [0x30] = "LPER", [0x31] = "LNER", [0x32] = "LTER", [0x33] = "LCER",
    [0x34] = "HER",
    [0x38] = "LER",  [0x39] = "CER",  [0x3A] = "AER",  [0x3B] = "SER",
    [0x3C] = "MER",  [0x3D] = "DER",  [0x3E] = "AUR",  [0x3F] = "SUR",

    /* 0x4_ */
    [0x40] = "STH",  [0x41] = "LA",   [0x42] = "STC",  [0x43] = "IC",
    [0x44] = "EX",   [0x45] = "BAL",  [0x46] = "BCT",  [0x47] = "BC",
    [0x48] = "LH",   [0x49] = "CH",   [0x4A] = "AH",   [0x4B] = "SH",
    [0x4C] = "MH",   [0x4E] = "CVD",  [0x4F] = "CVB",

    /* 0x5_ */
    [0x50] = "ST",
    [0x54] = "N",    [0x55] = "CL",   [0x56] = "O",    [0x57] = "X",
    [0x58] = "L",    [0x59] = "C",    [0x5A] = "A",    [0x5B] = "S",
    [0x5C] = "M",    [0x5D] = "D",    [0x5E] = "AL",   [0x5F] = "SL",

    /* 0x6_ */
    [0x60] = "STD",
    [0x68] = "LD",   [0x69] = "CD",   [0x6A] = "AD",   [0x6B] = "SD",
    [0x6C] = "MD",   [0x6D] = "DD",   [0x6E] = "AW",   [0x6F] = "SW",

    /* 0x7_ */
    [0x70] = "STE",
    [0x78] = "LE",   [0x79] = "CE",   [0x7A] = "AE",   [0x7B] = "SE",
    [0x7C] = "ME",   [0x7D] = "DE",   [0x7E] = "AU",   [0x7F] = "SU",

    /* 0x8_ */
    [0x80] = "SSM",  [0x82] = "LPSW",
    [0x84] = "WRD",  [0x85] = "RDD",  [0x86] = "BXH",  [0x87] = "BXLE",
    [0x88] = "SRL",  [0x89] = "SLL",  [0x8A] = "SRA",  [0x8B] = "SLA",
    [0x8C] = "SRDL", [0x8D] = "SLDL", [0x8E] = "SRDA", [0x8F] = "SLDA",

    /* 0x9_ */
    [0x90] = "STM",  [0x91] = "TM",   [0x92] = "MVI",
    [0x94] = "NI",   [0x95] = "CLI",  [0x96] = "OI",   [0x97] = "XI",
    [0x98] = "LM",
    [0x9C] = "SIO",  [0x9D] = "TIO",  [0x9E] = "HIO",  [0x9F] = "TCH",

    /* 0xD_ */
    [0xD1] = "MVN",  [0xD2] = "MVC",  [0xD3] = "MVZ",
    [0xD4] = "NC",   [0xD5] = "CLC",  [0xD6] = "OC",   [0xD7] = "XC",
    [0xDC] = "TR",   [0xDD] = "TRT",  [0xDE] = "ED",   [0xDF] = "EDMK",

    /* 0xF_ */
    [0xF1] = "MVO",  [0xF2] = "PACK", [0xF3] = "UNPK",
    [0xF8] = "ZAP",  [0xF9] = "CP",   [0xFA] = "AP",   [0xFB] = "SP",
    [0xFC] = "MP",   [0xFD] = "DP",
};
/*
 * Operand layout of every defined opcode, indexed by the opcode byte.
 * Taken from Appendix G of POP.
 *
 * Each entry is one of the format codes (RR, RR_2, ... SS_b).  Opcodes that
 * are not listed keep the implicit initializer 0, which is not a format
 * code, so a zero entry identifies an undefined opcode.
 *
 * Entries are listed in opcode order, one group per high nibble, which makes
 * the regularities of the encoding visible:
 *   0x0_-0x3_  register-to-register
 *   0x4_-0x7_  register-and-indexed-storage
 *   0x8_-0x9_  RS and SI
 *   0xD_       storage-to-storage with one 8-bit length
 *   0xF_       storage-to-storage with two 4-bit lengths
 *
 * Corrections relative to the earlier revision of this table:
 *   - MVO (0xF1), CP (0xF9), MP (0xFC) and DP (0xFD) are SS_b, not SS: like
 *     every other 0xF_ instruction they carry separate L1 and L2 fields.
 *   - LPSW (0x82) is SI_2, not SI: its I2 byte is ignored, as for SSM.
 *
 * NOTE: SVC (0x0A) is filed under RR because no format code exists for its
 * single 8-bit immediate operand; byte 1 of SVC is not a register pair.
 */
static const uint8_t iformat[256] = {
    /* 0x0_ */
    [0x04] = RR_3, [0x05] = RR,   [0x06] = RR,   [0x07] = RR_2,
    [0x08] = RR,   [0x09] = RR,   [0x0A] = RR,

    /* 0x1_ */
    [0x10] = RR,   [0x11] = RR,   [0x12] = RR,   [0x13] = RR,
    [0x14] = RR,   [0x15] = RR,   [0x16] = RR,   [0x17] = RR,
    [0x18] = RR,   [0x19] = RR,   [0x1A] = RR,   [0x1B] = RR,
    [0x1C] = RR,   [0x1D] = RR,   [0x1E] = RR,   [0x1F] = RR,

    /* 0x2_ */
    [0x20] = RR,   [0x21] = RR,   [0x22] = RR,   [0x23] = RR,
    [0x24] = RR,
    [0x28] = RR,   [0x29] = RR,   [0x2A] = RR,   [0x2B] = RR,
    [0x2C] = RR,   [0x2D] = RR,   [0x2E] = RR,   [0x2F] = RR,

    /* 0x3_ */
    [0x30] = RR,   [0x31] = RR,   [0x32] = RR,   [0x33] = RR,
    [0x34] = RR,
    [0x38] = RR,   [0x39] = RR,   [0x3A] = RR,   [0x3B] = RR,
    [0x3C] = RR,   [0x3D] = RR,   [0x3E] = RR,   [0x3F] = RR,

    /* 0x4_ */
    [0x40] = RX,   [0x41] = RX,   [0x42] = RX,   [0x43] = RX,
    [0x44] = RX,   [0x45] = RX,   [0x46] = RX,   [0x47] = RX_b,
    [0x48] = RX,   [0x49] = RX,   [0x4A] = RX,   [0x4B] = RX,
    [0x4C] = RX,   [0x4E] = RX,   [0x4F] = RX,

    /* 0x5_ */
    [0x50] = RX,
    [0x54] = RX,   [0x55] = RX,   [0x56] = RX,   [0x57] = RX,
    [0x58] = RX,   [0x59] = RX,   [0x5A] = RX,   [0x5B] = RX,
    [0x5C] = RX,   [0x5D] = RX,   [0x5E] = RX,   [0x5F] = RX,

    /* 0x6_ */
    [0x60] = RX,
    [0x68] = RX,   [0x69] = RX,   [0x6A] = RX,   [0x6B] = RX,
    [0x6C] = RX,   [0x6D] = RX,   [0x6E] = RX,   [0x6F] = RX,

    /* 0x7_ */
    [0x70] = RX,
    [0x78] = RX,   [0x79] = RX,   [0x7A] = RX,   [0x7B] = RX,
    [0x7C] = RX,   [0x7D] = RX,   [0x7E] = RX,   [0x7F] = RX,

    /* 0x8_ */
    [0x80] = SI_2,
    [0x82] = SI_2,                      /* LPSW: I2 byte is ignored */
    [0x84] = SI,   [0x85] = SI,   [0x86] = RS,   [0x87] = RS,
    [0x88] = RS_2, [0x89] = RS_2, [0x8A] = RS_2, [0x8B] = RS_2,
    [0x8C] = RS_2, [0x8D] = RS_2, [0x8E] = RS_2, [0x8F] = RS_2,

    /* 0x9_ */
    [0x90] = RS,   [0x91] = SI,   [0x92] = SI,
    [0x94] = SI,   [0x95] = SI,   [0x96] = SI,   [0x97] = SI,
    [0x98] = RS,
    [0x9C] = SI_2, [0x9D] = SI_2, [0x9E] = SI_2, [0x9F] = SI_2,

    /* 0xD_ */
    [0xD1] = SS,   [0xD2] = SS,   [0xD3] = SS,
    [0xD4] = SS,   [0xD5] = SS,   [0xD6] = SS,   [0xD7] = SS,
    [0xDC] = SS,   [0xDD] = SS,   [0xDE] = SS,   [0xDF] = SS,

    /* 0xF_ */
    [0xF1] = SS_b,                      /* MVO: L1/L2 */
    [0xF2] = SS_b, [0xF3] = SS_b,
    [0xF8] = SS_b,
    [0xF9] = SS_b,                      /* CP: L1/L2 */
    [0xFA] = SS_b, [0xFB] = SS_b,
    [0xFC] = SS_b,                      /* MP: L1/L2 */
    [0xFD] = SS_b,                      /* DP: L1/L2 */
};
/* FIXME routine for printing basedisp and baseidxdisp */

/* Base register number of a B/D halfword: the top nibble of its first byte. */
static int disasm_base(const uint8_t *bd)
{
    return bd[0] >> 4;
}

/* 12-bit displacement of a B/D halfword: low nibble of byte 0, then byte 1. */
static int disasm_disp(const uint8_t *bd)
{
    return ((bd[0] & 0xF) << 8) | bd[1];
}

/*
 * Disassemble one instruction and print it to stdout as a single line of
 * the form "HHHH_LLLL<tab>MNEMONIC<tab>operands".
 *
 *   addr   address shown in front of the instruction (printed as two
 *          16-bit hex halves)
 *   instr  the instruction bytes; byte 0 is the opcode.  Only the bytes the
 *          instruction's format needs are read, and bytes 0 and 1 are
 *          always read.
 *
 * Returns the length of the instruction in bytes (2, 4 or 6).  Illegal
 * instructions (no entry in iformat) are written as "db $XX, $XX" and the
 * length is returned as 2.
 *
 * Length operands of the SS formats are printed as true byte counts: the
 * encoded L, L1 and L2 fields hold the length minus one, so one is added.
 * SVC is printed with its 8-bit immediate instead of a register pair.
 */
int disasm_instr(uint32_t addr, uint8_t instr[6])
{
    const uint8_t opcode = instr[0];
    /* uint32_t is not necessarily unsigned int, which is what %X expects */
    const unsigned addr_hi = (unsigned)(addr >> 16);
    const unsigned addr_lo = (unsigned)(addr & 0xFFFF);
    /* the two nibbles of byte 1: R1/R2, R1/R3, R1/X2, M1/.. or L1/L2 */
    const int hi = instr[1] >> 4;
    const int lo = instr[1] & 0xF;

    if (!iformat[opcode]) {
        printf("%04X_%04X\tdb\t$%02X, $%02X\t; illegal instruction\n",
               addr_hi, addr_lo,
               (unsigned)instr[0], (unsigned)instr[1]);
        return 2;
    }

    /* special casing: NOP, NOPR + B */
    /* special casing: R1 has to be even in some instructions, note in comments? */
    /* special casing: cond branch, write BH/BH/BP/BL/... in comments */

    /* mnemonic */
    printf("%04X_%04X\t%s\t", addr_hi, addr_lo, iname[opcode]);

    /* operands -- depends on instruction format */
    switch (iformat[opcode]) {
    case RR:
        if (opcode == 0x0A) {
            /* SVC is tabled as RR, but byte 1 is one 8-bit immediate */
            printf("%d", instr[1]);
        } else {
            printf("R%d, R%d", hi, lo);
        }
        break;
    case RR_2:
        printf("%d, R%d", hi, lo);
        break;
    case RR_3:
        printf("R%d", hi);
        break;
    case RS:
        printf("R%d, R%d, B%d, %d",
               hi, lo,
               disasm_base(instr + 2), disasm_disp(instr + 2));
        break;
    case RS_2:
        printf("R%d, B%d, %d",
               hi,
               disasm_base(instr + 2), disasm_disp(instr + 2));
        break;
    case RX:
        printf("R%d, X%d, B%d, %d",
               hi, lo,
               disasm_base(instr + 2), disasm_disp(instr + 2));
        break;
    case RX_b:
        printf("%d, X%d, B%d, %d",
               hi, lo,
               disasm_base(instr + 2), disasm_disp(instr + 2));
        break;
    case SI:
        printf("%d, B%d, %d",
               instr[1],
               disasm_base(instr + 2), disasm_disp(instr + 2));
        break;
    case SI_2:
        printf("B%d, %d",
               disasm_base(instr + 2), disasm_disp(instr + 2));
        break;
    case SS:
        /* the 8-bit L field encodes length - 1 */
        printf("%d, B%d, %d, B%d, %d",
               instr[1] + 1,
               disasm_base(instr + 2), disasm_disp(instr + 2),
               disasm_base(instr + 4), disasm_disp(instr + 4));
        break;
    case SS_b:
        /* the 4-bit L1 and L2 fields each encode length - 1 */
        printf("%d, %d, B%d, %d, B%d, %d",
               hi + 1, lo + 1,
               disasm_base(instr + 2), disasm_disp(instr + 2),
               disasm_base(instr + 4), disasm_disp(instr + 4));
        break;
    default:
        /* unknown format code: print the mnemonic without operands */
        break;
    }

    /* done */
    printf("\n");

    /* instruction length -- depends solely on top 2 bits of byte 0 */
    switch (opcode >> 6) {
    case 0:
        return 2;
    case 1:
    case 2:
        return 4;
    default:    /* 3 */
        return 6;
    }
}
/*
 * Disassemble a whole blob, one instruction per output line.
 *
 *   addr       address to show for the first byte of the blob
 *   blob_size  number of bytes in blob
 *   blob       the bytes to disassemble
 *
 * Decoding continues for as long as the next instruction fits completely
 * into what is left of the blob, so 2- and 4-byte instructions at the very
 * end are decoded too.  The required size is derived from the top two bits
 * of the opcode byte (0 -> 2 bytes, 1 or 2 -> 4 bytes, 3 -> 6 bytes), the
 * same rule disasm_instr uses for its return value.  Bytes that are left
 * over because the last instruction is incomplete are written as one
 * "db" line.  No byte outside blob[0 .. blob_size-1] is ever read.
 */
void disasm_blob(uint32_t addr, size_t blob_size, uint8_t blob[blob_size])
{
    size_t idx = 0;

    while (idx < blob_size) {
        const size_t remaining = blob_size - idx;
        const unsigned top = blob[idx] >> 6;
        const size_t need = (top == 0) ? 2 : (top == 3) ? 6 : 4;
        uint8_t instr[6] = {0};
        int sze;

        /* next instruction would run past the end of the blob */
        if (need > remaining)
            break;

        /* gather instr bytes; whatever is not available stays zero */
        for (size_t i = 0; i < 6 && i < remaining; i++)
            instr[i] = blob[idx + i];

        /* disasm */
        sze = disasm_instr(addr, instr);

        /* defensive: never step by a length that does not fit */
        if (sze <= 0 || (size_t)sze > remaining)
            return;

        /* next */
        idx += (size_t)sze;
        addr += (uint32_t)sze;
    }

    /* incomplete tail: dump the leftover bytes as data */
    if (idx < blob_size) {
        printf("%04X_%04X\tdb\t",
               (unsigned)(addr >> 16), (unsigned)(addr & 0xFFFF));
        for (size_t i = idx; i < blob_size; i++)
            printf("%s$%02X", (i > idx) ? ", " : "", (unsigned)blob[i]);
        printf("\t; incomplete instruction\n");
    }
}