S/360 disassembler -- not that hard

20 views
Skip to first unread message

Peter Lund

unread,
Dec 19, 2020, 6:51:39 AM12/19/20
to z390
I have received nice (and often helpful) responses from no fewer than 7 people!
It turns out this forum is much more active than it looks :)

Some have expressed skepticism regarding the goal of writing an emulator and even hinted that a disassembler may be a large task.

To counter that, I'll post my current disassembler code.  It is really dead simple.  There are a handful of known minor issues with it, all of them easy to fix.

At some point, I'll probably want a more advanced disassembler that handles labels and can infer which parts of a binary blob might be code and which parts probably aren't.  That's not a problem as I've done that before for the VAX architecture.

-Peter

/* S/360 POP defines RR/RS/RX/SI/SS but it doesn't properly define the
   subformats.
   The number of formats (and subformats) exploded as the architecture matured.
   These names are chosen to be as stable (forward-compatible) as possible with
   that format zoo.

   Letters are used where POP uses explicit subformats, numbers are used where
   they are implicit.  The first subtype is the most common subtype so it gets
   a simplified name.
 */
/* Instruction format codes.  Numbering starts at 1; disasm_instr treats a
   format of 0 as "illegal opcode".  The trailing comment on each code lists
   the operand fields of that format.
 */
enum {
        /* register-register */
        RR   = 1,       /* R1/R2                */
        RR_2 = 2,       /* M1/R2                */
        RR_3 = 3,       /* R1/--                */

        /* register-storage */
        RS   = 4,       /* R1/R3, B2/D2         */
        RS_2 = 5,       /* R1/--, B2/D2         */

        /* register-indexed storage */
        RX   = 6,       /* R1/X2, B2/D2         */
        RX_b = 7,       /* M1/X2, B2/D2         */

        /* storage-immediate */
        SI   = 8,       /* I2, B1/D1            */
        SI_2 = 9,       /* --, B1/D1            */

        /* storage-storage */
        SS   = 10,      /* L, B1/D1, B2/D2      */
        SS_b = 11       /* L1/L2, B1/D1, B2/D2  */
};


/* Mnemonic lookup, indexed by the (single) opcode byte.  S/360 only has
   one-byte opcodes; later architectures use 16-bit opcodes.
   Contents come from Appendix G of S/360 POP, listed here in ascending
   opcode order.  Opcodes without an entry are left as the empty string.
   The longest mnemonic is 4 characters, hence 5 bytes per entry.
 */
static const char iname[256][5] = {
        /* 0x0_ */
        [0x04] = "SPM",  [0x05] = "BALR", [0x06] = "BCTR", [0x07] = "BCR",
        [0x08] = "SSK",  [0x09] = "ISK",  [0x0A] = "SVC",

        /* 0x1_ */
        [0x10] = "LPR",  [0x11] = "LNR",  [0x12] = "LTR",  [0x13] = "LCR",
        [0x14] = "NR",   [0x15] = "CLR",  [0x16] = "OR",   [0x17] = "XR",
        [0x18] = "LR",   [0x19] = "CR",   [0x1A] = "AR",   [0x1B] = "SR",
        [0x1C] = "MR",   [0x1D] = "DR",   [0x1E] = "ALR",  [0x1F] = "SLR",

        /* 0x2_ */
        [0x20] = "LPDR", [0x21] = "LNDR", [0x22] = "LTDR", [0x23] = "LCDR",
        [0x24] = "HDR",
        [0x28] = "LDR",  [0x29] = "CDR",  [0x2A] = "ADR",  [0x2B] = "SDR",
        [0x2C] = "MDR",  [0x2D] = "DDR",  [0x2E] = "AWR",  [0x2F] = "SWR",

        /* 0x3_ */
        [0x30] = "LPER", [0x31] = "LNER", [0x32] = "LTER", [0x33] = "LCER",
        [0x34] = "HER",
        [0x38] = "LER",  [0x39] = "CER",  [0x3A] = "AER",  [0x3B] = "SER",
        [0x3C] = "MER",  [0x3D] = "DER",  [0x3E] = "AUR",  [0x3F] = "SUR",

        /* 0x4_ */
        [0x40] = "STH",  [0x41] = "LA",   [0x42] = "STC",  [0x43] = "IC",
        [0x44] = "EX",   [0x45] = "BAL",  [0x46] = "BCT",  [0x47] = "BC",
        [0x48] = "LH",   [0x49] = "CH",   [0x4A] = "AH",   [0x4B] = "SH",
        [0x4C] = "MH",   [0x4E] = "CVD",  [0x4F] = "CVB",

        /* 0x5_ */
        [0x50] = "ST",
        [0x54] = "N",    [0x55] = "CL",   [0x56] = "O",    [0x57] = "X",
        [0x58] = "L",    [0x59] = "C",    [0x5A] = "A",    [0x5B] = "S",
        [0x5C] = "M",    [0x5D] = "D",    [0x5E] = "AL",   [0x5F] = "SL",

        /* 0x6_ */
        [0x60] = "STD",
        [0x68] = "LD",   [0x69] = "CD",   [0x6A] = "AD",   [0x6B] = "SD",
        [0x6C] = "MD",   [0x6D] = "DD",   [0x6E] = "AW",   [0x6F] = "SW",

        /* 0x7_ */
        [0x70] = "STE",
        [0x78] = "LE",   [0x79] = "CE",   [0x7A] = "AE",   [0x7B] = "SE",
        [0x7C] = "ME",   [0x7D] = "DE",   [0x7E] = "AU",   [0x7F] = "SU",

        /* 0x8_ */
        [0x80] = "SSM",  [0x82] = "LPSW",
        [0x84] = "WRD",  [0x85] = "RDD",  [0x86] = "BXH",  [0x87] = "BXLE",
        [0x88] = "SRL",  [0x89] = "SLL",  [0x8A] = "SRA",  [0x8B] = "SLA",
        [0x8C] = "SRDL", [0x8D] = "SLDL", [0x8E] = "SRDA", [0x8F] = "SLDA",

        /* 0x9_ */
        [0x90] = "STM",  [0x91] = "TM",   [0x92] = "MVI",
        [0x94] = "NI",   [0x95] = "CLI",  [0x96] = "OI",   [0x97] = "XI",
        [0x98] = "LM",
        [0x9C] = "SIO",  [0x9D] = "TIO",  [0x9E] = "HIO",  [0x9F] = "TCH",

        /* 0xD_ */
        [0xD1] = "MVN",  [0xD2] = "MVC",  [0xD3] = "MVZ",
        [0xD4] = "NC",   [0xD5] = "CLC",  [0xD6] = "OC",   [0xD7] = "XC",
        [0xDC] = "TR",   [0xDD] = "TRT",  [0xDE] = "ED",   [0xDF] = "EDMK",

        /* 0xF_ */
        [0xF1] = "MVO",  [0xF2] = "PACK", [0xF3] = "UNPK",
        [0xF8] = "ZAP",  [0xF9] = "CP",   [0xFA] = "AP",   [0xFB] = "SP",
        [0xFC] = "MP",   [0xFD] = "DP",
};

/* taken from Appendix G of POP */
static const uint8_t iformat[256] = {
        /* page 162-163 */
        [0x5A] = RX,    [0x6A] = RX,    [0x2A] = RR,    [0x7A] = RX,
        [0x3A] = RR,    [0x4A] = RX,    [0x5E] = RX,    [0x1E] = RR,
        [0xFA] = SS_b,  [0x1A] = RR,    [0x7E] = RX,    [0x3E] = RR,
        [0x6E] = RX,    [0x2E] = RR,    [0x45] = RX,    [0x05] = RR,
        [0x47] = RX_b,  [0x07] = RR_2,  [0x46] = RX,    [0x06] = RR,
        [0x86] = RS,    [0x87] = RS,    [0x59] = RX,    [0x69] = RX,
        [0x29] = RR,    [0x79] = RX,    [0x39] = RR,    [0x49] = RX,
        [0x55] = RX,    [0xD5] = SS,    [0x95] = SI,    [0x15] = RR,
        [0xF9] = SS,    [0x19] = RR,    [0x4F] = RX,    [0x4E] = RX,
        [0x5D] = RX,    [0x6D] = RX,    [0x2D] = RR,    [0x7D] = RX,
        [0x3D] = RR,    [0xFD] = SS,    [0x1D] = RR,    [0xDE] = SS,

        /* page 164-165 */
        [0xDF] = SS,    [0x44] = RX,    [0x24] = RR,    [0x34] = RR,
        [0x9E] = SI_2,  [0x43] = RX,    [0x09] = RR,    [0x58] = RX,
        [0x41] = RX,    [0x23] = RR,    [0x33] = RR,    [0x13] = RR,
        [0x68] = RX,    [0x28] = RR,    [0x78] = RX,    [0x38] = RR,
        [0x48] = RX,    [0x98] = RS,    [0x21] = RR,    [0x31] = RR,
        [0x11] = RR,    [0x20] = RR,    [0x30] = RR,    [0x10] = RR,
        [0x82] = SI,    [0x18] = RR,    [0x22] = RR,    [0x32] = RR,
        [0x12] = RR,    [0x5C] = RX,    [0x6C] = RX,    [0x2C] = RR,
        [0x7C] = RX,    [0x3C] = RR,    [0x4C] = RX,    [0xFC] = SS,
        [0x1C] = RR,    [0xD2] = SS,    [0x92] = SI,    [0xD1] = SS,
        [0xF1] = SS,    [0xD3] = SS,    [0x54] = RX,    [0xD4] = SS,

        /* page 166-167 */
        [0x94] = SI,    [0x14] = RR,    [0x56] = RX,    [0xD6] = SS,
        [0x96] = SI,    [0x16] = RR,    [0xF2] = SS_b,  [0x85] = SI,
        [0x5B] = RX,    [0x6B] = RX,    [0x2B] = RR,    [0x7B] = RX,
        [0x3B] = RR,    [0x4B] = RX,    [0x9C] = SI_2,  [0x5F] = RX,
        [0x8B] = RS_2,  [0x8F] = RS_2,  [0x8D] = RS_2,  [0x89] = RS_2,
        [0x1F] = RR,    [0xFB] = SS_b,  [0x04] = RR_3,  [0x1B] = RR,
        [0x8A] = RS_2,  [0x8E] = RS_2,  [0x8C] = RS_2,  [0x88] = RS_2,
        [0x08] = RR,    [0x80] = SI_2,  [0x50] = RX,    [0x42] = RX,
        [0x60] = RX,    [0x70] = RX,    [0x40] = RX,    [0x90] = RS,
        [0x7F] = RX,    [0x3F] = RR,    [0x0A] = RR,    [0x6F] = RX,
        [0x2F] = RR,    [0x9F] = SI_2,  [0x9D] = SI_2,  [0x91] = SI,

        /* page 168 */
        [0xDC] = SS,    [0xDD] = SS,    [0xF3] = SS_b,  [0x84] = SI,
        [0x57] = RX,    [0xD7] = SS,    [0x97] = SI,    [0x17] = RR,
        [0xF8] = SS_b,
};


/* FIXME routine for printing basedisp and baseidxdisp */

/* returns length of instruction -- illegal instructions are written as DB $XX, $XX
   and the length is returned as 2.
 */
int disasm_instr(uint32_t addr, uint8_t instr[6])
{
        if (!iformat[instr[0]]) {
                printf("%04X_%04X\tdb\t$%02X, $%02X\t; illegal instruction\n",
                        addr >> 16, addr & 0xFFFF,
                        instr[0], instr[1]);
                return 2;
        }

        /* special casing:  NOP, NOPR + B */
        /* special casing:  R1 has to be even in some instructions, note in comments? */
        /* special casing:  cond branch, write BH/BH/BP/BL/... in comments */

        /* mnemonic */
        printf("%04X_%04X\t%s\t",
                addr >> 16, addr & 0xFFFF,
                iname[instr[0]]);

        /* operands -- depends on instruction format */
        switch (iformat[instr[0]]) {
        case RR:        printf("R%d, R%d", instr[1] >> 4, instr[1] & 0xF); break;
        case RR_2:      printf("%d, R%d",  instr[1] >> 4, instr[1] & 0xF); break;
        case RR_3:      printf("R%d", instr[1] >> 4); break;
        case RS:        printf("R%d, R%d, B%d, %d",
                                instr[1] >> 4, instr[1] & 0xF,
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3]);
                        break;
        case RS_2:      printf("R%d, B%d, %d",
                                instr[1] >> 4,
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3]);
                        break;
        case RX:        printf("R%d, X%d, B%d, %d",
                                instr[1] >> 4, instr[1] & 0xF,
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3]);
                        break;
        case RX_b:      printf("%d, X%d, B%d, %d",
                                instr[1] >> 4,
                                instr[1] & 0xF, instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3]);
                        break;
        case SI:        printf("%d, B%d, %d",
                                instr[1],
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3]);
                        break;
        case SI_2:      printf("B%d, %d",
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3]);
                        break;

        /* FIXME len8 and len4 fields are 1 off */
        case SS:        printf("%d, B%d, %d, B%d, %d",
                                instr[1],
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3],
                                instr[4] >> 4, ((instr[4] & 0xF) << 8) | instr[5]);
                        break;
        case SS_b:      printf("%d, %d, B%d, %d, B%d, %d",
                                instr[1] >> 4, instr[1] & 0xF,
                                instr[2] >> 4, ((instr[2] & 0xF) << 8) | instr[3],
                                instr[4] >> 4, ((instr[4] & 0xF) << 8) | instr[5]);
                        break;
        }

        /* done */
        printf("\n");

        /* instruction length -- depends solely on top 2 bits of byte 0 */
        switch (instr[0] >> 6) {
        case 0: return 2;
        case 1:
        case 2: return 4;
        default:        /* make stoopid compiler not warn */
        case 3: return 6;
        }
}


void disasm_blob(uint32_t addr, size_t blob_size, uint8_t blob[blob_size])
{
        size_t          idx = 0;

        /* FIXME -- handle last 2-4 bytes */
        while (blob_size >= 6) {
                int     sze;
                uint8_t instr[6];

                /* gather instr bytes */
                for (unsigned i=0; i < 6; i++)
                        instr[i] = blob[idx+i];

                /* disasm */
                sze = disasm_instr(addr, instr);

                /* next */
                blob_size -= sze;
                idx += sze;
                addr += sze;
        }
}


dave....@gmail.com

unread,
Dec 19, 2020, 7:10:11 AM12/19/20
to Peter Lund, z390

Peter,

I would say the 360 CPU is straightforward to emulate. What might be more challenging is the IO. The Hercules approach is to basically treat each device as if it's on its own channel, so it never returns channel busy.

You might find this useful:-

 

https://github.com/s390guy/SATK

 

Dave

 

--
You received this message because you are subscribed to the Google Groups "z390" group.
To unsubscribe from this group and stop receiving emails from it, send an email to z390+uns...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/z390/f74a06d1-21b9-43a8-bab5-12bd5bcfc98bn%40googlegroups.com.

Peter Lund

unread,
Dec 19, 2020, 7:24:48 AM12/19/20
to dave....@gmail.com, z390
On Sat, Dec 19, 2020 at 1:10 PM <dave....@gmail.com> wrote:

Peter,

I would say the 360 CPU is straightforward to emulate.


Yep, that is my impression, too.  And much easier, in many respects, than a Z80 or a 6502.  They both had undocumented instructions and the Z80 had 2 undocumented flags.  To make it even more fun, it has a single instruction that lets 2 bits of an undocumented internal register bleed through to those flags.  And in order to be truly useful in a typical machine emulator, the CPU emulator would have to be cycle accurate and generate wait states correctly and what not.  Typical emulated machines also had fun things like bus contention leading to slightly slower execution for some addresses (at some periods, relating to the TV output) or bytes being misread from the data bus.  Or they might have had a CPU and certain peripherals operating at different frequencies with all the fun that entails.

The S/360 is much cleaner, because I don't have to implement a specific model with all its quirks.  I can implement it in the pristine, idealized form.
 

What might be more challenging is the IO.


Yep.  But mostly because the entire concept is so alien, or at least is *presented* in such an alien way.  It really isn't all that different from the way a modern NIC, disk, or GPU works.
 

The Hercules approach is to basically treat each device as if it's on its own channel, so it never returns channel busy.


And I'll likely do the same.  I want this emulator to be simple, so it won't emulate more hardware than strictly necessary.
 

You might find this useful:-

 

https://github.com/s390guy/SATK


Almost certainly, yes.  Thanks!

-Peter
Reply all
Reply to author
Forward
0 new messages