/* Copyright (c) Microsoft Corporation. All rights reserved. */
/*
 *    Disassembler for ARM
 *
 *      "ARM Architectural Reference Manual"
 *      Edited by Dave Jaggar.  Prentice Hall ed.
 *      ISBN 0-13-736299-4
 */
#include <windows.h>
#include "bo.h"
#include "arm_instruction.h"

/* Number format used for every immediate operand.
 * Hex is nicer to read, but objdump prints decimal and the output
 * has to be compared against it, so decimal is what is selected.
 * Flip the #if to switch.
 */
#if 0
#define NUMFMT "0x%x"
#else
#define NUMFMT "%d"
#endif

/* All mnemonics are in here, to make them easy to change.
 * Changing the rest of the syntax is.. not so easy.
 *
 * The tables are only ever read, so both the pointers and the
 * strings they point at are const.
 */
#define arm_bang_name "!"

/* Suffixes appended after the condition code */
static const char *const arm_swp_am[2] = { "", "b" };
static const char *const arm_dp_am[2] = { "", "s" };
static const char *const arm_ls_am[4] = { "", "t", "b", "bt" };
static const char *const arm_lsh_am[4] = { "", "h", "sb", "sh" };
static const char *const arm_lsm_am[4] = { "da", "ia", "db", "ib" };
/* Indexed by a 4-bit field mask, one letter per bit set */
static const char *const arm_msr_am[16] = {
/* 0 */ "", "c", "x", "xc", "s", "sc", "sx", "sxc",
/* 8 */ "f", "fc", "fx", "fxc", "fs", "fsc", "fsx", "fsxc"
};

/* Base mnemonics */
#define arm_swi_name "swi"
#define arm_bx_name "bx"
#define arm_cdp_name "cdp"
#define arm_swp_name "swp"
static const char *const arm_bl_name[2] = { "b", "bl" };
static const char *const arm_ls_name[2] = { "str", "ldr" };
/* Entries 0..3 are indexed by the shift mode field, entry 4 ("rrx")
 * is selected explicitly by the code.
 */
static const char *const arm_ls_shift[5] = { "lsl", "lsr", "asr", "ror", "rrx" };
static const char *const arm_lsm_name[2] = { "stm", "ldm" };
static const char *const arm_lsp_name[2] = { "stc", "ldc" };
static const char *const arm_mc_name[2] = { "mcr", "mrc" };
static const char *const arm_dp_name[16] = {
/* 0 */ "and", "eor", "sub", "rsb",
/* 4 */ "add", "adc", "sbc", "rsc",
/* 8 */ "tst", "teq", "cmp", "cmn",
/* 12*/ "orr", "mov", "bic", "mvn"
};

/* Which register operands get printed for each entry of arm_dp_name:
 * ARM_RD selects the destination register, ARM_RN the first operand.
 */
#define ARM_RD 0x1
#define ARM_RN 0x2
static const char arm_dp_flags[16] = {
/* 0 */ ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN,
/* 4 */ ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN,
/* 8 */ ARM_RN,        ARM_RN,        ARM_RN,        ARM_RN,
/* 12*/ ARM_RD|ARM_RN, ARM_RD,        ARM_RD|ARM_RN, ARM_RD
};
static const char *const arm_msr_name[2] = { "mrs", "msr" };
static const char *const arm_psr_name[2] = { "cpsr", "spsr" };
/* Entries 2 and 3 are unused and left empty */
static const char *const arm_mul_name[8] = {
/* 0 */ "mul", "mla", "", "",
/* 4 */ "umull", "umlal", "smull", "smlal"
};

/* Condition code suffixes; entry 14 is empty so that the "always"
 * condition prints nothing.
 */
static const char *const arm_cond_name[16] = {
/* 0 */ "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
/* 8 */ "hi", "ls", "ge", "lt", "gt", "le", "", "never"/*BUGBUG*/
};

/* Register names, indexed by register number.
 * The strings stay plain char because arm_register_name() hands the
 * entries out as char *; they must not be written to.
 */
static char *const arm_sbregister_name[16] = {
        "r0",   "r1",   "r2",   "r3",   "r4",   "r5",   "r6",   "r7",
        "r8",   "r9",   "r10",  "fp",   "ip",   "sp",   "lr",   "pc"
};

/* Rotate the 32-bit VALUE right by AMOUNT bit positions.
 *
 * Rotating by a multiple of 32 gives the value back, so only
 * AMOUNT modulo 32 matters; the result is computed in constant
 * time instead of one bit per iteration.
 */
UINT32
ror(UINT32 value, UINT amount)
{
    amount &= 31;
    /* A zero rotation must not reach the shifts below:
     * shifting a 32-bit value by 32 is undefined.
     */
    if (amount == 0)
        return value;
    return (value >> amount) | (value << (32 - amount));
}

static int regcount;            /* how many regs used in this inst */
static int regnum[16];          /* which regs used in this inst */

/* Return the printable name of register IREG and record that the
 * instruction being disassembled uses it.
 *
 * IREG indexes arm_sbregister_name, so it must be in 0..15.
 * Each distinct register is stored once in regnum[]; regcount is the
 * number of entries and is reset by ArmDisasm for every instruction.
 * The returned string belongs to the name table, do not modify it.
 */
static char *
arm_register_name(int ireg)
{
    int i;

    /* Already recorded for this instruction ? */
    for (i = 0; i < regcount; i++)
        if (regnum[i] == ireg)
            break;
    /* Not seen yet: remember it, but never write past regnum[] */
    if (i >= regcount &&
        regcount < (int)(sizeof(regnum) / sizeof(regnum[0])))
        regnum[regcount++] = ireg;
    return (arm_sbregister_name[ireg]);
}

/* Print the register list of a load/store multiple instruction.
 * Bit N of LIST set means register N is in the list; the names are
 * printed in ascending register order, comma separated, in braces.
 */
static void
arm_reglist(unsigned short list)
{
    const char *separator = "";
    int bit;

    printf("{");
    for (bit = 0; bit < 16; bit++) {
        if ((list & (1 << bit)) == 0)
            continue;
        printf("%s%s", separator, arm_register_name(bit));
        /* every register after the first one is preceded by a comma */
        separator = ", ";
    }
    printf("}");
}

/* Print the ", <shift> #<amount>" suffix of a register operand that
 * is shifted by an immediate amount.
 *
 * Nothing is printed when the shift field of I is all zeroes
 * (the operand is always shifted, but we pretend it is not).
 * A rotate-right with a zero amount is printed as "rrx".
 * For "lsr" and "asr" (shift modes 1 and 2, see arm_ls_shift) a zero
 * amount field stands for a shift by 32, so 32 is what gets printed.
 */
static void
arm_shifter(arm_instruction i)
{
    int mode = i.i_format.shifter.ls.shiftmode;
    int amount = i.i_format.shifter.ls.shiftval;

    /* its always shifted but we'll pretend */
    if (i.i_format.shifter.ns.shift == 0)
        return;

    if ((mode == ARM_SHIFT_RRX) && (amount == 0)) {
        /* no amount, RRX */
        printf(", %s",
               arm_ls_shift[4]);
        return;
    }

    /* LSR #32 and ASR #32 are encoded with an amount field of 0 */
    if ((amount == 0) && ((mode == 1) || (mode == 2)))
        amount = 32;

    printf(", %s #"NUMFMT,
           arm_ls_shift[mode],
           amount);
}

/* Print the immediate offset of a load/store instruction as
 * ", #+N" or ", #-N"; the sign comes from bit b23 of I.
 * A zero offset prints nothing at all.
 * IADR and RUNTADDR are accepted but not used.
 */
static void
arm_12_immediate(
                unsigned iadr,
                arm_instruction i,
                int runtaddr)
{
    char sign;

    /* BUGBUG relocs ? */
    if (i.i_format.shifter.uu.u_val == 0)
        return;

    if (i.a_format.b23)
        sign = '+';
    else
        sign = '-';

    printf(", #%c"NUMFMT, sign, i.i_format.shifter.uu.u_val);
}

/* Print the part that all data processing instructions share:
 * mnemonic, condition, optional "s" suffix, then the destination
 * and/or first operand register as selected by arm_dp_flags.
 * The caller prints the second operand.
 */
static void
arm_dps(
        arm_instruction i)
{
    const int op = i.i_format.opcode;
    const int operands = arm_dp_flags[op];

    printf("%s%s", arm_dp_name[op],
           arm_cond_name[i.i_format.cond]);

    /* tst..cmn (opcodes 8..11) dont print the mandatory "S" */
    if (!((op >= 8) && (op <= 11)))
        printf("%s", arm_dp_am[i.i_format.s_l]);

    if (operands & ARM_RD)
        printf(" %s", arm_register_name(i.i_format.shifter.ss.rd));

    if ((operands & ARM_RN) == 0)
        return;

    /* a comma is only needed when a destination came first */
    if (operands & ARM_RD)
        printf(",");
    printf(" %s",
           arm_register_name(i.i_format.rn));
}

/* Disassemble the instruction PINST points at and print it with
 * printf; no trailing newline is printed.
 *
 * IADR is the location of the instruction, it is handed to printsym
 * when a branch target is printed.
 * RUNTADDR is normally the same, it is used to
 * disassemble from object files [in which case
 * IADR is the file offset and RUNTADDR is the
 * address at which the code has been relocated].
 * Branch targets are computed from RUNTADDR.
 *
 * The list of registers recorded by arm_register_name is restarted
 * on every call.
 */
void
ArmDisasm(
       unsigned iadr,
       void *pinst,
       int runtaddr)
{
    arm_instruction i = *((arm_instruction*)pinst);
    int signed_immediate;

    regcount = 0;

    /* The encoding of ARM instructions is fairly irregular.
     * I do admit not spending much time on it, esp not looking
     * carefully at section 3.14 pag 3-22 of the Jaggar book.
     * But even that has a lot of ifs and buts, so.
     * The result is not only largish, but with a few repeats
     * of code in some places with only minor variations.
     * Oh well.
     */

    /* Start partitioning off the top 3 bits of the opcode
     * All but the first case then are fairly unambiguous.
     */
    switch (i.a_format.class) {

    case 0:
        /* Messy case, use bit 4 for starters
         */
        if (i.a_format.b4 == 0) {
            /* Ambiguity resolved here cuz CMP/CMN/TST/TEQ always S=1
             */
            if ((i.g_format.opcode & 0xf9) != 0x10) {
                /* data processing immediate shift
                 * pagg 88,89,91,93,95,97
                 */
                arm_dps(i);
                printf(", %s", arm_register_name(i.a_format.rm));
                arm_shifter(i);
                break;

            } else if (i.a_format.b21 == 0) {
                /* move from Status register
                 */
                printf("%s%s %s, %s",
                       arm_msr_name[i.a_format.b21],
                       arm_cond_name[i.i_format.cond],
                       arm_register_name(i.a_format.rd),
                       arm_psr_name[i.a_format.b22]);
                break;

            } else {
                /* move register to Status register
                 */
                printf("%s%s %s_%s, %s",
                       arm_msr_name[i.a_format.b21],
                       arm_cond_name[i.i_format.cond],
                       arm_psr_name[i.a_format.b22],
                       arm_msr_am[i.a_format.rn],
                       arm_register_name(i.a_format.rm));
                break;
            }

        } else {
            /* Still messy, start with bit 7
             */
            if (i.a_format.b7 == 0) {
                /* Ambiguity resolved here cuz CMP/CMN/TST/TEQ always S=1
                 * Also, other restrictions on BX
                 */
                if ((i.g_format.opcode == 0x12) &&
                    (i.g_format.op2 == 0x1)) {
                    /* branch/exchange instruction set
                     * NB b8..b19 should be 1s but we dont check
                     */
                    printf("%s%s  %s", arm_bx_name,
                           arm_cond_name[i.g_format.cond],
                           arm_register_name(i.g_format.rm));
                    break;

                } else {
                    /* data processing register shift
                     * pagg 90,92,94,96
                     */
                    arm_dps(i);
                    printf(", %s, %s %s",
                           arm_register_name(i.a_format.rm),
                           arm_ls_shift[i.i_format.shifter.ls.shiftmode],
                           arm_register_name(i.a_format.rs));
                    break;
                }

            } else {
                /* lshsb cannot have b5==b6==0
                 */
                if ((i.a_format.b5|i.a_format.b6)==0) {
                    /* Use bit 24 to resolve mul viz swp
                     */
                    if (i.a_format.b24) {
                        /* swp/swpb */
                        printf("%s%s%s %s,%s,[%s]", arm_swp_name,
                               arm_cond_name[i.g_format.cond],
                               arm_swp_am[i.a_format.b22],
                               arm_register_name(i.g_format.rd),
                               arm_register_name(i.g_format.rm),
                               arm_register_name(i.g_format.rn));
                        break;

                    } else if (i.a_format.b23) {
                        /* mul long
                         * pagg 64,65,80,81
                         */
                        printf("%s%s%s %s, %s, %s, %s",
                               arm_mul_name[i.i_format.opcode & 0x7],
                               arm_cond_name[i.g_format.cond],
                               arm_dp_am[i.i_format.s_l],
                               arm_register_name(i.g_format.rd),
                               arm_register_name(i.g_format.rn),
                               arm_register_name(i.g_format.rm),
                               arm_register_name(i.g_format.rs));
                        break;

                    } else {
                        /* mul/mla
                         * pagg 52,58
                         */
                        printf("%s%s%s %s, %s, %s",
                               arm_mul_name[i.i_format.opcode & 0x7],
                               arm_cond_name[i.g_format.cond],
                               arm_dp_am[i.i_format.s_l],
                               arm_register_name(i.g_format.rn),
                               arm_register_name(i.g_format.rm),
                               arm_register_name(i.g_format.rs));
                        if (i.a_format.b21)
                            /* mla */
                            printf(", %s", arm_register_name(i.g_format.rd));
                        break;

                    }
                } else {
                    /* Use bit 22 to resolve the two lshsb classes
                     * First part is common
                     */
                    printf("%s%s%s %s, [%s",
                           arm_ls_name[i.a_format.b20],
                           arm_cond_name[i.j_format.cond],
                           arm_lsh_am[i.i_format.shifter.ls.shiftmode],
                           arm_register_name(i.g_format.rd),
                           arm_register_name(i.g_format.rn));

                    if (i.a_format.b22) {
                        /* lshsb immediate
                         * pagg 110,112,114
                         * The 8-bit offset is split over two nibbles
                         */
                        printf((i.a_format.b24) ?
                               ", #%c"NUMFMT"]" : "], #%c"NUMFMT,
                               (i.a_format.b23) ? '+' : '-',
                               (i.g_format.rs << 4) | (i.g_format.rm));

                    } else {
                        /* lshsb register
                         * pagg 111,113,115(broken!)
                         */
                        printf((i.a_format.b24) ?
                               ", %c%s]" : "], %c%s",
                               (i.a_format.b23) ? '+' : '-',
                               arm_register_name(i.a_format.rm));
                    }
                    /* Common for writeback
                     */
                    if (i.a_format.b21)
                        printf("%s", arm_bang_name);
                    break;

                }
            }
        }
        break;

    case 1:
        /* Two (ambiguous) groups
         */
        if ((i.g_format.opcode & 0xfb) == 0x32) {
            /* move immediate to status register
             * The immediate is rotated right by twice the rotate
             * field, same as for data processing below.
             */
            printf("%s%s %s_%s, #"NUMFMT,
                   arm_msr_name[i.a_format.b21],
                   arm_cond_name[i.i_format.cond],
                   arm_psr_name[i.a_format.b22],
                   arm_msr_am[i.a_format.rn], /* overkill, can only be f */
                   ror(i.i_format.shifter.ss.immediate,
                       i.i_format.shifter.ss.rotate<<1));
            break;
        }

        /* data processing with immediate
         * pag 3-87
         */
        arm_dps(i);
        /* BUGBUG relocs ? */
        printf(", #"NUMFMT,
               ror(i.i_format.shifter.ss.immediate,
                   i.i_format.shifter.ss.rotate<<1));
        break;

    case 2:
        /* Load/store immediate offset
         */
        printf("%s%s%s %s,[%s", arm_ls_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               arm_ls_am[(i.a_format.b22<<1) | /* Byte */
                         /* T is a bit weird.. only when post-indexed */
                         (i.a_format.b21 & ~i.a_format.b24)],
               arm_register_name(i.a_format.rd),
               arm_register_name(i.a_format.rn));
        if (i.a_format.b24 == 0) {
            /* pagg 106 */
            printf("]");
            arm_12_immediate(iadr,i,runtaddr);
        } else {
            /* pagg 100/103 */
            arm_12_immediate(iadr,i,runtaddr);
            printf("]%s",
                   (i.a_format.b21) ? arm_bang_name : "");
        }
        break;

    case 3:
        /* two groups
         */
        if (i.a_format.b4) {
            /* Undefined instructions
             */
            printf("undefined instruction");
            break;
        }

        /* Load/store register offset
         */
        printf("%s%s%s %s,[%s", arm_ls_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               arm_ls_am[(i.a_format.b22<<1) | /* Byte */
                         /* T is a bit weird.. only when post-indexed */
                         (i.a_format.b21 & ~i.a_format.b24)],
               arm_register_name(i.a_format.rd),
               arm_register_name(i.a_format.rn));
        if (i.a_format.b24 == 0) {
            /* pagg 107,108 */
            printf("], %c%s",
                   (i.a_format.b23) ? '+' : '-',
                   arm_register_name(i.a_format.rm));
            arm_shifter(i);
        } else {
            printf(", %c%s",
                   (i.a_format.b23) ? '+' : '-',
                   arm_register_name(i.a_format.rm));
            arm_shifter(i);
            printf("]%s",
               (i.a_format.b21) ? arm_bang_name : "");
        }
        break;

    case 4:
        /* load/store multiple
         * The condition goes before the addressing mode, like for
         * every other suffixed mnemonic printed here.
         */
        printf("%s%s%s %s%s, ", arm_lsm_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               arm_lsm_am[(i.a_format.b24<<1) | i.a_format.b23],
               arm_register_name(i.a_format.rn),
               (i.a_format.b21) ? arm_bang_name : ""
               );
        arm_reglist((unsigned short)i.j_format.target);
        if (i.a_format.b22) printf("^");
        break;

    case 5:
        /* branch & link
         */
        printf("%s%s\t", arm_bl_name[i.a_format.b24],
               arm_cond_name[i.j_format.cond]);
        /* The offset is a signed 24-bit count of words.
         * Sign-extend it by hand so that backward branches come out
         * right whether or not the target field is a signed bit-field.
         */
        signed_immediate = (int)(i.j_format.target & 0x00ffffff);
        if (signed_immediate & 0x00800000)
            signed_immediate -= 0x01000000;
        signed_immediate *= 4;
        /* The pc reads as the instruction address plus 8 */
        printsym((runtaddr+8) + signed_immediate,iadr,TRUE);
        break;

    case 6:
        /* coprocessor load and store
         * b22 is the "long" bit and selects the l suffix,
         * b23 gives the sign of the offset as for the other
         * load/store forms.
         */
        printf("%s%s%s p%d,cr%d,", arm_lsp_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               (i.a_format.b22) ? "l" : "",
               i.a_format.rs,
               i.a_format.rd);
        if (i.a_format.b24) {
            printf("[%s, #%c"NUMFMT"]%s",
                   arm_register_name(i.a_format.rn),
                   (i.a_format.b23) ? '+' : '-',
                   i.i_format.shifter.ss.immediate<<2,
                   (i.a_format.b21) ? arm_bang_name : "");

        } else {
            printf("[%s], #%c"NUMFMT,
                   arm_register_name(i.a_format.rn),
                   (i.a_format.b23) ? '+' : '-',
                   i.i_format.shifter.ss.immediate<<2);
        }
        break;

    case 7:
        /* three groups
         */
        if (i.a_format.b24) {
            /* software interrupt
             */
            printf("%s%s "NUMFMT, arm_swi_name,
                   arm_cond_name[i.j_format.cond],
                   i.j_format.target);
            break;

        } else if (i.a_format.b4) {
            /* coprocessor register transfer
             * opcode_1 is the 3 bits from bit 21 up; the mask drops
             * anything a signed target field would drag in.
             */
            printf("%s%s p%d,"NUMFMT",%s,cr%d,cr%d,"NUMFMT,
                   arm_mc_name[i.a_format.b20],
                   arm_cond_name[i.j_format.cond],
                   i.a_format.rs,
                   (i.j_format.target >> 21) & 0x7,
                   arm_register_name(i.a_format.rd),
                   i.a_format.rn,
                   i.a_format.rm,
                   i.g_format.op2 >> 1);
            break;

        } else {
            /* coprocessor data processing
             * opcode_1 is the 4 bits from bit 20 up; the mask drops
             * anything a signed target field would drag in.
             */
            printf("%s%s p%d,"NUMFMT",cr%d,cr%d,cr%d,"NUMFMT,
                   arm_cdp_name,
                   arm_cond_name[i.j_format.cond],
                   i.a_format.rs,
                   (i.j_format.target >> 20) & 0xf,
                   i.a_format.rd,
                   i.a_format.rn,
                   i.a_format.rm,
                   i.g_format.op2 >> 1);
            break;

        }
        break;
    }

}