/* Copyright (c) Microsoft Corporation. All rights reserved. */

/*
 * Disassembler for ARM
 *
 * "ARM Architectural Reference Manual"
 * Edited by Dave Jaggar. Prentice Hall ed.
 * ISBN 0-13-736299-4
 */

#include <windows.h>
#include "bo.h"
#include "arm_instruction.h"

/* Make it easy to change from decimal to hex.
 * Me I like hex, but objdump uses decimal and I need to test.
 */
#if 0
#define NUMFMT "0x%x"
#else
#define NUMFMT "%d"
#endif

/* All mnemonics are in here, similarly to make them easy to change.
 * Changing the rest of the syntax is.. not so easy.
 */

/* Writeback marker appended to a base register. */
#define arm_bang_name "!"

/* Mnemonic suffixes ("addressing modes"), indexed by instruction bits. */
static char *arm_swp_am[2] = { "", "b" };
static char *arm_dp_am[2]  = { "", "s" };
static char *arm_ls_am[4]  = { "", "t", "b", "bt" };
static char *arm_lsh_am[4] = { "", "h", "sb", "sh" };
static char *arm_lsm_am[4] = { "da", "ia", "db", "ib" };
static char *arm_msr_am[16] = {
    /* 0 */ "",  "c",  "x",  "xc",  "s",  "sc",  "sx",  "sxc",
    /* 8 */ "f", "fc", "fx", "fxc", "fs", "fsc", "fsx", "fsxc"
};

#define arm_swi_name "swi"
#define arm_bx_name  "bx"
#define arm_cdp_name "cdp"
#define arm_swp_name "swp"

static char *arm_bl_name[2]  = { "b", "bl" };
static char *arm_ls_name[2]  = { "str", "ldr" };
/* Entry 4 ("rrx") is not a shiftmode value; arm_shifter() selects it
 * explicitly.
 */
static char *arm_ls_shift[5] = { "lsl", "lsr", "asr", "ror", "rrx" };
static char *arm_lsm_name[2] = { "stm", "ldm" };
static char *arm_lsp_name[2] = { "stc", "ldc" };
static char *arm_mc_name[2]  = { "mcr", "mrc" };

/* Data-processing mnemonics, indexed by the 4-bit opcode. */
static char *arm_dp_name[16] = {
    /* 0 */ "and", "eor", "sub", "rsb",
    /* 4 */ "add", "adc", "sbc", "rsc",
    /* 8 */ "tst", "teq", "cmp", "cmn",
    /* 12*/ "orr", "mov", "bic", "mvn"
};

/* Which register operands arm_dps() prints for each data-processing opcode. */
#define ARM_RD 0x1
#define ARM_RN 0x2
static const char arm_dp_flags[16] = {
    /* 0 */ ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN,
    /* 4 */ ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN, ARM_RD|ARM_RN,
    /* 8 */ ARM_RN,        ARM_RN,        ARM_RN,        ARM_RN,
    /* 12*/ ARM_RD|ARM_RN, ARM_RD,        ARM_RD|ARM_RN, ARM_RD
};

static char *arm_msr_name[2] = { "mrs", "msr" };
static char *arm_psr_name[2] = { "cpsr", "spsr" };

/* Multiply mnemonics, indexed by the low three opcode bits. */
static char *arm_mul_name[8] = {
    /* 0 */ "mul",   "mla",   "",      "",
    /* 4 */ "umull", "umlal", "smull", "smlal"
};

/* Condition suffixes; index 14 prints nothing. */
static char *arm_cond_name[16] = {
    /* 0 */ "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
    /* 8 */ "hi", "ls", "ge", "lt", "gt", "le", "",   "never"/*BUGBUG*/
};

static char *arm_sbregister_name[16] = {
    "r0", "r1", "r2",  "r3", "r4", "r5", "r6", "r7",
    "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc"
};

/* Rotate VALUE right by AMOUNT bit positions within 32 bits.
 * AMOUNT may be any value; rotating by a multiple of 32 is the identity,
 * so the count is reduced modulo 32 instead of looping once per bit.
 */
UINT32 ror(UINT32 value, UINT amount)
{
    amount &= 31;
    if (amount == 0)
        return value;   /* also avoids an undefined shift by 32 below */
    return (value >> amount) | (value << (32 - amount));
}

static int regcount;    /* how many regs used in this inst */
static int regnum[16];  /* which regs used in this inst */

/* Return the printable name of register IREG (an index into
 * arm_sbregister_name) and record it in regnum[] if it has not been seen
 * since regcount was last reset. Nothing in this file reads regnum[] back.
 */
static char *
arm_register_name(int ireg)
{
    int i;

    for (i = 0; i < regcount; i++)
        if (regnum[i] == ireg)
            break;
    if (i >= regcount)
        regnum[regcount++] = ireg;
    return (arm_sbregister_name[ireg]);
}

/* Print the register set encoded in the 16-bit mask LIST as
 * "{r0, r1, ...}", lowest register number first.
 */
static void
arm_reglist(unsigned short list)
{
    int i;
    int more = 0;

    printf("{");
    for (i = 0; i < 16; i++)
        if (list & (1 << i)) {
            printf("%s%s",
                   (more++) ? ", " : "",
                   arm_register_name(i));
        }
    printf("}");
}

/* Print the ", <shift> #<amount>" suffix of a register operand shifted by
 * an immediate, or nothing when the shift field is zero.
 * NOTE(review): an amount of 0 with lsr/asr is printed as "#0"; the
 * architecture defines that encoding as a shift by 32 - confirm whether
 * the output should say so.
 */
static void
arm_shifter(arm_instruction i)
{
    /* its always shifted but we'll pretend */
    if (i.i_format.shifter.ns.shift != 0) {
        if ((i.i_format.shifter.ls.shiftmode == ARM_SHIFT_RRX) &&
            (i.i_format.shifter.ls.shiftval == 0)) {
            /* nothing, RRX */
            printf(", %s", arm_ls_shift[4]);
        } else {
            printf(", %s #"NUMFMT,
                   arm_ls_shift[i.i_format.shifter.ls.shiftmode],
                   i.i_format.shifter.ls.shiftval);
        }
    }
}

/* Print the ", #+N" / ", #-N" immediate offset of a load/store; bit 23
 * selects the sign. A zero offset prints nothing.
 * IADR and RUNTADDR are accepted but currently unused.
 */
static void
arm_12_immediate(unsigned iadr, arm_instruction i, int runtaddr)
{
    /* BUGBUG relocs ? */
    if (i.i_format.shifter.uu.u_val != 0)
        printf(", #%c"NUMFMT,
               (i.a_format.b23) ? '+' : '-',
               i.i_format.shifter.uu.u_val);
}

/* Print the common head of a data-processing instruction: mnemonic,
 * condition, optional "s", then Rd and/or Rn as selected by arm_dp_flags.
 * The caller prints the final (shifter) operand.
 */
static void
arm_dps(arm_instruction i)
{
    printf("%s%s",
           arm_dp_name[i.i_format.opcode],
           arm_cond_name[i.i_format.cond]);

    /* tst..cmn dont print the mandatory "S" */
    if ((i.i_format.opcode < 8) || (i.i_format.opcode > 11))
        printf("%s", arm_dp_am[i.i_format.s_l]);

    if (arm_dp_flags[i.i_format.opcode] & ARM_RD)
        printf(" %s", arm_register_name(i.i_format.shifter.ss.rd));

    if (arm_dp_flags[i.i_format.opcode] & ARM_RN) {
        if (arm_dp_flags[i.i_format.opcode] & ARM_RD)
            printf(",");
        printf(" %s", arm_register_name(i.i_format.rn));
    }
}

/* Disassemble the instruction at PINST, located at IADR, and print it
 * with printf (no trailing newline).
 * RUNTADDR is normally the same as IADR; it is used to
 * disassemble from object files [in which case
 * IADR is the file offset and RUNTADDR is the
 * address at which the code has been relocated].
 * Resets the per-instruction register bookkeeping (regcount) on entry.
 */
void
ArmDisasm(unsigned iadr, void *pinst, int runtaddr)
{
    arm_instruction i = *((arm_instruction*)pinst);
    int signed_immediate;

    regcount = 0;

    /* The encoding of ARM instructions is fairly irregular.
     * I do admit not spending much time on it, esp not looking
     * carefully at section 3.14 pag 3-22 of the Jaggar book.
     * But even that has a lot of ifs and buts, so.
     * The result is not only largish, but with a few repeats
     * of code in some places with only minor variations.
     * Oh well.
     */

    /* Start partitioning off the top 3 bits of the opcode.
     * All but the first case then are fairly unambiguous.
     */
    switch (i.a_format.class) {

    case 0:
        /* Messy case, use bit 4 for starters */
        if (i.a_format.b4 == 0) {
            /* Ambiguity resolved here cuz CMP/CMN/TST/TEQ always S=1 */
            if ((i.g_format.opcode & 0xf9) != 0x10) {
                /* data processing immediate shift
                 * pagg 88,89,91,93,95,97
                 */
                arm_dps(i);
                printf(", %s", arm_register_name(i.a_format.rm));
                arm_shifter(i);
                break;
            } else if (i.a_format.b21 == 0) {
                /* move from Status register */
                printf("%s%s %s, %s",
                       arm_msr_name[i.a_format.b21],
                       arm_cond_name[i.i_format.cond],
                       arm_register_name(i.a_format.rd),
                       arm_psr_name[i.a_format.b22]);
                break;
            } else {
                /* move register to Status register */
                printf("%s%s %s_%s, %s",
                       arm_msr_name[i.a_format.b21],
                       arm_cond_name[i.i_format.cond],
                       arm_psr_name[i.a_format.b22],
                       arm_msr_am[i.a_format.rn],
                       arm_register_name(i.a_format.rm));
                break;
            }
        } else {
            /* Still messy, start with bit 7 */
            if (i.a_format.b7 == 0) {
                /* Ambiguity resolved here cuz CMP/CMN/TST/TEQ always S=1
                 * Also, other restrictions on BX
                 */
                if ((i.g_format.opcode == 0x12) &&
                    (i.g_format.op2 == 0x1)) {
                    /* branch/exchange instruction set
                     * NB b8..b19 should be 1s but we dont check
                     */
                    printf("%s%s %s",
                           arm_bx_name,
                           arm_cond_name[i.g_format.cond],
                           arm_register_name(i.g_format.rm));
                    break;
                } else {
                    /* data processing register shift
                     * pagg 90,92,94,96
                     */
                    arm_dps(i);
                    printf(", %s, %s %s",
                           arm_register_name(i.a_format.rm),
                           arm_ls_shift[i.i_format.shifter.ls.shiftmode],
                           arm_register_name(i.a_format.rs));
                    break;
                }
            } else {
                /* lshsb cannot have b5==b6==0 */
                if ((i.a_format.b5|i.a_format.b6)==0) {
                    /* Use bit 24 to resolve mul viz swp */
                    if (i.a_format.b24) {
                        /* swp/swpb */
                        printf("%s%s%s %s,%s,[%s]",
                               arm_swp_name,
                               arm_cond_name[i.g_format.cond],
                               arm_swp_am[i.a_format.b22],
                               arm_register_name(i.g_format.rd),
                               arm_register_name(i.g_format.rm),
                               arm_register_name(i.g_format.rn));
                    } else if (i.a_format.b23) {
                        /* mul long
                         * pagg 64,65,80,81
                         */
                        printf("%s%s%s %s, %s, %s, %s",
                               arm_mul_name[i.i_format.opcode & 0x7],
                               arm_cond_name[i.g_format.cond],
                               arm_dp_am[i.i_format.s_l],
                               arm_register_name(i.g_format.rd),
                               arm_register_name(i.g_format.rn),
                               arm_register_name(i.g_format.rm),
                               arm_register_name(i.g_format.rs));
                        break;
                    } else {
                        /* mul/mla
                         * pagg 52,58
                         */
                        printf("%s%s%s %s, %s, %s",
                               arm_mul_name[i.i_format.opcode & 0x7],
                               arm_cond_name[i.g_format.cond],
                               arm_dp_am[i.i_format.s_l],
                               arm_register_name(i.g_format.rn),
                               arm_register_name(i.g_format.rm),
                               arm_register_name(i.g_format.rs));
                        if (i.a_format.b21) /* mla */
                            printf(", %s",
                                   arm_register_name(i.g_format.rd));
                        break;
                    }
                } else {
                    /* Use bit 22 to resolve the two lshsb classes
                     * First part is common
                     */
                    printf("%s%s%s %s, [%s",
                           arm_ls_name[i.a_format.b20],
                           arm_cond_name[i.j_format.cond],
                           arm_lsh_am[i.i_format.shifter.ls.shiftmode],
                           arm_register_name(i.g_format.rd),
                           arm_register_name(i.g_format.rn));
                    if (i.a_format.b22) {
                        /* lshsb immediate
                         * pagg 110,112,114
                         * The 8-bit offset is split across the rs (high
                         * nibble) and rm (low nibble) fields.
                         */
                        printf((i.a_format.b24) ? ", #%c"NUMFMT"]"
                                                : "], #%c"NUMFMT,
                               (i.a_format.b23) ? '+' : '-',
                               (i.g_format.rs << 4) | (i.g_format.rm));
                    } else {
                        /* lshsb register
                         * pagg 111,113,115(broken!)
                         */
                        printf((i.a_format.b24) ? ", %c%s]" : "], %c%s",
                               (i.a_format.b23) ? '+' : '-',
                               arm_register_name(i.a_format.rm));
                    }
                    /* Common for writeback */
                    if (i.a_format.b21)
                        printf("%s", arm_bang_name);
                    break;
                }
            }
        }
        break;

    case 1:
        /* Two (ambiguous) groups */
        if ((i.g_format.opcode & 0xfb) == 0x32) {
            /* move immediate to status register.
             * The operand uses the same encoding as the data-processing
             * immediate below: the 8-bit value rotated right by twice
             * the rotate field.
             */
            printf("%s%s %s_%s, #"NUMFMT,
                   arm_msr_name[i.a_format.b21],
                   arm_cond_name[i.i_format.cond],
                   arm_psr_name[i.a_format.b22],
                   arm_msr_am[i.a_format.rn], /* overkill, can only be f */
                   ror(i.i_format.shifter.ss.immediate,
                       i.i_format.shifter.ss.rotate<<1));
            break;
        }
        /* data processing with immediate
         * pag 3-87
         */
        arm_dps(i);
        /* BUGBUG relocs ? */
        printf(", #"NUMFMT,
               ror(i.i_format.shifter.ss.immediate,
                   i.i_format.shifter.ss.rotate<<1));
        break;

    case 2:
        /* Load/store immediate offset */
        printf("%s%s%s %s,[%s",
               arm_ls_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               arm_ls_am[(i.a_format.b22<<1) |             /* Byte */
                         /* T is a bit weird..: only when post-indexed */
                         (i.a_format.b21 & ~i.a_format.b24)],
               arm_register_name(i.a_format.rd),
               arm_register_name(i.a_format.rn));
        if (i.a_format.b24 == 0) {
            /* pagg 106 */
            printf("]");
            arm_12_immediate(iadr,i,runtaddr);
        } else {
            /* pagg 100/103 */
            arm_12_immediate(iadr,i,runtaddr);
            printf("]%s", (i.a_format.b21) ? arm_bang_name : "");
        }
        break;

    case 3:
        /* two groups */
        if (i.a_format.b4) {
            /* Undefined instructions */
            printf("undefined instruction");
            break;
        }
        /* Load/store register offset */
        printf("%s%s%s %s,[%s",
               arm_ls_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               arm_ls_am[(i.a_format.b22<<1) |             /* Byte */
                         /* T is a bit weird..: only when post-indexed */
                         (i.a_format.b21 & ~i.a_format.b24)],
               arm_register_name(i.a_format.rd),
               arm_register_name(i.a_format.rn));
        if (i.a_format.b24 == 0) {
            /* pagg 107,108 */
            printf("], %c%s",
                   (i.a_format.b23) ? '+' : '-',
                   arm_register_name(i.a_format.rm));
            arm_shifter(i);
        } else {
            printf(", %c%s",
                   (i.a_format.b23) ? '+' : '-',
                   arm_register_name(i.a_format.rm));
            arm_shifter(i);
            printf("]%s", (i.a_format.b21) ? arm_bang_name : "");
        }
        break;

    case 4:
        /* load/store multiple.
         * The condition goes before the addressing-mode suffix
         * (e.g. "ldmeqia"), matching the op/cond/suffix order used for
         * every other mnemonic in this file.
         */
        printf("%s%s%s %s%s, ",
               arm_lsm_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               arm_lsm_am[(i.a_format.b24<<1) | i.a_format.b23],
               arm_register_name(i.a_format.rn),
               (i.a_format.b21) ? arm_bang_name : "");
        arm_reglist((unsigned short)i.j_format.target);
        if (i.a_format.b22)
            printf("^");
        break;

    case 5:
        /* branch & link */
        printf("%s%s\t",
               arm_bl_name[i.a_format.b24],
               arm_cond_name[i.j_format.cond]);
        /* NOTE(review): backward branches only come out right if
         * j_format.target is a signed bit-field; its declaration is not
         * visible here - confirm in arm_instruction.h.
         */
        signed_immediate = i.j_format.target << 2;
#if 0
        printlabel((runtaddr+8) + signed_immediate,
                   (iadr+8) + signed_immediate);
#else
#if 0
        printsym((iadr+8) + signed_immediate,iadr,TRUE);
#else
        printsym((runtaddr+8) + signed_immediate,iadr,TRUE);
#endif
#endif
        break;

    case 6:
        /* coprocessor load and store.
         * The offset direction is the U bit (bit 23), as for the other
         * load/store forms; bit 22 is the N (length) bit and is not
         * printed.
         */
        printf("%s%s p%d,cr%d,",
               arm_lsp_name[i.a_format.b20],
               arm_cond_name[i.j_format.cond],
               i.a_format.rs,
               i.a_format.rd);
        if (i.a_format.b24) {
            printf("[%s, #%c"NUMFMT"]%s",
                   arm_register_name(i.a_format.rn),
                   (i.a_format.b23) ? '+' : '-',
                   i.i_format.shifter.ss.immediate<<2,
                   (i.a_format.b21) ? arm_bang_name : "");
        } else {
            printf("[%s], #%c"NUMFMT,
                   arm_register_name(i.a_format.rn),
                   (i.a_format.b23) ? '+' : '-',
                   i.i_format.shifter.ss.immediate<<2);
        }
        break;

    case 7:
        /* three groups */
        if (i.a_format.b24) {
            /* software interrupt */
            printf("%s%s "NUMFMT,
                   arm_swi_name,
                   arm_cond_name[i.j_format.cond],
                   i.j_format.target);
            break;
        } else if (i.a_format.b4) {
            /* coprocessor register transfer */
            printf("%s%s p%d,"NUMFMT",%s,cr%d,cr%d,"NUMFMT,
                   arm_mc_name[i.a_format.b20],
                   arm_cond_name[i.j_format.cond],
                   i.a_format.rs,
                   i.j_format.target >> 21, /*BUGBUG is this right */
                   arm_register_name(i.a_format.rd),
                   i.a_format.rn,
                   i.a_format.rm,
                   i.g_format.op2 >> 1);
            break;
        } else {
            /* coprocessor data processing */
            printf("%s%s p%d,"NUMFMT",cr%d,cr%d,cr%d,"NUMFMT,
                   arm_cdp_name,
                   arm_cond_name[i.j_format.cond],
                   i.a_format.rs,
                   i.j_format.target >> 20, /*BUGBUG is this right */
                   i.a_format.rd,
                   i.a_format.rn,
                   i.a_format.rm,
                   i.g_format.op2 >> 1);
            break;
        }
        break;
    }
}