/* disarm.cpp */ #include #include #include #include #include #include #include #include #include #include #include #include #include "arm.hh" #include "basicblock.hh" #include "codesep.hh" #include "symbol.hh" #include "types.hh" using namespace std; #define USAGE \ "Usage: %s [-EB|-EL] [-h] [-m OFFSET] [-s SKIP] BINFILE [SYMFILE]\n" #define HELP \ USAGE \ " Disassemble ARM machine code from FILE or standard input.\n" \ " -EB\t\tRead input as big endian data\n" \ " -EL\t\tRead input as little endian data\n" \ " -h\t\tDisplay this help message\n" \ " -m OFFSET\tUse OFFSET as memory address of input\n" \ " -s SKIP\tNumber of bytes to skip before disassembly\n" \ "Report bugs to <" PACKAGE_BUGREPORT ">.\n" #define BLOCK_SEPARATOR \ "; --------------------------------------------------------------------" static char * simple_addr_string(da_addr_t addr) { char *addrstr = NULL; int r; static const char *format = "0x%x"; r = snprintf(NULL, 0, format, addr); if (r > 0) { addrstr = new char[r+1]; if (addrstr == NULL) abort(); r = snprintf(addrstr, r+1, format, addr); if (r <= 0) { delete addrstr; return NULL; } } else { return NULL; } return addrstr; } static char * sym_addr_string(da_addr_t addr, const map& sym_map) { char *addrstr = NULL; int r; map::const_iterator sym; sym = sym_map.find(addr); if (sym == sym_map.end()) { return simple_addr_string(addr); } static const char *format = "<%s:0x%x>"; da_addr_t sym_addr = sym->first; const char *sym_name = sym->second; r = snprintf(NULL, 0, format, sym_name, sym_addr, addr - sym_addr); if (r > 0) { addrstr = new char[r+1]; if (addrstr == NULL) abort(); r = snprintf(addrstr, r+1, format, sym_name, sym_addr, addr - sym_addr); if (r <= 0) { delete addrstr; return NULL; } } else { return NULL; } return addrstr; } static void print_code_references(const basic_block_t& bb, const map& sym_map) { uint_t coderefs_printed = 0; /* code references in */ multimap::const_iterator coderefs_iter; coderefs_iter = bb.c.in_refs.begin(); while (coderefs_iter != bb.c.in_refs.end()) { ref_code_t* ref = coderefs_iter->second; if (ref->remove) { delete ref; coderefs_iter++; continue; } if (coderefs_printed == 0) { cout << "; code reference from "; } else if (coderefs_printed % 4 == 0) { cout << "," << endl << ";\t\t "; } else { cout << ", "; } char *sourcestr = sym_addr_string(ref->source, sym_map); if (sourcestr == NULL) abort(); cout << sourcestr << "(" << (ref->cond ? "C" : "U") << ")" << (ref->call ? "C" : ((bb.addr > ref->source) ? "F" : "B")); coderefs_printed += 1; free(sourcestr); coderefs_iter++; } if (coderefs_printed > 0) cout << endl; /* code references out */ coderefs_printed = 0; coderefs_iter = bb.c.out_refs.begin(); while (coderefs_iter != bb.c.out_refs.end()) { ref_code_t *ref = coderefs_iter->second; if (ref->remove) { delete ref; coderefs_iter++; continue; } if (coderefs_printed == 0) { cout << "; code reference to "; } else if (coderefs_printed % 4 == 0) { cout << "," << endl << ";\t\t "; } else { cout << ", "; } char *sourcestr = sym_addr_string(ref->target, sym_map); if (sourcestr == NULL) abort(); cout << sourcestr << "(" << (ref->cond ? "C" : "U") << ")" << (ref->link ? "L" : ((bb.addr < ref->target) ? "F" : "B")); coderefs_printed += 1; free(sourcestr); coderefs_iter++; } if (coderefs_printed > 0) cout << endl; } static void print_data_references(const basic_block_t& bb, const map& sym_map) { uint_t datarefs_printed = 0; multimap::const_iterator datarefs_iter; datarefs_iter = bb.d.data_refs.begin(); while (datarefs_iter != bb.d.data_refs.end()) { ref_data_t* ref = datarefs_iter->second; if (ref->remove) { delete ref; datarefs_iter++; continue; } if (datarefs_printed == 0) { cout << "; data reference from "; } else if (datarefs_printed % 2 == 0) { cout << "," << endl << ";\t\t "; } else { cout << ", "; } char *sourcestr = sym_addr_string(ref->source, sym_map); if (sourcestr == NULL) abort(); char *targetstr = sym_addr_string(ref->target, sym_map); if (targetstr == NULL) abort(); cout << sourcestr << "(" << targetstr << "(" << ref->size << "))"; datarefs_printed += 1; free(sourcestr); free(targetstr); datarefs_iter++; } if (datarefs_printed > 0) cout << endl; } static void print_basic_block_code(const basic_block_t& bb, const image& img, const map& sym_map) { int r; if (!img.is_addr_mapped(bb.addr)) return; cout << endl << BLOCK_SEPARATOR << endl; if (bb.type != BASIC_BLOCK_TYPE_CODE) { cout << "; UNKNOWN BLOCK TYPE" << endl; } /* reg use/change analysis */ uint_t bb_use_regs = 0; uint_t bb_change_regs = 0; uint_t bb_use_flags = 0; uint_t bb_change_flags = 0; da_addr_t addr; addr = bb.addr + bb.size; while (addr > bb.addr) { addr -= sizeof(da_addr_t); da_word_t data; r = img.read<32>(addr, data); if (r == 0) break; da_instr_t instr; da_instr_args_t args; da_instr_parse(&instr, data, false); da_instr_parse_args(&args, &instr); /* regs */ uint_t change_regs = arm_instr_changed_regs(&instr, &args); bb_change_regs |= change_regs; uint_t use_regs = arm_instr_used_regs(&instr, &args); bb_use_regs &= (~change_regs & DA_REG_MASK); bb_use_regs |= use_regs; /* flags */ uint_t change_flags = arm_instr_changed_flags(&instr, &args); bb_change_flags |= change_flags; uint_t use_flags = arm_instr_used_flags(&instr, &args); bb_use_flags &= (~change_flags & DA_FLAG_MASK); bb_use_flags |= use_flags; } #if 0 bool change_printed = false; if (bb_change_regs != 0) { cout << "; changed reg(s): {"; arm_reglist_fprint(stdout, bb_change_regs); cout << " }"; change_printed = true; } if (bb_change_flags != 0) { if (change_printed) cout << ", "; else cout << "; "; cout << "changed flag(s): {"; arm_flaglist_fprint(stdout, bb_change_flags); cout << " }"; change_printed = true; } if (change_printed) cout << endl; bool use_printed = false; if (bb_use_regs != 0) { cout << "; reg(s) depended on: {"; arm_reglist_fprint(stdout, bb_use_regs); cout << " }"; use_printed = true; } if (bb_use_flags != 0) { if (use_printed) cout << ", "; else cout << "; "; cout << "flag(s) depended on: {"; arm_flaglist_fprint(stdout, bb_use_flags); cout << " }"; use_printed = true; } if (use_printed) cout << endl; /* code references */ print_code_references(bb, sym_map); /* data references */ print_data_references(bb, sym_map); #endif if (bb.c.func != NULL) { cout << "; Function entry point:\n"; } /* find symbols */ const char *symbol_name = NULL; da_addr_t symbol_addr = 0; map::const_iterator symbol_iter; symbol_iter = sym_map.lower_bound(bb.addr); if (symbol_iter != sym_map.end()) { symbol_addr = symbol_iter->first; symbol_name = symbol_iter->second; } addr = bb.addr; while (addr < bb.addr + bb.size) { da_word_t data; r = img.read<32>(addr, data); if (r == 0) break; da_instr_t instr; da_instr_args_t args; da_instr_parse(&instr, data, false); da_instr_parse_args(&args, &instr); /* symbol */ if (symbol_name != NULL && symbol_addr == addr) { cout << "; " << symbol_name << ":\n"; symbol_iter++; if (symbol_iter != sym_map.end()) { symbol_addr = symbol_iter->first; symbol_name = symbol_iter->second; } else { symbol_name = NULL; } } /* print instruction */ cout << hex << setw(8) << setfill('0') << addr << "\t"; cout << hex << setw(8) << setfill('0') << data << "\t"; da_instr_fprint(stdout, &instr, &args, addr); cout << "\n"; addr += sizeof(da_word_t); } } static char get_print_char(uint8_t data) { if (data >= 32 && data <= 126) return data; else return '.'; } static void print_data_line(da_addr_t addr, uint_t offset, uint_t length, const uint8_t *data, uint_t size) { cout << hex << setw(8) << setfill('0') << addr << "\t"; /* hex data */ for (uint_t i = 0; i < offset; i++) cout << " "; for (uint_t i = 0; i < size; i++) { cout << hex << setw(2) << setfill('0') << static_cast(data[i]) << " "; } for (uint_t i = 0; i < length - offset - size; i++) cout << " "; /* char data */ for (uint_t i = 0; i < offset; i++) cout << " "; cout << " |"; for (uint_t i = 0; i < size; i++) { cout << get_print_char(data[i]); } cout << "|" << endl; } static int print_basic_block_data(const basic_block_t& bb, const image& img, const map& sym_map) { if (!img.is_addr_mapped(bb.addr)) return 0; cout << endl << BLOCK_SEPARATOR << endl << "; data block" << endl; /* code references */ print_code_references(bb, sym_map); /* data references */ print_data_references(bb, sym_map); /* read data */ uint8_t *data = new uint8_t[bb.size]; uint_t read = img.read(bb.addr, data, bb.size); if (read < bb.size) return -1; /* first line */ print_data_line(bb.addr & ~0xf, bb.addr & 0xf, 0x10, data, min(0x10 - (bb.addr & 0xf), bb.size)); da_addr_t addr = 0x10 - (bb.addr & 0xf); while (addr < bb.size) { print_data_line((bb.addr + addr) & ~0xf, 0, 0x10, &data[addr], min(static_cast(16), bb.size - addr)); addr += 0x10; } delete data; return 0; } int main(int argc, char* argv[]) { int r; da_addr_t mem_offset = 0; off_t file_offset = 0; size_t disasm_size = 0; bool big_endian = false; int opt; while ((opt = getopt(argc, argv, "c:E:hm:s:")) != -1) { switch (opt) { case 'c': disasm_size = atoi(optarg); break; case 'E': if (optarg != NULL && (optarg[0] == 'B' || optarg[0] == 'L')) { big_endian = (optarg[0] == 'B'); } else { fprintf(stderr, USAGE, argv[0]); exit(EXIT_FAILURE); } break; case 'h': printf(HELP, argv[0]); exit(EXIT_SUCCESS); break; case 'm': mem_offset = atoi(optarg); break; case 's': file_offset = atoi(optarg); break; default: fprintf(stderr, USAGE, argv[0]); exit(EXIT_FAILURE); } } /* create memory image */ image img; if (optind < argc) { img.create_mapping(mem_offset, disasm_size, argv[optind], file_offset, true, false, big_endian); } else { fprintf(stderr, "Please specify file."); exit(EXIT_FAILURE); } /* add arm entry points */ set ep_set; for (da_addr_t i = 0; i < 0x20; i += 4) { if (i == 0x14) continue; ep_set.insert(i); } /* load symbols */ map sym_map; if (optind+1 < argc) { FILE *symbol_file = fopen(argv[optind+1], "r"); if (symbol_file == NULL) { perror("fopen"); exit(EXIT_FAILURE); } r = symbol_add_from_file(&sym_map, symbol_file); if (r < 0) { perror("symbol_add_from_file"); exit(EXIT_FAILURE); } fclose(symbol_file); } /* find strings */ #if 0 da_addr_t addr = 0; uint_t string_len = 0; while (1) { uint8_t data; r = image_read_byte(image, addr, &data); if (r == 0) break; else if (r < 0) { perror("image_read_byte"); exit(EXIT_FAILURE); } if (data >= 32 && data <= 126) { string_len += 1; addr += 1; continue; } if ((data == '\0' && string_len >= 3) || string_len >= 6) { if (data == '\0') cout << "asciz: "; else cout << "ascii: "; char *str = new char[string_len]; r = image_read(image, addr - string_len, str, string_len); if (r <= 0) { perror("image_read_byte"); exit(EXIT_FAILURE); } cout << hex << setw(8) << setfill('0') << addr - string_len; printf(" `%.*s'\n", string_len, str); delete str; } string_len = 0; addr += 1; } #endif /* basic block analysis */ map bb_map; r = basicblock_analysis(bb_map, ep_set, img); if (r < 0) { cerr << "Unable to finish basic block analysis." << endl; exit(EXIT_FAILURE); } /* print instructions */ map::const_iterator bb_iter; for (bb_iter = bb_map.begin(); bb_iter != bb_map.end(); bb_iter++) { const basic_block_t *bb = bb_iter->second; if (bb->type == BASIC_BLOCK_TYPE_CODE || bb->type == BASIC_BLOCK_TYPE_UNKNOWN) { print_basic_block_code(*bb, img, sym_map); } else { print_basic_block_data(*bb, img, sym_map); } } return EXIT_SUCCESS; }