/* basicblock.cpp */ #include #include #include #include #include #include #include #include #include #include "arm.hh" #include "basicblock.hh" #include "image.hh" #include "symbol.hh" #include "types.hh" using namespace std; bool bb_cmp_t::operator()(const basic_block_t* bb1, const basic_block_t* bb2) const { return (bb1->addr < bb2->addr); } static void data_block_merge(basic_block_t& bb, basic_block_t& bb_next) { /* move data refs */ while (!bb_next.d.data_refs.empty()) { map::iterator ref_iter; ref_iter = bb_next.d.data_refs.begin(); basic_block_t *bb_source = ref_iter->first; ref_data_t *ref = ref_iter->second; bb_next.d.data_refs.erase(ref_iter); if (ref->remove) { delete ref; continue; } bb_source->c.data_refs.erase(&bb_next); bb_source->c.data_refs.insert(make_pair(&bb, ref)); bb.d.data_refs.insert(make_pair(bb_source, ref)); } /* update size */ bb.size += bb_next.size; /* delete merged block */ delete &bb_next; } static void mark_bb_data(basic_block_t& bb_source) { stack stack; if (bb_source.type != BASIC_BLOCK_TYPE_UNKNOWN) return; stack.push(&bb_source); while (!stack.empty()) { basic_block_t *bb = stack.top(); stack.pop(); bb->type = BASIC_BLOCK_TYPE_DATA; /* remove outgoing refs */ while (!bb->c.out_refs.empty()) { map::iterator ref_iter; ref_iter = bb->c.out_refs.begin(); ref_code_t *ref = ref_iter->second; bb->c.out_refs.erase(ref_iter); if (ref->remove) delete ref; else ref->remove = true; } /* remove data refs */ while (!bb->c.data_refs.empty()) { map::iterator ref_iter; ref_iter = bb->c.data_refs.begin(); ref_data_t *ref = ref_iter->second; bb->c.data_refs.erase(ref_iter); if (ref->remove) delete ref; else ref->remove = true; } /* remove incoming refs and add source to stack */ while (!bb->c.in_refs.empty()) { map::iterator ref_iter; ref_iter = bb->c.in_refs.begin(); basic_block_t *bb_next = ref_iter->first; ref_code_t *ref = ref_iter->second; bb->c.in_refs.erase(ref_iter); if (ref->remove) delete ref; else ref->remove = true; if (bb_next->type == BASIC_BLOCK_TYPE_UNKNOWN) { stack.push(bb_next); } } } } static void mark_bb_code(basic_block_t& bb_source) { stack stack; if (bb_source.type != BASIC_BLOCK_TYPE_UNKNOWN) return; stack.push(&bb_source); while (!stack.empty()) { basic_block_t *bb = stack.top(); stack.pop(); bb->type = BASIC_BLOCK_TYPE_CODE; /* remove incoming data refs */ while (!bb->d.data_refs.empty()) { map::iterator ref_iter; ref_iter = bb->d.data_refs.begin(); ref_data_t *ref = ref_iter->second; bb->d.data_refs.erase(ref_iter); if (ref->remove) delete ref; else ref->remove = true; } /* mark outgoing data refs as data blocks */ map::iterator data_iter; for (data_iter = bb->c.data_refs.begin(); data_iter != bb->c.data_refs.end(); data_iter++) { basic_block_t *bb_ref = data_iter->first; mark_bb_data(*bb_ref); } /* add outgoing blocks to stack */ map::iterator code_iter; for (code_iter = bb->c.out_refs.begin(); code_iter != bb->c.out_refs.end(); code_iter++) { basic_block_t *bb_ref = code_iter->first; if (bb_ref->type == BASIC_BLOCK_TYPE_UNKNOWN) { stack.push(bb_ref); } } } } /* return 0 on success, return 1 if source block was changed */ static int reference_code_add(const map& bb_map, basic_block_t& bb_source, da_addr_t source, da_addr_t target, bool cond, bool link) { map::const_iterator bb_iter; basic_block_t *bb_target; bb_iter = bb_map.find(target); if (bb_iter == bb_map.end()) return 0; bb_target = bb_iter->second; if (bb_target->type == BASIC_BLOCK_TYPE_DATA) { mark_bb_data(bb_source); return 1; } ref_code_t* ref = new ref_code_t; if (ref == NULL) abort(); ref->remove = false; ref->source = source; ref->target = target; ref->cond = cond; ref->link = link; ref->call = false; bb_source.c.out_refs[bb_target] = ref; bb_target->c.in_refs[&bb_source] = ref; return 0; } static void reference_data_add(const map& bb_map, basic_block_t& bb_source, da_addr_t source, da_addr_t target, uint_t size) { map::const_iterator bb_iter; basic_block_t *bb_target; bb_iter = bb_map.upper_bound(target); if (bb_iter == bb_map.end()) return; bb_iter--; bb_target = bb_iter->second; if (bb_target == &bb_source) return; ref_data_t* ref = new ref_data_t; if (ref == NULL) abort(); ref->remove = false; ref->source = source; ref->target = target; ref->size = size; bb_source.c.data_refs.insert(make_pair(bb_target, ref)); bb_target->d.data_refs.insert(make_pair(&bb_source, ref)); return; } static void basicblock_add(map& bb_map, da_addr_t addr, const image& img) { if (!img.is_addr_mapped(addr)) return; basic_block_t* bb = new basic_block_t; bb->addr = addr; bb->type = BASIC_BLOCK_TYPE_UNKNOWN; bb->size = 0; bb->c.func = NULL; bb_map[addr] = bb; } static void function_add(basic_block_t& bb) { function_t* func = new function_t; func->addr = bb.addr; func->entry_bb = &bb; bb.c.func = func; /* mark incoming references as calls */ map::iterator code_iter; for (code_iter = bb.c.in_refs.begin(); code_iter != bb.c.in_refs.end(); code_iter++) { ref_code_t* ref = code_iter->second; ref->call = true; } } static int bb_pass_mark(map& bb_map, const image& img) { int r; da_addr_t addr = 0; while (1) { da_word_t data; r = img.read<32>(addr, data); if (r == 0) break; da_instr_t instr; da_instr_args_t args; da_instr_parse(&instr, data, false); da_instr_parse_args(&args, &instr); if (instr.group == DA_GROUP_BL) { da_addr_t target = da_instr_branch_target(args.bl.off, addr); /* basic block for fall-through */ if (!args.bl.link) { basicblock_add(bb_map, addr + sizeof(da_word_t), img); } /* basic block for branch target */ basicblock_add(bb_map, target, img); } else { if (arm_instr_is_reg_changed(&instr, &args, DA_REG_R15)) { basicblock_add(bb_map, addr + sizeof(da_word_t), img); } } addr += sizeof(da_word_t); } return 0; } static int bb_pass_size(map& bb_map, const image& img) { int r; map::iterator bb_iter = bb_map.begin(); if (bb_iter != bb_map.end()) { basic_block_t *bb_prev = (bb_iter++)->second; while (bb_iter != bb_map.end()) { basic_block_t *bb = (bb_iter++)->second; bb_prev->size = bb->addr - bb_prev->addr; bb_prev = bb; } /* find size of last block */ da_addr_t addr = bb_prev->addr; while (img.is_addr_mapped(addr)) addr += 4; bb_prev->size = addr - bb_prev->addr; } return 0; } static int bb_pass_stats(map& bb_map, const image& img) { int r; uint_t funcs = 0; uint_t bb_unknown = 0; uint_t bb_data = 0; uint_t bb_data_blocks = 0; uint_t bb_code = 0; map::iterator bb_iter; bb_iter = bb_map.begin(); while (bb_iter != bb_map.end()) { basic_block_t *bb = (bb_iter++)->second; /* stats */ switch (bb->type) { case BASIC_BLOCK_TYPE_UNKNOWN: bb_unknown += bb->size; break; case BASIC_BLOCK_TYPE_DATA: bb_data += bb->size; bb_data_blocks += 1; break; case BASIC_BLOCK_TYPE_CODE: bb_code += bb->size; if (bb->c.func != NULL) funcs += 1; break; } } cout << "Functions: " << funcs << "\n" << "Unknown: " << bb_unknown << " b\n" << "Code: " << bb_code << " b\n" << "Data: " << bb_data << " b"; if (bb_data_blocks > 0) { cout << " / " << bb_data_blocks << " blocks = " << (bb_data / bb_data_blocks) << " b/block\n"; } else { cout << "\n"; } return 0; } /* Try to backtrack to last cmp rREG, #X instruction and return X. */ static int block_backtrack_reg_bounds(const basic_block_t& bb, da_addr_t addr, uint_t reg, const image& img) { assert(addr >= bb.addr && addr < bb.addr + bb.size); int r; if (addr == bb.addr) return -1; addr -= sizeof(da_instr_t); for (; addr >= bb.addr; addr -= sizeof(da_instr_t)) { da_word_t data; r = img.read<32>(addr, data); if (r <= 0) return -1; da_instr_t instr; da_instr_args_t args; da_instr_parse(&instr, data, false); da_instr_parse_args(&args, &instr); if (!arm_instr_is_flag_changed(&instr, &args, DA_FLAG_C) && !arm_instr_is_flag_changed(&instr, &args, DA_FLAG_Z)) { continue; } /* instr does change carry and/or zero flag */ if (instr.group == DA_GROUP_DATA_IMM) { if (args.data_imm.cond == DA_COND_AL && args.data_imm.op == DA_DATA_OP_CMP && args.data_imm.rn == reg) { return args.data_imm.imm; } } return -1; } return -1; } /* Try to backtrack to last add rREG, r15, rX instruction and return r15 + X + 8 in D. */ static int block_backtrack_reg_add_pc(const basic_block_t& bb, da_addr_t addr, uint_t reg, const image& img, da_addr_t& d) { assert(addr >= bb.addr && addr < bb.addr + bb.size); int r; if (addr == bb.addr) return -1; addr -= sizeof(da_instr_t); for (; addr >= bb.addr; addr -= sizeof(da_instr_t)) { da_word_t data; r = img.read<32>(addr, data); if (r <= 0) return -1; da_instr_t instr; da_instr_args_t args; da_instr_parse(&instr, data, false); da_instr_parse_args(&args, &instr); if (!arm_instr_is_reg_changed(&instr, &args, DA_REG_R14)) continue; /* instr does change r14 */ if (instr.group == DA_GROUP_DATA_IMM) { if (args.data_imm.cond == DA_COND_AL && args.data_imm.op == DA_DATA_OP_ADD && args.data_imm.rd == reg && args.data_imm.rn == DA_REG_R15) { d = addr + args.data_imm.imm + 8; return 0; } } return -1; } return -1; } static int block_pass_third(basic_block_t& bb, map& bb_map, const image& img) { int r; for (da_addr_t addr = bb.addr; addr < bb.addr + bb.size; addr += sizeof(da_instr_t)) { da_word_t data; r = img.read<32>(addr, data); if (r == 0) break; da_instr_t instr; da_instr_args_t args; da_instr_parse(&instr, data, false); da_instr_parse_args(&args, &instr); /* code/data analysis */ if (arm_instr_is_unpredictable(&instr, &args, addr)) { mark_bb_data(bb); break; } /* find references */ bool do_fall_through = false; bool cond_fall_through = false; if (instr.group == DA_GROUP_BL) { da_addr_t target = da_instr_branch_target(args.bl.off, addr); do_fall_through = (args.bl.link || args.bl.cond != DA_COND_AL); cond_fall_through = (!args.bl.link || args.bl.cond != DA_COND_AL); /* target reference */ r = reference_code_add(bb_map, bb, addr, target, (args.bl.cond != DA_COND_AL), args.bl.link); if (r < 0) return -1; else if (r == 1) break; } else if (instr.group == DA_GROUP_DATA_IMM_SH) { if (args.data_imm_sh.cond == DA_COND_LS && args.data_imm_sh.op == DA_DATA_OP_ADD && args.data_imm_sh.rd == DA_REG_R15 && args.data_imm_sh.rn == DA_REG_R15 && args.data_imm_sh.sh == DA_SHIFT_LSL && args.data_imm_sh.sha == 2) { /* try to find upper bound for rm */ int bound = block_backtrack_reg_bounds(bb, addr, args.data_imm_sh.rm, img); if (bound >= 0) { while (bound >= 0) { da_addr_t target = (bound << 2) + addr + 8; r = reference_code_add(bb_map, bb, addr, target, true, false); if (r < 0) return -1; else if (r == 1) break; bound -= 1; } if (r == 1) break; do_fall_through = true; cond_fall_through = true; } } else if (args.data_imm_sh.cond == DA_COND_AL && args.data_imm_sh.op == DA_DATA_OP_MOV && args.data_imm_sh.rd == DA_REG_R15 && args.data_imm_sh.sh == DA_SHIFT_LSL && args.data_imm_sh.sha == 0) { /* could be a link */ /* da_addr_t r14_data; r = block_backtrack_reg_add_pc(bb, addr, 14, img, &r14_data); if (r >= 0 && r14_data == addr + sizeof(da_instr_t)) { r = reference_code_add(bb_map, bb, addr, r14_data, false, true); if (r < 0) return -1; else if (r == 1) break; } do_fall_through = (cond != DA_COND_AL); cond_fall_through = (cond != DA_COND_AL); */ } else if ((args.data_imm_sh.op < DA_DATA_OP_TST && args.data_imm_sh.op > DA_DATA_OP_CMN) && args.data_imm_sh.rd == DA_REG_R15) { do_fall_through = (args.data_imm_sh.cond != DA_COND_AL); cond_fall_through = (args.data_imm_sh.cond != DA_COND_AL); } else { do_fall_through = true; cond_fall_through = (args.data_imm_sh.cond != DA_COND_AL); } } else if (instr.group == DA_GROUP_LS_IMM) { /* data reference */ if (args.ls_imm.rn == DA_REG_R15 && args.ls_imm.cond != DA_COND_NV) { da_addr_t target = addr + 8 + args.ls_imm.off; reference_data_add(bb_map, bb, addr, target, (args.ls_imm.byte ? 1 : 4)); } /* code reference */ if (args.ls_imm.load && args.ls_imm.rd == DA_REG_R15) { if (args.ls_imm.p && !args.ls_imm.w && args.ls_imm.rn == DA_REG_R15) { da_addr_t target = addr + args.ls_imm.off + 8; da_addr_t jump_target; r = img.read<32>(target, jump_target); if (r >= 0) { r = reference_code_add(bb_map, bb, addr, jump_target, (args.ls_imm.cond != DA_COND_AL), false); if (r < 0) return -1; else if (r == 1) break; } } do_fall_through = (args.ls_imm.cond != DA_COND_AL); } else { do_fall_through = true; } cond_fall_through = (args.ls_imm.cond != DA_COND_AL); } else { da_cond_t cond = da_instr_get_cond(&instr); /* r15 is changed */ if (arm_instr_is_reg_changed(&instr, &args, DA_REG_R15)) { do_fall_through = (cond != DA_COND_AL); cond_fall_through = (cond != DA_COND_AL); } else { do_fall_through = true; cond_fall_through = false; } } /* check fall-through reference */ if (do_fall_through && addr == (bb.addr + bb.size - sizeof(da_instr_t))) { /* last instruction in block */ r = reference_code_add(bb_map, bb, addr, addr + sizeof(da_instr_t), cond_fall_through, false); if (r < 0) return -1; else if (r == 1) break; } } return 0; } static int bb_pass_third(map& bb_map, const image& img) { int r; map::iterator bb_iter = bb_map.begin(); while (bb_iter != bb_map.end()) { basic_block_t *bb = (bb_iter++)->second; if (bb->type != BASIC_BLOCK_TYPE_DATA) { r = block_pass_third(*bb, bb_map, img); if (r < 0) return -1; } } return 0; } static int bb_pass_merge(map& bb_map, const image& img) { int r; map::iterator bb_iter, bb_save; bb_iter = bb_map.begin(); if (bb_iter != bb_map.end()) { basic_block_t *bb_prev = (bb_iter++)->second; while (bb_iter != bb_map.end()) { bb_save = bb_iter++; basic_block_t *bb = bb_save->second; /* remove unused */ if (!img.is_addr_mapped(bb->addr)) { delete bb; bb_map.erase(bb_save); continue; } /* merge */ if (bb_prev->type == BASIC_BLOCK_TYPE_DATA && bb->type == BASIC_BLOCK_TYPE_DATA) { /* merge data blocks */ data_block_merge(*bb_prev, *bb); bb_map.erase(bb_save); } else { bb_prev = bb; } } } return 0; } static int bb_pass_codetree(map& bb_map, const set& entrypoints) { /* entrypoints */ set::const_iterator entrypoint_iter; for (entrypoint_iter = entrypoints.begin(); entrypoint_iter != entrypoints.end(); entrypoint_iter++) { mark_bb_code(*bb_map[*entrypoint_iter]); function_add(*bb_map[*entrypoint_iter]); } return 0; } static int bb_pass_func(map& bb_map) { map::iterator bb_iter; bb_iter = bb_map.begin(); while (bb_iter != bb_map.end()) { basic_block_t *bb = (bb_iter++)->second; /* mark code referenced with link reference as function entry points */ if (bb->type == BASIC_BLOCK_TYPE_CODE) { map::iterator code_iter; for (code_iter = bb->c.in_refs.begin(); code_iter != bb->c.in_refs.end(); code_iter++) { ref_code_t* ref = code_iter->second; if (ref->link && bb->c.func == NULL) { function_add(*bb); } } } } return 0; } int basicblock_analysis(map& bb_map, const set& entrypoints, const image& img) { int r; /* add entrypoints */ set::const_iterator entrypoint_iter; for (entrypoint_iter = entrypoints.begin(); entrypoint_iter != entrypoints.end(); entrypoint_iter++) { basicblock_add(bb_map, *entrypoint_iter, img); } /* first pass: */ /* mark basic blocks */ cout << "First pass..."; r = bb_pass_mark(bb_map, img); if (r < 0) return -1; cout << "done\n"; /* second pass: */ /* save basic block size */ cout << "Second pass..."; r = bb_pass_size(bb_map, img); if (r < 0) return -1; cout << "done\n"; r = bb_pass_stats(bb_map, img); if (r < 0) return -1; /* third pass: */ /* do code/data analysis */ /* collect references */ cout << "Third pass..."; r = bb_pass_third(bb_map, img); if (r < 0) return -1; cout << "done\n"; r = bb_pass_stats(bb_map, img); if (r < 0) return -1; /* fourth pass: */ /* follow reference tree from entrypoints and mark code blocks */ cout << "Fourth pass..."; r = bb_pass_codetree(bb_map, entrypoints); if (r < 0) return -1; cout << "done\n"; r = bb_pass_stats(bb_map, img); if (r < 0) return -1; /* fifth pass: */ /* merge adjacent blocks */ cout << "Fifth pass..."; r = bb_pass_merge(bb_map, img); if (r < 0) return -1; cout << "done\n"; r = bb_pass_stats(bb_map, img); if (r < 0) return -1; /* sixth pass: */ /* find function entry blocks */ cout << "Sixth pass..."; r = bb_pass_func(bb_map); if (r < 0) return -1; cout << "done\n"; r = bb_pass_stats(bb_map, img); if (r < 0) return -1; return 0; }