X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tools%2Fcmpmrg_text.c;h=561c9b28630af0d64ecc4529f74464b432ffaf42;hb=421216a10ad76ab1887e54584849b9fcabe79c96;hp=62a9f6d0e5f495d546af38d19ad9a744d78d68e6;hpb=572a6bea1f61d354b9cd607e1be474f5334b41c7;p=ia32rtools.git diff --git a/tools/cmpmrg_text.c b/tools/cmpmrg_text.c index 62a9f6d..561c9b2 100644 --- a/tools/cmpmrg_text.c +++ b/tools/cmpmrg_text.c @@ -1,3 +1,11 @@ +/* + * ia32rtools + * (C) notaz, 2013,2014 + * + * This work is licensed under the terms of 3-clause BSD license. + * See COPYING file in the top-level directory. + */ + #include #include #include @@ -68,7 +76,8 @@ typedef struct { struct my_symtab { unsigned int addr; - unsigned int fpos; // for patching + //unsigned int fpos; // for patching + unsigned int is_text:1; char *name; }; @@ -90,9 +99,11 @@ static int symt_cmp(const void *p1_, const void *p2_) void parse_headers(FILE *f, unsigned int *base_out, struct my_sect_info *sect_i, - struct my_symtab **symtab_out, long *sym_cnt) + struct my_symtab **symtab_out, long *sym_cnt, + struct my_symtab **raw_symtab_out, long *raw_sym_cnt) { - struct my_symtab *symt_o = NULL; + struct my_symtab *symt_txt = NULL; + struct my_symtab *symt_all = NULL; char *stringtab = NULL; unsigned int base = 0; int text_scnum = 0; @@ -218,8 +229,10 @@ void parse_headers(FILE *f, unsigned int *base_out, if (hdr.f_nsyms != 0) { symname[8] = 0; - symt_o = malloc(hdr.f_nsyms * sizeof(symt_o[0]) + 1); - my_assert_not(symt_o, NULL); + symt_txt = malloc(hdr.f_nsyms * sizeof(symt_txt[0]) + 1); + my_assert_not(symt_txt, NULL); + symt_all = malloc(hdr.f_nsyms * sizeof(symt_all[0]) + 1); + my_assert_not(symt_all, NULL); ret = fseek(f, hdr.f_symptr + hdr.f_nsyms * sizeof(syment), SEEK_SET); @@ -239,7 +252,7 @@ void parse_headers(FILE *f, unsigned int *base_out, } for (i = s = 0; i < hdr.f_nsyms; i++) { - long pos = ftell(f); + //long pos = ftell(f); ret = fread(&syment, 1, sizeof(syment), f); my_assert(ret, sizeof(syment)); @@ -248,16 +261,18 @@ void parse_headers(FILE *f, unsigned int *base_out, //printf("%3d %2d %08x '%s'\n", syment.e_sclass, // syment.e_scnum, syment.e_value, symname); - if (syment.e_scnum != text_scnum || syment.e_sclass != C_EXT) - continue; - - symt_o[s].addr = syment.e_value; - symt_o[s].fpos = pos; + symt_all[i].addr = syment.e_value; + //symt_all[i].fpos = pos; if (syment.e.e.e_zeroes == 0) - symt_o[s].name = stringtab + syment.e.e.e_offset; + symt_all[i].name = stringtab + syment.e.e.e_offset; else - symt_o[s].name = strdup(symname); - s++; + symt_all[i].name = strdup(symname); + + symt_all[i].is_text = (syment.e_scnum == text_scnum); + if (symt_all[i].is_text && syment.e_sclass == C_EXT) { + symt_txt[s] = symt_all[i]; + s++; + } if (syment.e_numaux) { ret = fseek(f, syment.e_numaux * sizeof(syment), @@ -267,113 +282,145 @@ void parse_headers(FILE *f, unsigned int *base_out, } } - if (symt_o != NULL) - qsort(symt_o, s, sizeof(symt_o[0]), symt_cmp); + if (symt_txt != NULL) + qsort(symt_txt, s, sizeof(symt_txt[0]), symt_cmp); *sym_cnt = s; - *symtab_out = symt_o; + *symtab_out = symt_txt; + *raw_sym_cnt = i; + *raw_symtab_out = symt_all; } -static int handle_pad(uint8_t *d_obj, uint8_t *d_exe, int maxlen) +static int try_align(uint8_t *d_obj, uint8_t *d_exe, int maxlen) { - static const uint8_t p7[7] = { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 }; - static const uint8_t p6[6] = { 0x8d, 0x9b, 0x00, 0x00, 0x00, 0x00 }; - static const uint8_t p5[5] = { 0x05, 0x00, 0x00, 0x00, 0x00 }; - static const uint8_t p4[4] = { 0x8d, 0x64, 0x24, 0x00 }; - static const uint8_t p3[3] = { 0x8d, 0x49, 0x00 }; - static const uint8_t p2[2] = { 0x8b, 0xff }; - static const uint8_t p1[1] = { 0x90 }; + static const uint8_t aligns[8][7] = { + { }, // [0] not used + { 0x90 }, // [1] nop + { 0x8b, 0xff }, // mov edi, edi + { 0x8d, 0x49, 0x00 }, // lea ecx, [ecx] + { 0x8d, 0x64, 0x24, 0x00 }, // lea + { 0x05, 0x00, 0x00, 0x00, 0x00 }, // add eax, 0 + { 0x8d, 0x9b, 0x00, 0x00, 0x00, 0x00 }, + { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 }, + }; + int j = 0; int len; int i; + // check exe for common pad/align patterns for (i = 0; i < maxlen; i++) if (d_exe[i] != 0xcc) break; - for (len = i; len > 0; ) + while (j < 8) { + for (j = 1; j < 8; j++) { + if (maxlen - i < j) { + j = 8; + break; + } + if (memcmp(&d_exe[i], aligns[j], j) == 0) { + i += j; + break; + } + } + } + if (i == 0) + return 0; + + // now check the obj + for (j = 0, len = i; len > 0; ) { i = len; if (i > 7) i = 7; - switch (i) { - #define CASE(x) \ - case sizeof(p ## x): \ - if (memcmp(d_obj, p ## x, sizeof(p ## x))) \ - return 0; \ - memset(d_obj, 0xcc, sizeof(p ## x)); \ + if (memcmp(d_obj, aligns[i], i) != 0) break; - CASE(7) - CASE(6) - CASE(5) - CASE(4) - CASE(3) - CASE(2) - CASE(1) - default: - printf("%s: unhandled len: %d\n", __func__, len); - return 0; - #undef CASE - } + + memcpy(d_obj, d_exe, i); + j += i; len -= i; d_obj += i; + d_exe += i; } - return 1; + return j; } struct equiv_opcode { signed char len; signed char ofs; - short cmp_rm; + unsigned short cmp_rm:1; + unsigned short simple:1; uint8_t v_masm[8]; uint8_t v_masm_mask[8]; uint8_t v_msvc[8]; uint8_t v_msvc_mask[8]; } equiv_ops[] = { // cmp $0x11,%ax - { 4, -1, 0, + { 4, -1, 0, 0, { 0x66,0x83,0xf8,0x03 }, { 0xff,0xff,0xff,0x00 }, { 0x66,0x3d,0x03,0x00 }, { 0xff,0xff,0x00,0xff }, }, // lea -0x1(%ebx,%eax,1),%esi // op mod/rm sib offs // mov, test, imm grp 1 - { 3, -2, 1, + { 3, -2, 1, 0, { 0x8d,0x74,0x03 }, { 0xf0,0x07,0xc0 }, { 0x8d,0x74,0x18 }, { 0xf0,0x07,0xc0 }, }, // movzbl 0x58f24a(%eax,%ecx,1),%eax - { 4, -3, 1, + { 4, -3, 1, 0, { 0x0f,0xb6,0x84,0x08 }, { 0xff,0xff,0x07,0xc0 }, { 0x0f,0xb6,0x84,0x01 }, { 0xff,0xff,0x07,0xc0 }, }, // inc/dec - { 3, -2, 1, + { 3, -2, 1, 0, { 0xfe,0x4c,0x03 }, { 0xfe,0xff,0xc0 }, { 0xfe,0x4c,0x18 }, { 0xfe,0xff,0xc0 }, }, // cmp - { 3, -2, 1, + { 3, -2, 1, 0, { 0x38,0x0c,0x0c }, { 0xff,0xff,0xc0 }, { 0x38,0x0c,0x30 }, { 0xff,0xff,0xc0 }, }, // test %dl,%bl - { 2, -1, 1, + { 2, -1, 1, 0, { 0x84,0xd3 }, { 0xfe,0xc0 }, { 0x84,0xda }, { 0xfe,0xc0 }, }, // cmp r,r/m vs rm/r - { 2, 0, 1, + { 2, 0, 1, 0, { 0x3a,0xca }, { 0xff,0xc0 }, { 0x38,0xd1 }, { 0xff,0xc0 }, }, // rep + 66 prefix - { 2, 0, 0, + { 2, 0, 0, 0, { 0xf3,0x66 }, { 0xfe,0xff }, { 0x66,0xf3 }, { 0xff,0xfe }, }, // fadd st, st(0) vs st(0), st - { 2, 0, 0, + { 2, 0, 0, 0, { 0xd8,0xc0 }, { 0xff,0xf7 }, { 0xdc,0xc0 }, { 0xff,0xf7 }, }, + // [esp] vs [esp+0] + { 4, -1, 0, 0, + { 0x00,0x04,0x24,0x90 }, { 0x00,0xc7,0xff,0xff }, + { 0x00,0x44,0x24,0x00 }, { 0x00,0xc7,0xff,0xff }, }, + { 5, -1, 0, 0, + { 0x00,0x04,0x24,0x00,0x90 }, { 0x00,0xc7,0xff,0x00,0xff }, + { 0x00,0x44,0x24,0x00,0x00 }, { 0x00,0xc7,0xff,0xff,0x00 }, }, + { 8, -1, 0, 0, + { 0x00,0x04,0x24,0x00,0x00,0x00,0x00,0x90 }, { 0x00,0xc7,0xff,0x00,0x00,0x00,0x00,0xff }, + { 0x00,0x44,0x24,0x00,0x00,0x00,0x00,0x00 }, { 0x00,0xc7,0xff,0xff,0x00,0x00,0x00,0x00 }, }, + + // various align insns/fillups + { 2, -1, 0, 0, + { 0x8b,0xff }, { 0xff,0xff }, + { 0x8b,0xc0 }, { 0xff,0xff }, }, + { 2, 0, 0, 1, + { 0x00,0x00 }, { 0x00,0x00 }, + { 0x8b,0xc0 }, { 0xff,0xff }, }, + { 3, 0, 0, 1, + { 0x00,0x00,0x00 }, { 0x50,0x00,0x00 }, + { 0x2e,0x8b,0xc0 }, { 0xff,0xff,0xff }, }, // broad filters (may take too much..) // testb $0x4,0x1d(%esi,%eax,1) // movb, push, .. - { 3, -2, 1, + { 3, -2, 1, 0, { 0xf6,0x44,0x06 }, { 0x00,0x07,0xc0 }, { 0xf6,0x44,0x30 }, { 0x00,0x07,0xc0 }, }, }; @@ -411,6 +458,9 @@ static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen) op->v_msvc_mask, len)) continue; + if (op->simple) + return len + ofs; + jo = je = 0; d_obj += ofs; d_exe += ofs; @@ -424,11 +474,11 @@ static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen) break; if ((jo == len && je != len) || (jo != len && je == len)) { - printf("invalid equiv_ops\n"); + printf("invalid equiv_op #%td\n", op - equiv_ops); return -1; } if (jo == len) - return len + ofs - 1; // matched + return len + ofs; // matched // var byte vo = d_obj[jo] & ~op->v_masm_mask[jo]; @@ -457,7 +507,7 @@ static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen) static void fill_int3(unsigned char *d, int len) { while (len-- > 0) { - if (*d == 0xcc) + if (d[0] == 0xcc && d[1] == 0xcc) break; *d++ = 0xcc; } @@ -465,39 +515,51 @@ static void fill_int3(unsigned char *d, int len) int main(int argc, char *argv[]) { + unsigned int base = 0, addr, end, sym, *t; struct my_sect_info s_text_obj, s_text_exe; + struct my_symtab *raw_syms_obj = NULL; struct my_symtab *syms_obj = NULL; - long sym_cnt_obj; + long sym_cnt_obj, raw_sym_cnt_obj; FILE *f_obj, *f_exe; - unsigned int base = 0, addr, end; SCNHDR tmphdr; long sztext_cmn; + int do_cmp = 1; int retval = 1; + int bad = 0; int left; + int arg; int ret; int i; - if (argc != 3) { - printf("usage:\n%s \n", argv[0]); + for (arg = 1; arg < argc; arg++) { + if (!strcmp(argv[arg], "-n")) + do_cmp = 0; + else + break; + } + + if (argc != arg + 2) { + printf("usage:\n%s [-n] \n", argv[0]); return 1; } - f_obj = fopen(argv[1], "r+b"); + f_obj = fopen(argv[arg++], "r+b"); if (f_obj == NULL) { - fprintf(stderr, "%s", argv[1]); + fprintf(stderr, "%s: ", argv[1]); perror(""); return 1; } - f_exe = fopen(argv[2], "r"); + f_exe = fopen(argv[arg++], "r"); if (f_exe == NULL) { - fprintf(stderr, "%s", argv[2]); + fprintf(stderr, "%s: ", argv[2]); perror(""); return 1; } - parse_headers(f_obj, NULL, &s_text_obj, &syms_obj, &sym_cnt_obj); - parse_headers(f_exe, &base, &s_text_exe, NULL, NULL); + parse_headers(f_obj, NULL, &s_text_obj, &syms_obj, &sym_cnt_obj, + &raw_syms_obj, &raw_sym_cnt_obj); + parse_headers(f_exe, &base, &s_text_exe, NULL, NULL, NULL, NULL); sztext_cmn = s_text_obj.size; if (sztext_cmn > s_text_exe.size) @@ -528,28 +590,34 @@ int main(int argc, char *argv[]) } } + if (do_cmp) for (i = 0; i < sztext_cmn; i++) { - if (s_text_obj.data[i] == s_text_exe.data[i]) + if (s_text_obj.data[i] == s_text_exe.data[i]) { + bad = 0; continue; + } left = sztext_cmn - i; - if (s_text_exe.data[i] == 0xcc) { // padding - if (handle_pad(s_text_obj.data + i, - s_text_exe.data + i, left)) - continue; + ret = try_align(s_text_obj.data + i, s_text_exe.data + i, left); + if (ret > 0) { + i += ret - 1; + continue; } ret = check_equiv(s_text_obj.data + i, s_text_exe.data + i, left); if (ret >= 0) { - i += ret; + i += ret - 1; continue; } printf("%x: %02x vs %02x\n", base + i, s_text_obj.data[i], s_text_exe.data[i]); - goto out; + if (bad) + goto out; + + bad = 1; } // fill removed funcs with 'int3' @@ -571,11 +639,25 @@ int main(int argc, char *argv[]) // remove relocs for (i = 0; i < s_text_obj.reloc_cnt; i++) { addr = s_text_obj.relocs[i].r_vaddr; + sym = s_text_obj.relocs[i].r_symndx; if (addr > s_text_obj.size - 4) { printf("reloc addr OOR: %x\n", addr); goto out; } - if (*(unsigned int *)(s_text_obj.data + addr) == 0xcccccccc) { + if (sym >= raw_sym_cnt_obj) { + printf("reloc sym OOR: %d/%ld\n", + sym, raw_sym_cnt_obj); + goto out; + } +#if 0 + printf("r %08x -> %08x %s\n", base + addr, + raw_syms_obj[sym].addr, + raw_syms_obj[sym].name); +#endif + t = (unsigned int *)(s_text_obj.data + addr); + if (t[0] == 0xcccccccc + || t[-1] == 0xcccccccc) { // jumptab of a func? + t[0] = 0xcccccccc; memmove(&s_text_obj.relocs[i], &s_text_obj.relocs[i + 1], (s_text_obj.reloc_cnt - i - 1) @@ -583,6 +665,20 @@ int main(int argc, char *argv[]) i--; s_text_obj.reloc_cnt--; } +#if 0 + // note: branches/calls already linked, + // so only useful for dd refs + // XXX: rm'd because of switch tables + else if (raw_syms_obj[sym].is_text) { + unsigned int addr2 = raw_syms_obj[sym].addr; + if (s_text_obj.data[addr2] == 0xcc) { + printf("warning: reloc %08x -> %08x " + "points to rm'd target '%s'\n", + base + addr, base + addr2, + raw_syms_obj[sym].name); + } + } +#endif } // patch .text