4 #include <linux/coff.h>
10 /* http://www.delorie.com/djgpp/doc/coff/ */
/* File header fields; parse_headers() reads this record into `hdr`. */
13 unsigned short f_magic; /* magic number */
14 unsigned short f_nscns; /* number of sections */
15 unsigned int f_timdat; /* time & date stamp */
16 unsigned int f_symptr; /* file pointer to symtab */
17 unsigned int f_nsyms; /* number of symtab entries */
18 unsigned short f_opthdr; /* sizeof(optional hdr) */
19 unsigned short f_flags; /* flags */
/* Optional header fields; parse_headers() reads this record into `opthdr`
 * when f_opthdr is non-zero. */
23 unsigned short magic; /* type of file */
24 unsigned short vstamp; /* version stamp */
25 unsigned int tsize; /* text size in bytes, padded to FW bdry*/
26 unsigned int dsize; /* initialized data size in bytes, padded to FW bdry */
27 unsigned int bsize; /* uninitialized data size in bytes, padded to FW bdry */
28 unsigned int entry; /* entry pt. */
29 unsigned int text_start; /* base of text used for this file */
30 unsigned int data_start; /* base of data used for this file */
/* Section header fields; parse_headers() reads this record into `scnhdr`. */
/* NOTE(review): s_name is a fixed 8-byte field, so a full-length name has
 * no room for a NUL terminator. */
34 char s_name[8]; /* section name */
35 unsigned int s_paddr; /* physical address, aliased s_nlib */
36 unsigned int s_vaddr; /* virtual address */
37 unsigned int s_size; /* section size */
38 unsigned int s_scnptr; /* file ptr to raw data for section */
39 unsigned int s_relptr; /* file ptr to relocation */
40 unsigned int s_lnnoptr; /* file ptr to line numbers */
41 unsigned short s_nreloc; /* number of relocation entries */
42 unsigned short s_nlnno; /* number of line number entries */
43 unsigned int s_flags; /* flags */
/* Relocation entry.  The struct is packed; the relocation table is read
 * from and written to the file as raw bytes (see parse_headers/main). */
47 unsigned int r_vaddr; /* address of relocation */
48 unsigned int r_symndx; /* symbol we're adjusting for */
49 unsigned short r_type; /* type of relocation */
50 } __attribute__((packed)) RELOC;
/* Symbol table entry (packed).  The name is either stored inline in e_name
 * or, when e_zeroes is 0, found in the string table at e_offset
 * (see how parse_headers() resolves names). */
54 char e_name[E_SYMNMLEN];
56 unsigned int e_zeroes;
57 unsigned int e_offset;
62 unsigned short e_type;
63 unsigned char e_sclass;
/* number of auxiliary entries following this one; parse_headers() skips them */
64 unsigned char e_numaux;
65 } __attribute__((packed)) SYMENT;
71 //unsigned int fpos; // for patching
/* set by parse_headers() when the symbol's section number equals text_scnum */
72 unsigned int is_text:1;
86 static int symt_cmp(const void *p1_, const void *p2_)
88 const struct my_symtab *p1 = p1_, *p2 = p2_;
89 return p1->addr - p2->addr;
/*
 * Parse the COFF headers of `f` (optionally wrapped in an MZ/PE container),
 * load the first non-empty section together with its relocation table and,
 * when requested, the symbol table.
 *
 *  f              - input stream; it is repositioned freely
 *  base_out       - if non-NULL and a non-zero base was read from the
 *                   optional header, receives base + s_vaddr of the section
 *  sect_i         - receives the section data, size, relocations and the
 *                   file offsets needed to write them back
 *  symtab_out     - receives the C_EXT symbols of the text section, sorted
 *                   by address with symt_cmp()
 *  sym_cnt        - NOTE(review): presumably the entry count of *symtab_out;
 *                   the store is not visible here - confirm
 *  raw_symtab_out - receives the unfiltered symbol array, indexed by symbol
 *                   number (auxiliary slots are skipped, not filled in)
 *  raw_sym_cnt    - NOTE(review): presumably the entry count of
 *                   *raw_symtab_out - confirm
 *
 * Failures are reported through my_assert()/my_assert_not().
 */
92 void parse_headers(FILE *f, unsigned int *base_out,
93 struct my_sect_info *sect_i,
94 struct my_symtab **symtab_out, long *sym_cnt,
95 struct my_symtab **raw_symtab_out, long *raw_sym_cnt)
97 struct my_symtab *symt_txt = NULL;
98 struct my_symtab *symt_all = NULL;
99 char *stringtab = NULL;
100 unsigned int base = 0;
// seek to the end and back to the start; `filesize` is used further down
// as a sanity bound for the string table size
113 ret = fseek(f, 0, SEEK_END);
118 ret = fseek(f, 0, SEEK_SET);
121 ret = fread(&hdr, 1, sizeof(hdr), f);
122 my_assert(ret, sizeof(hdr));
// MZ executable: the 32-bit value at offset 0x3c is the file offset of the
// "PE" signature, which is followed by the COFF file header
124 if (hdr.f_magic == 0x5a4d) // MZ
126 ret = fseek(f, 0x3c, SEEK_SET);
128 ret = fread(&val, 1, sizeof(val), f);
129 my_assert(ret, sizeof(val));
131 ret = fseek(f, val, SEEK_SET);
133 ret = fread(&val, 1, sizeof(val), f);
134 my_assert(ret, sizeof(val));
135 my_assert(val, 0x4550); // PE
137 // should be COFF now
138 ret = fread(&hdr, 1, sizeof(hdr), f);
139 my_assert(ret, sizeof(hdr));
142 my_assert(hdr.f_magic, COFF_I386MAGIC);
// optional header: remember where it starts so that its full declared size
// (f_opthdr) can be skipped afterwards regardless of how much is read
144 if (hdr.f_opthdr != 0)
146 opthdr_pos = ftell(f);
148 if (hdr.f_opthdr < sizeof(opthdr))
151 ret = fread(&opthdr, 1, sizeof(opthdr), f);
152 my_assert(ret, sizeof(opthdr));
153 my_assert(opthdr.magic, COFF_ZMAGIC);
155 //printf("text_start: %x\n", opthdr.text_start);
// a larger optional header carries more fields; the 32-bit word directly
// after the fixed part is taken as the base address
157 if (hdr.f_opthdr > sizeof(opthdr)) {
158 ret = fread(&base, 1, sizeof(base), f);
159 my_assert(ret, sizeof(base));
160 //printf("base: %x\n", base);
162 ret = fseek(f, opthdr_pos + hdr.f_opthdr, SEEK_SET);
166 // note: assuming first non-empty one is .text ..
167 for (s = 0; s < hdr.f_nscns; s++) {
// remember where this section header lives; main() rewrites it later
168 sect_i->scnhdr_fofs = ftell(f);
170 ret = fread(&scnhdr, 1, sizeof(scnhdr), f);
171 my_assert(ret, sizeof(scnhdr));
173 if (scnhdr.s_size != 0) {
// the loop must have stopped on a non-empty section
178 my_assert(s < hdr.f_nscns, 1);
181 printf("f_nsyms: %x\n", hdr.f_nsyms);
// NOTE(review): s_name is char[8] and need not be NUL-terminated; "%s" can
// read past the field for 8-character names ("%.8s" would bound it)
182 printf("s_name: '%s'\n", scnhdr.s_name);
183 printf("s_vaddr: %x\n", scnhdr.s_vaddr);
184 printf("s_size: %x\n", scnhdr.s_size);
185 //printf("s_scnptr: %x\n", scnhdr.s_scnptr);
186 printf("s_nreloc: %x\n", scnhdr.s_nreloc);
// load the raw section contents
190 ret = fseek(f, scnhdr.s_scnptr, SEEK_SET);
193 sect_i->data = malloc(scnhdr.s_size);
194 my_assert_not(sect_i->data, NULL);
195 ret = fread(sect_i->data, 1, scnhdr.s_size, f);
196 my_assert(ret, scnhdr.s_size);
198 sect_i->sect_fofs = scnhdr.s_scnptr;
199 sect_i->size = scnhdr.s_size;
// load the section's relocation table as raw bytes; the "+ 1" keeps the
// allocation request non-zero when there are no relocations
202 ret = fseek(f, scnhdr.s_relptr, SEEK_SET);
205 reloc_size = scnhdr.s_nreloc * sizeof(sect_i->relocs[0]);
206 sect_i->relocs = malloc(reloc_size + 1);
207 my_assert_not(sect_i->relocs, NULL);
208 ret = fread(sect_i->relocs, 1, reloc_size, f);
209 my_assert(ret, reloc_size);
211 sect_i->reloc_cnt = scnhdr.s_nreloc;
212 sect_i->reloc_fofs = scnhdr.s_relptr;
214 if (base != 0 && base_out != NULL)
215 *base_out = base + scnhdr.s_vaddr;
// caller did not ask for symbols (main() passes NULL for the exe)
217 if (symtab_out == NULL || sym_cnt == NULL)
221 if (hdr.f_nsyms != 0) {
// NOTE(review): f_nsyms comes from the file; the size products below are
// not checked for overflow
224 symt_txt = malloc(hdr.f_nsyms * sizeof(symt_txt[0]) + 1);
225 my_assert_not(symt_txt, NULL);
226 symt_all = malloc(hdr.f_nsyms * sizeof(symt_all[0]) + 1);
227 my_assert_not(symt_all, NULL);
// the string table starts right after the last symbol entry; its first
// 32-bit word (read into i) is the table size
229 ret = fseek(f, hdr.f_symptr
230 + hdr.f_nsyms * sizeof(syment), SEEK_SET);
232 ret = fread(&i, 1, sizeof(i), f);
233 my_assert(ret, sizeof(i));
234 my_assert((unsigned int)i < filesize, 1);
// the size word itself occupies the first 4 bytes of the in-memory copy
// (zeroed here), so e_offset values can be added to stringtab directly
// NOTE(review): only an upper bound on i is asserted; a size below 4 would
// make the memset overrun the buffer and "i - 4" go negative/wrap - confirm
236 stringtab = malloc(i);
237 my_assert_not(stringtab, NULL);
238 memset(stringtab, 0, 4);
239 ret = fread(stringtab + 4, 1, i - 4, f);
240 my_assert(ret, i - 4);
242 ret = fseek(f, hdr.f_symptr, SEEK_SET);
// i walks symbol numbers (all entries), s counts the entries kept in symt_txt
246 for (i = s = 0; i < hdr.f_nsyms; i++) {
247 //long pos = ftell(f);
249 ret = fread(&syment, 1, sizeof(syment), f);
250 my_assert(ret, sizeof(syment));
// NOTE(review): strncpy() does not terminate an 8-character name; this
// relies on symname being larger than 8 and pre-zeroed - confirm
252 strncpy(symname, syment.e.e_name, 8);
253 //printf("%3d %2d %08x '%s'\n", syment.e_sclass,
254 // syment.e_scnum, syment.e_value, symname);
256 symt_all[i].addr = syment.e_value;
257 //symt_all[i].fpos = pos;
// e_zeroes == 0: long name, stored in the string table at e_offset
// NOTE(review): e_offset is not validated against the string table size
258 if (syment.e.e.e_zeroes == 0)
259 symt_all[i].name = stringtab + syment.e.e.e_offset;
// short name: keep a private copy of the inline name
261 symt_all[i].name = strdup(symname);
263 symt_all[i].is_text = (syment.e_scnum == text_scnum);
// only external symbols of the text section go into the sorted table
264 if (symt_all[i].is_text && syment.e_sclass == C_EXT) {
265 symt_txt[s] = symt_all[i];
// skip auxiliary entries in the file but keep i in step with the symbol
// numbering; the skipped symt_all slots are left as malloc() returned them
269 if (syment.e_numaux) {
270 ret = fseek(f, syment.e_numaux * sizeof(syment),
273 i += syment.e_numaux;
277 if (symt_txt != NULL)
278 qsort(symt_txt, s, sizeof(symt_txt[0]), symt_cmp);
281 *symtab_out = symt_txt;
283 *raw_symtab_out = symt_all;
/*
 * Handle a spot where the exe contains int3 (0xcc) filler: d_obj is
 * compared against the known padding instruction sequences below and, for
 * a recognised sequence, the object bytes are overwritten with 0xcc.
 *
 *  d_obj  - object file bytes at the mismatch (modified in place)
 *  d_exe  - exe bytes at the same position
 *  maxlen - number of bytes available in both buffers
 */
286 static int handle_pad(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
// padding instruction sequences, one per length from 7 bytes down to 1
288 static const uint8_t p7[7] = { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 };
289 static const uint8_t p6[6] = { 0x8d, 0x9b, 0x00, 0x00, 0x00, 0x00 };
290 static const uint8_t p5[5] = { 0x05, 0x00, 0x00, 0x00, 0x00 }; // add eax, 0
291 static const uint8_t p4[4] = { 0x8d, 0x64, 0x24, 0x00 }; // lea
292 static const uint8_t p3[3] = { 0x8d, 0x49, 0x00 }; // lea ecx, [ecx]
293 static const uint8_t p2[2] = { 0x8b, 0xff }; // mov edi, edi
294 static const uint8_t p1[1] = { 0x90 }; // nop
// scan d_exe for the first byte that is not int3 (0xcc)
298 for (i = 0; i < maxlen; i++)
299 if (d_exe[i] != 0xcc)
// len starts at the index found above; each case compares d_obj with the
// pattern of that size and replaces it with 0xcc
// NOTE(review): assumes the statement after the memcmp() test exits on a
// mismatch so that the memset only runs on a match - confirm
302 for (len = i; len > 0; )
310 case sizeof(p ## x): \
311 if (memcmp(d_obj, p ## x, sizeof(p ## x))) \
313 memset(d_obj, 0xcc, sizeof(p ## x)); \
323 printf("%s: unhandled len: %d\n", __func__, len);
/*
 * Instruction encodings that differ between the object file and the exe
 * but are accepted as equivalent by check_equiv().  For each entry,
 * v_masm/v_masm_mask is matched against the object bytes and
 * v_msvc/v_msvc_mask against the exe bytes.  cmp_mask() compares only the
 * bits set in a mask; check_equiv() then requires the remaining (variable)
 * bits to agree between the two sides.  Patterns are at most 8 bytes long.
 */
335 struct equiv_opcode {
340 uint8_t v_masm_mask[8];
342 uint8_t v_msvc_mask[8];
346 { 0x66,0x83,0xf8,0x03 }, { 0xff,0xff,0xff,0x00 },
347 { 0x66,0x3d,0x03,0x00 }, { 0xff,0xff,0x00,0xff }, },
348 // lea -0x1(%ebx,%eax,1),%esi // op mod/rm sib offs
349 // mov, test, imm grp 1
351 { 0x8d,0x74,0x03 }, { 0xf0,0x07,0xc0 },
352 { 0x8d,0x74,0x18 }, { 0xf0,0x07,0xc0 }, },
353 // movzbl 0x58f24a(%eax,%ecx,1),%eax
355 { 0x0f,0xb6,0x84,0x08 }, { 0xff,0xff,0x07,0xc0 },
356 { 0x0f,0xb6,0x84,0x01 }, { 0xff,0xff,0x07,0xc0 }, },
359 { 0xfe,0x4c,0x03 }, { 0xfe,0xff,0xc0 },
360 { 0xfe,0x4c,0x18 }, { 0xfe,0xff,0xc0 }, },
363 { 0x38,0x0c,0x0c }, { 0xff,0xff,0xc0 },
364 { 0x38,0x0c,0x30 }, { 0xff,0xff,0xc0 }, },
367 { 0x84,0xd3 }, { 0xfe,0xc0 },
368 { 0x84,0xda }, { 0xfe,0xc0 }, },
371 { 0x3a,0xca }, { 0xff,0xc0 },
372 { 0x38,0xd1 }, { 0xff,0xc0 }, },
// prefix bytes in swapped order
375 { 0xf3,0x66 }, { 0xfe,0xff },
376 { 0x66,0xf3 }, { 0xff,0xfe }, },
377 // fadd st, st(0) vs st(0), st
379 { 0xd8,0xc0 }, { 0xff,0xf7 },
380 { 0xdc,0xc0 }, { 0xff,0xf7 }, },
382 // broad filters (may take too much..)
383 // testb $0x4,0x1d(%esi,%eax,1)
// first mask byte is 0x00 here, i.e. the opcode byte itself is not compared
386 { 0xf6,0x44,0x06 }, { 0x00,0x07,0xc0 },
387 { 0xf6,0x44,0x30 }, { 0x00,0x07,0xc0 }, },
/*
 * Compare `len` bytes of `d` against `expect`, looking only at the bits
 * that are set in the corresponding `mask` byte.
 */
390 static int cmp_mask(uint8_t *d, uint8_t *expect, uint8_t *mask, int len)
394 for (i = 0; i < len; i++)
// a differing masked byte means the pattern does not match
395 if ((d[i] & mask[i]) != (expect[i] & mask[i]))
/*
 * Check whether the bytes at d_obj and d_exe are two encodings of the same
 * instruction according to the equiv_ops table.  A successful match
 * returns len + ofs - 1.
 *
 *  d_obj  - object file bytes at the mismatch
 *  d_exe  - exe bytes at the same position
 *  maxlen - number of bytes available in both buffers
 */
401 static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
403 uint8_t vo, ve, vo2, ve2;
407 for (i = 0; i < sizeof(equiv_ops) / sizeof(equiv_ops[0]); i++)
409 struct equiv_opcode *op = &equiv_ops[i];
// fixed bits: the object must look like the masm form and the exe like the
// msvc form of this entry
416 if (cmp_mask(d_obj + ofs, op->v_masm,
417 op->v_masm_mask, len))
419 if (cmp_mask(d_exe + ofs, op->v_msvc,
420 op->v_msvc_mask, len))
// advance to the next byte on each side whose mask leaves bits variable
428 for (; jo < len; jo++)
429 if (op->v_masm_mask[jo] != 0xff)
431 for (; je < len; je++)
432 if (op->v_msvc_mask[je] != 0xff)
// one side has variable bits left while the other has none: the table
// entry is inconsistent
435 if ((jo == len && je != len) || (jo != len && je == len)) {
436 printf("invalid equiv_ops\n");
440 return len + ofs - 1; // matched
// variable bits (those outside the mask) must agree between obj and exe
443 vo = d_obj[jo] & ~op->v_masm_mask[jo];
444 ve = d_exe[je] & ~op->v_msvc_mask[je];
// entries flagged cmp_rm get extra handling when the mask is 0xc0; it
// produces vo2/ve2, which are compared below
445 if (op->cmp_rm && op->v_masm_mask[jo] == 0xc0) {
450 if (vo != ve || vo2 != ve2)
/*
 * Called from main() on the byte range of a removed ("rm_") function.
 *
 *  d   - start of the range inside the section data
 *  len - length of the range in bytes
 */
466 static void fill_int3(unsigned char *d, int len)
// range already starts with int3 filler
// NOTE(review): reads d[1]; presumably len >= 2 is guaranteed or checked
// before this point - confirm
469 if (d[0] == 0xcc && d[1] == 0xcc)
/*
 * usage: <prog> [-n] <a_obj> <exe>
 *
 * Loads the first non-empty section of the object file and of the exe,
 * compares them byte by byte (tolerating relocated words, padding and
 * equivalent encodings), handles functions whose symbol name starts with
 * "rm_" via fill_int3(), removes relocations that fall into int3-filled
 * areas, and writes the section data, relocation table and section header
 * back into the object file.
 */
475 int main(int argc, char *argv[])
477 unsigned int base = 0, addr, end, sym, *t;
478 struct my_sect_info s_text_obj, s_text_exe;
479 struct my_symtab *raw_syms_obj = NULL;
480 struct my_symtab *syms_obj = NULL;
481 long sym_cnt_obj, raw_sym_cnt_obj;
492 for (arg = 1; arg < argc; arg++) {
493 if (!strcmp(argv[arg], "-n"))
// exactly two positional arguments must remain after the options
499 if (argc != arg + 2) {
500 printf("usage:\n%s [-n] <a_obj> <exe>\n", argv[0]);
// the object file is opened for update because it is rewritten at the end
504 f_obj = fopen(argv[arg++], "r+b");
// NOTE(review): the path just opened is argv[arg - 1]; argv[1] is "-n"
// when that option was given
506 fprintf(stderr, "%s: ", argv[1]);
// NOTE(review): mode "r" (not "rb") - text-mode translation would corrupt
// binary reads on platforms that distinguish the two
511 f_exe = fopen(argv[arg++], "r");
// NOTE(review): same indexing issue as above (opened path is argv[arg - 1])
513 fprintf(stderr, "%s: ", argv[2]);
// symbols are only needed from the object file; the exe supplies `base`
518 parse_headers(f_obj, NULL, &s_text_obj, &syms_obj, &sym_cnt_obj,
519 &raw_syms_obj, &raw_sym_cnt_obj);
520 parse_headers(f_exe, &base, &s_text_exe, NULL, NULL, NULL, NULL);
// compare only the part both sections have in common
522 sztext_cmn = s_text_obj.size;
523 if (sztext_cmn > s_text_exe.size)
524 sztext_cmn = s_text_exe.size;
526 if (sztext_cmn == 0) {
527 printf("bad .text size(s): %ld, %ld\n",
528 s_text_obj.size, s_text_exe.size);
// pass 1: neutralise relocated words so they do not show up as differences
532 for (i = 0; i < s_text_obj.reloc_cnt; i++)
534 unsigned int a = s_text_obj.relocs[i].r_vaddr;
535 //printf("%04x %08x\n", s_text_obj.relocs[i].r_type, a);
537 switch (s_text_obj.relocs[i].r_type) {
538 case 0x06: // RELOC_ADDR32
539 case 0x14: // RELOC_REL32
540 // must preserve stored val,
541 // so trash exe so that cmp passes
// NOTE(review): `a` comes from the file and is not range-checked against
// either buffer before this 4-byte copy
542 memcpy(s_text_exe.data + a, s_text_obj.data + a, 4);
545 printf("unknown reloc %x @%08x/%08x\n",
546 s_text_obj.relocs[i].r_type, a, base + a);
// pass 2: byte-wise comparison of the common part
552 for (i = 0; i < sztext_cmn; i++)
554 if (s_text_obj.data[i] == s_text_exe.data[i])
// bytes remaining from the mismatch to the end of the common part
557 left = sztext_cmn - i;
559 if (s_text_exe.data[i] == 0xcc) { // padding
560 if (handle_pad(s_text_obj.data + i,
561 s_text_exe.data + i, left))
// not padding: see whether the two sides are equivalent encodings
565 ret = check_equiv(s_text_obj.data + i, s_text_exe.data + i, left);
// report the differing byte pair with its address relative to `base`
571 printf("%x: %02x vs %02x\n", base + i,
572 s_text_obj.data[i], s_text_exe.data[i]);
576 // fill removed funcs with 'int3'
577 for (i = 0; i < sym_cnt_obj; i++) {
578 if (strncmp(syms_obj[i].name, "rm_", 3))
// syms_obj is sorted by address, so a function extends to the next symbol
// or, for the last one, to the end of the section
581 addr = syms_obj[i].addr;
582 end = (i < sym_cnt_obj - 1)
583 ? syms_obj[i + 1].addr : s_text_obj.size;
584 if (addr >= s_text_obj.size || end > s_text_obj.size) {
585 printf("addr OOR: %x-%x '%s'\n", addr, end,
589 fill_int3(s_text_obj.data + addr, end - addr);
// pass 3: drop relocations that now point into int3-filled areas
593 for (i = 0; i < s_text_obj.reloc_cnt; i++) {
594 addr = s_text_obj.relocs[i].r_vaddr;
595 sym = s_text_obj.relocs[i].r_symndx;
// a relocated word is 4 bytes and must fit inside the section
596 if (addr > s_text_obj.size - 4) {
597 printf("reloc addr OOR: %x\n", addr);
600 if (sym >= raw_sym_cnt_obj) {
601 printf("reloc sym OOR: %d/%ld\n",
602 sym, raw_sym_cnt_obj);
606 printf("r %08x -> %08x %s\n", base + addr,
607 raw_syms_obj[sym].addr,
608 raw_syms_obj[sym].name);
// NOTE(review): t may be unaligned, and t[-1] reads 4 bytes before the
// relocated word - for addr < 4 that is before the start of the buffer
610 t = (unsigned int *)(s_text_obj.data + addr);
611 if (t[0] == 0xcccccccc
612 || t[-1] == 0xcccccccc) { // jumptab of a func?
// remove relocs[i] by shifting the tail of the array down one slot
614 memmove(&s_text_obj.relocs[i],
615 &s_text_obj.relocs[i + 1],
616 (s_text_obj.reloc_cnt - i - 1)
617 * sizeof(s_text_obj.relocs[0]));
619 s_text_obj.reloc_cnt--;
622 // note: branches/calls already linked,
623 // so only useful for dd refs
624 // XXX: rm'd because of switch tables
625 else if (raw_syms_obj[sym].is_text) {
// NOTE(review): addr2 is not range-checked before indexing the section data
626 unsigned int addr2 = raw_syms_obj[sym].addr;
627 if (s_text_obj.data[addr2] == 0xcc) {
628 printf("warning: reloc %08x -> %08x "
629 "points to rm'd target '%s'\n",
630 base + addr, base + addr2,
631 raw_syms_obj[sym].name);
// write the patched section contents back at their original file offset
638 ret = fseek(f_obj, s_text_obj.sect_fofs, SEEK_SET);
640 ret = fwrite(s_text_obj.data, 1, s_text_obj.size, f_obj);
641 my_assert(ret, s_text_obj.size);
// write the (possibly shortened) relocation table back in place
644 ret = fseek(f_obj, s_text_obj.reloc_fofs, SEEK_SET);
646 ret = fwrite(s_text_obj.relocs, sizeof(s_text_obj.relocs[0]),
647 s_text_obj.reloc_cnt, f_obj);
648 my_assert(ret, s_text_obj.reloc_cnt);
// re-read the section header, patch its relocation count, write it back
650 ret = fseek(f_obj, s_text_obj.scnhdr_fofs, SEEK_SET);
652 ret = fread(&tmphdr, 1, sizeof(tmphdr), f_obj);
653 my_assert(ret, sizeof(tmphdr));
655 tmphdr.s_nreloc = s_text_obj.reloc_cnt;
657 ret = fseek(f_obj, s_text_obj.scnhdr_fofs, SEEK_SET);
659 ret = fwrite(&tmphdr, 1, sizeof(tmphdr), f_obj);
660 my_assert(ret, sizeof(tmphdr));