implement rewritten func fill
[ia32rtools.git] / tools / cmpmrg_text.c
CommitLineData
cfd23479 1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <linux/coff.h>
5#include <assert.h>
6#include <stdint.h>
7
ede51255 8#include "my_assert.h"
9
cfd23479 10/* http://www.delorie.com/djgpp/doc/coff/ */
11
/*
 * COFF file header exactly as stored on disk (20 bytes).
 * Fixed-width types are used so the layout does not depend on the
 * host's idea of how wide short/int are.
 */
typedef struct {
  uint16_t f_magic;   /* magic number */
  uint16_t f_nscns;   /* number of sections */
  uint32_t f_timdat;  /* time & date stamp */
  uint32_t f_symptr;  /* file pointer to symtab */
  uint32_t f_nsyms;   /* number of symtab entries */
  uint16_t f_opthdr;  /* sizeof(optional hdr) */
  uint16_t f_flags;   /* flags */
} FILHDR;
21
/*
 * Standard fields of the COFF optional (a.out) header as stored on
 * disk (28 bytes).  Fixed-width types keep the layout independent of
 * the host's short/int widths.
 */
typedef struct {
  uint16_t magic;       /* type of file */
  uint16_t vstamp;      /* version stamp */
  uint32_t tsize;       /* text size in bytes, padded to FW bdry */
  uint32_t dsize;       /* initialized data, same padding */
  uint32_t bsize;       /* uninitialized data, same padding */
  uint32_t entry;       /* entry pt. */
  uint32_t text_start;  /* base of text used for this file */
  uint32_t data_start;  /* base of data used for this file */
} AOUTHDR;
32
/*
 * COFF section header as stored on disk (40 bytes).  Fixed-width
 * types keep the layout independent of the host's short/int widths.
 */
typedef struct {
  char     s_name[8];  /* section name (not NUL-terminated when 8 chars long) */
  uint32_t s_paddr;    /* physical address, aliased s_nlib */
  uint32_t s_vaddr;    /* virtual address */
  uint32_t s_size;     /* section size */
  uint32_t s_scnptr;   /* file ptr to raw data for section */
  uint32_t s_relptr;   /* file ptr to relocation */
  uint32_t s_lnnoptr;  /* file ptr to line numbers */
  uint16_t s_nreloc;   /* number of relocation entries */
  uint16_t s_nlnno;    /* number of line number entries */
  uint32_t s_flags;    /* flags */
} SCNHDR;
45
/*
 * COFF relocation entry as stored on disk (10 bytes).  Must stay packed:
 * natural alignment would pad the struct to 12 bytes and break reading
 * an array of entries straight from the file.
 */
typedef struct {
  uint32_t r_vaddr;   /* address of relocation */
  uint32_t r_symndx;  /* symbol we're adjusting for */
  uint16_t r_type;    /* type of relocation */
} __attribute__((packed)) RELOC;
51
e86c9ee6 52typedef struct {
53 union {
54 char e_name[E_SYMNMLEN];
55 struct {
56 unsigned int e_zeroes;
57 unsigned int e_offset;
58 } e;
59 } e;
60 unsigned int e_value;
61 short e_scnum;
62 unsigned short e_type;
63 unsigned char e_sclass;
64 unsigned char e_numaux;
65} __attribute__((packed)) SYMENT;
66
67#define C_EXT 2
68
/* One symbol collected from a COFF symbol table. */
struct my_symtab {
  unsigned int addr;  // symbol value (e_value); the sort key
  unsigned int fpos;  // for patching: file offset of the symbol table entry
  char *name;
};

/*
 * qsort() comparator ordering my_symtab entries by ascending address.
 *
 * The addresses are unsigned, so they are compared explicitly: returning
 * their difference converted to int yields the wrong sign whenever the
 * two values are more than INT_MAX apart.
 *
 * Returns <0, 0 or >0 as *p1_ sorts before, equal to or after *p2_.
 */
static int symt_cmp(const void *p1_, const void *p2_)
{
  const struct my_symtab *p1 = p1_, *p2 = p2_;

  if (p1->addr < p2->addr)
    return -1;
  return p1->addr > p2->addr;
}
80
cfd23479 81void parse_headers(FILE *f, unsigned int *base_out,
82 long *sect_ofs, uint8_t **sect_data, long *sect_sz,
e86c9ee6 83 RELOC **relocs, long *reloc_cnt,
84 struct my_symtab **symtab_out, long *sym_cnt)
cfd23479 85{
e86c9ee6 86 struct my_symtab *symt_o = NULL;
87 char *stringtab = NULL;
cfd23479 88 unsigned int base = 0;
e86c9ee6 89 int text_scnum = 0;
90 long filesize;
91 char symname[9];
cfd23479 92 long opthdr_pos;
93 long reloc_size;
94 FILHDR hdr;
95 AOUTHDR opthdr;
96 SCNHDR scnhdr;
e86c9ee6 97 SYMENT syment;
98 int i, s, val;
cfd23479 99 int ret;
100
e86c9ee6 101 ret = fseek(f, 0, SEEK_END);
102 my_assert(ret, 0);
103
104 filesize = ftell(f);
105
106 ret = fseek(f, 0, SEEK_SET);
107 my_assert(ret, 0);
108
cfd23479 109 ret = fread(&hdr, 1, sizeof(hdr), f);
110 my_assert(ret, sizeof(hdr));
111
112 if (hdr.f_magic == 0x5a4d) // MZ
113 {
114 ret = fseek(f, 0x3c, SEEK_SET);
115 my_assert(ret, 0);
116 ret = fread(&val, 1, sizeof(val), f);
117 my_assert(ret, sizeof(val));
118
119 ret = fseek(f, val, SEEK_SET);
120 my_assert(ret, 0);
121 ret = fread(&val, 1, sizeof(val), f);
122 my_assert(ret, sizeof(val));
123 my_assert(val, 0x4550); // PE
124
125 // should be COFF now
126 ret = fread(&hdr, 1, sizeof(hdr), f);
127 my_assert(ret, sizeof(hdr));
128 }
129
130 my_assert(hdr.f_magic, COFF_I386MAGIC);
131
132 if (hdr.f_opthdr != 0)
133 {
134 opthdr_pos = ftell(f);
135
136 if (hdr.f_opthdr < sizeof(opthdr))
137 my_assert(1, 0);
138
139 ret = fread(&opthdr, 1, sizeof(opthdr), f);
140 my_assert(ret, sizeof(opthdr));
141 my_assert(opthdr.magic, COFF_ZMAGIC);
142
2c605b97 143 //printf("text_start: %x\n", opthdr.text_start);
cfd23479 144
145 if (hdr.f_opthdr > sizeof(opthdr)) {
146 ret = fread(&base, 1, sizeof(base), f);
147 my_assert(ret, sizeof(base));
2c605b97 148 //printf("base: %x\n", base);
cfd23479 149 }
150 ret = fseek(f, opthdr_pos + hdr.f_opthdr, SEEK_SET);
151 my_assert(ret, 0);
152 }
153
e86c9ee6 154 // note: assuming first non-empty one is .text ..
cfd23479 155 for (s = 0; s < hdr.f_nscns; s++) {
156 ret = fread(&scnhdr, 1, sizeof(scnhdr), f);
157 my_assert(ret, sizeof(scnhdr));
158
e86c9ee6 159 if (scnhdr.s_size != 0) {
160 text_scnum = s + 1;
cfd23479 161 break;
e86c9ee6 162 }
cfd23479 163 }
164
2c605b97 165#if 0
e86c9ee6 166 printf("f_nsyms: %x\n", hdr.f_nsyms);
cfd23479 167 printf("s_name: '%s'\n", scnhdr.s_name);
168 printf("s_vaddr: %x\n", scnhdr.s_vaddr);
169 printf("s_size: %x\n", scnhdr.s_size);
e86c9ee6 170 //printf("s_scnptr: %x\n", scnhdr.s_scnptr);
cfd23479 171 printf("s_nreloc: %x\n", scnhdr.s_nreloc);
172 printf("--\n");
2c605b97 173#endif
cfd23479 174
175 ret = fseek(f, scnhdr.s_scnptr, SEEK_SET);
176 my_assert(ret, 0);
177
178 *sect_data = malloc(scnhdr.s_size);
179 my_assert_not(*sect_data, NULL);
180 ret = fread(*sect_data, 1, scnhdr.s_size, f);
181 my_assert(ret, scnhdr.s_size);
182
183 *sect_ofs = scnhdr.s_scnptr;
184 *sect_sz = scnhdr.s_size;
185
e86c9ee6 186 // relocs
cfd23479 187 ret = fseek(f, scnhdr.s_relptr, SEEK_SET);
188 my_assert(ret, 0);
189
190 reloc_size = scnhdr.s_nreloc * sizeof((*relocs)[0]);
191 *relocs = malloc(reloc_size + 1);
192 my_assert_not(*relocs, NULL);
193 ret = fread(*relocs, 1, reloc_size, f);
194 my_assert(ret, reloc_size);
195
196 *reloc_cnt = scnhdr.s_nreloc;
197
e86c9ee6 198 // symtab
199 if (hdr.f_nsyms != 0) {
200 symname[8] = 0;
201
202 symt_o = malloc(hdr.f_nsyms * sizeof(symt_o[0]) + 1);
203 my_assert_not(symt_o, NULL);
204
205 ret = fseek(f, hdr.f_symptr
206 + hdr.f_nsyms * sizeof(syment), SEEK_SET);
207 my_assert(ret, 0);
208 ret = fread(&i, 1, sizeof(i), f);
209 my_assert(ret, sizeof(i));
210 my_assert((unsigned int)i < filesize, 1);
211
212 stringtab = malloc(i);
213 my_assert_not(stringtab, NULL);
214 memset(stringtab, 0, 4);
215 ret = fread(stringtab + 4, 1, i - 4, f);
216 my_assert(ret, i - 4);
217
218 ret = fseek(f, hdr.f_symptr, SEEK_SET);
219 my_assert(ret, 0);
220 }
221
222 for (i = s = 0; i < hdr.f_nsyms; i++) {
223 long pos = ftell(f);
224
225 ret = fread(&syment, 1, sizeof(syment), f);
226 my_assert(ret, sizeof(syment));
227
228 strncpy(symname, syment.e.e_name, 8);
229 //printf("%3d %2d %08x '%s'\n", syment.e_sclass,
230 // syment.e_scnum, syment.e_value, symname);
231
232 if (syment.e_scnum != text_scnum || syment.e_sclass != C_EXT)
233 continue;
234
235 symt_o[s].addr = syment.e_value;
236 symt_o[s].fpos = pos;
237 if (syment.e.e.e_zeroes == 0)
238 symt_o[s].name = stringtab + syment.e.e.e_offset;
239 else
240 symt_o[s].name = strdup(symname);
241 s++;
242
243 if (syment.e_numaux) {
244 ret = fseek(f, syment.e_numaux * sizeof(syment),
245 SEEK_CUR);
246 my_assert(ret, 0);
247 i += syment.e_numaux;
248 }
249 }
250
251 if (symt_o != NULL)
252 qsort(symt_o, s, sizeof(symt_o[0]), symt_cmp);
253
254 *sym_cnt = s;
255 *symtab_out = symt_o;
256
2c605b97 257 // seek to .text start
258 ret = fseek(f, scnhdr.s_scnptr, SEEK_SET);
259 my_assert(ret, 0);
260
e86c9ee6 261 if (base != 0 && base_out != NULL)
cfd23479 262 *base_out = base + scnhdr.s_vaddr;
263}
264
/*
 * Reconcile inter-function padding between the two images.
 *
 * d_exe is expected to start with a run of 0xcc bytes (at most maxlen
 * are examined).  d_obj must contain, over the same span, the multi-byte
 * filler sequences listed below, laid out as 7-byte chunks followed by
 * one shorter chunk for the remainder.  If so, that span of d_obj is
 * overwritten with 0xcc so both buffers agree.
 *
 * Returns 1 when the span matched (d_obj rewritten; also when the 0xcc
 * run is empty), 0 otherwise.  On a 0 return d_obj is left untouched,
 * so the caller can still report the bytes that actually differ.
 */
static int handle_pad(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
{
  static const uint8_t p7[7] = { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 };
  static const uint8_t p6[6] = { 0x8d, 0x9b, 0x00, 0x00, 0x00, 0x00 };
  static const uint8_t p5[5] = { 0x05, 0x00, 0x00, 0x00, 0x00 };
  static const uint8_t p4[4] = { 0x8d, 0x64, 0x24, 0x00 };
  static const uint8_t p3[3] = { 0x8d, 0x49, 0x00 };
  static const uint8_t p2[2] = { 0x8b, 0xff };
  static const uint8_t p1[1] = { 0x90 };
  // indexed by chunk length; slot 0 is never used
  static const uint8_t * const fillers[8] = {
    NULL, p1, p2, p3, p4, p5, p6, p7
  };
  int run, pos, chunk;

  // length of the 0xcc run in the exe
  for (run = 0; run < maxlen; run++)
    if (d_exe[run] != 0xcc)
      break;

  // verify every chunk before touching d_obj
  for (pos = 0; pos < run; pos += chunk)
  {
    chunk = run - pos;
    if (chunk > 7)
      chunk = 7;

    if (memcmp(d_obj + pos, fillers[chunk], chunk))
      return 0;
  }

  if (run > 0)
    memset(d_obj, 0xcc, run);

  return 1;
}
313
/*
 * Pairs of instruction encodings that check_equiv() accepts as
 * equivalent.
 *
 *   len         - number of pattern bytes
 *   ofs         - offset (<= 0 in this table) from the first differing
 *                 byte to the start of the pattern
 *   cmp_rm      - when set, variable bytes whose mask is 0xc0 are
 *                 compared with their two 3-bit fields swapped
 *   v_masm      - pattern matched against the object file bytes
 *   v_masm_mask - bits of v_masm that must match; the remaining bits
 *                 are variable and must agree with the exe side
 *   v_msvc      - pattern matched against the executable bytes
 *   v_msvc_mask - same as v_masm_mask, for v_msvc
 */
struct equiv_opcode {
  signed char len;
  signed char ofs;
  short cmp_rm;
  uint8_t v_masm[8];
  uint8_t v_masm_mask[8];
  uint8_t v_msvc[8];
  uint8_t v_msvc_mask[8];
} equiv_ops[] = {
  // cmp $0x11,%ax
  {
    .len = 4, .ofs = -1, .cmp_rm = 0,
    .v_masm      = { 0x66, 0x83, 0xf8, 0x03 },
    .v_masm_mask = { 0xff, 0xff, 0xff, 0x00 },
    .v_msvc      = { 0x66, 0x3d, 0x03, 0x00 },
    .v_msvc_mask = { 0xff, 0xff, 0x00, 0xff },
  },
  // lea -0x1(%ebx,%eax,1),%esi // op mod/rm sib offs
  // mov, test, imm grp 1
  {
    .len = 3, .ofs = -2, .cmp_rm = 1,
    .v_masm      = { 0x8d, 0x74, 0x03 },
    .v_masm_mask = { 0xf0, 0x07, 0xc0 },
    .v_msvc      = { 0x8d, 0x74, 0x18 },
    .v_msvc_mask = { 0xf0, 0x07, 0xc0 },
  },
  // movzbl 0x58f24a(%eax,%ecx,1),%eax
  {
    .len = 4, .ofs = -3, .cmp_rm = 1,
    .v_masm      = { 0x0f, 0xb6, 0x84, 0x08 },
    .v_masm_mask = { 0xff, 0xff, 0x07, 0xc0 },
    .v_msvc      = { 0x0f, 0xb6, 0x84, 0x01 },
    .v_msvc_mask = { 0xff, 0xff, 0x07, 0xc0 },
  },
  // inc/dec
  {
    .len = 3, .ofs = -2, .cmp_rm = 1,
    .v_masm      = { 0xfe, 0x4c, 0x03 },
    .v_masm_mask = { 0xfe, 0xff, 0xc0 },
    .v_msvc      = { 0xfe, 0x4c, 0x18 },
    .v_msvc_mask = { 0xfe, 0xff, 0xc0 },
  },
  // cmp
  {
    .len = 3, .ofs = -2, .cmp_rm = 1,
    .v_masm      = { 0x38, 0x0c, 0x0c },
    .v_masm_mask = { 0xff, 0xff, 0xc0 },
    .v_msvc      = { 0x38, 0x0c, 0x30 },
    .v_msvc_mask = { 0xff, 0xff, 0xc0 },
  },
  // test %dl,%bl
  {
    .len = 2, .ofs = -1, .cmp_rm = 1,
    .v_masm      = { 0x84, 0xd3 },
    .v_masm_mask = { 0xfe, 0xc0 },
    .v_msvc      = { 0x84, 0xda },
    .v_msvc_mask = { 0xfe, 0xc0 },
  },
  // cmp r,r/m vs rm/r
  {
    .len = 2, .ofs = 0, .cmp_rm = 1,
    .v_masm      = { 0x3a, 0xca },
    .v_masm_mask = { 0xff, 0xc0 },
    .v_msvc      = { 0x38, 0xd1 },
    .v_msvc_mask = { 0xff, 0xc0 },
  },
  // rep + 66 prefix
  {
    .len = 2, .ofs = 0, .cmp_rm = 0,
    .v_masm      = { 0xf3, 0x66 },
    .v_masm_mask = { 0xfe, 0xff },
    .v_msvc      = { 0x66, 0xf3 },
    .v_msvc_mask = { 0xff, 0xfe },
  },
  // fadd st, st(0) vs st(0), st
  {
    .len = 2, .ofs = 0, .cmp_rm = 0,
    .v_masm      = { 0xd8, 0xc0 },
    .v_masm_mask = { 0xff, 0xf7 },
    .v_msvc      = { 0xdc, 0xc0 },
    .v_msvc_mask = { 0xff, 0xf7 },
  },

  // broad filters (may take too much..)
  // testb $0x4,0x1d(%esi,%eax,1)
  // movb, push, ..
  {
    .len = 3, .ofs = -2, .cmp_rm = 1,
    .v_masm      = { 0xf6, 0x44, 0x06 },
    .v_masm_mask = { 0x00, 0x07, 0xc0 },
    .v_msvc      = { 0xf6, 0x44, 0x30 },
    .v_msvc_mask = { 0x00, 0x07, 0xc0 },
  },
};
368
/*
 * Compare len bytes of d against expect, looking only at the bits set
 * in the corresponding mask byte.
 *
 * Returns 0 when all masked bits agree, 1 on the first difference.
 */
static int cmp_mask(uint8_t *d, uint8_t *expect, uint8_t *mask, int len)
{
  int pos = 0;

  while (pos < len) {
    // any differing bit that survives the mask is a mismatch
    if ((d[pos] ^ expect[pos]) & mask[pos])
      return 1;
    pos++;
  }

  return 0;
}
379
380static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
381{
382 uint8_t vo, ve, vo2, ve2;
383 int i, jo, je;
384 int len, ofs;
385
386 for (i = 0; i < sizeof(equiv_ops) / sizeof(equiv_ops[0]); i++)
387 {
388 struct equiv_opcode *op = &equiv_ops[i];
389
390 len = op->len;
391 if (maxlen < len)
392 continue;
393
394 ofs = op->ofs;
395 if (cmp_mask(d_obj + ofs, op->v_masm,
396 op->v_masm_mask, len))
397 continue;
398 if (cmp_mask(d_exe + ofs, op->v_msvc,
399 op->v_msvc_mask, len))
400 continue;
401
402 jo = je = 0;
403 d_obj += ofs;
404 d_exe += ofs;
405 while (1)
406 {
407 for (; jo < len; jo++)
408 if (op->v_masm_mask[jo] != 0xff)
409 break;
410 for (; je < len; je++)
411 if (op->v_msvc_mask[je] != 0xff)
412 break;
413
414 if ((jo == len && je != len) || (jo != len && je == len)) {
415 printf("invalid equiv_ops\n");
416 return -1;
417 }
418 if (jo == len)
419 return len + ofs - 1; // matched
420
421 // var byte
422 vo = d_obj[jo] & ~op->v_masm_mask[jo];
423 ve = d_exe[je] & ~op->v_msvc_mask[je];
424 if (op->cmp_rm && op->v_masm_mask[jo] == 0xc0) {
425 vo2 = vo >> 3;
426 vo &= 7;
427 ve2 = ve & 7;
428 ve >>= 3;
429 if (vo != ve || vo2 != ve2)
430 return -1;
431 }
432 else {
433 if (vo != ve)
434 return -1;
435 }
436
437 jo++;
438 je++;
439 }
440 }
441
442 return -1;
443}
444
445int main(int argc, char *argv[])
446{
447 FILE *f_obj, *f_exe;
448 long text_ofs_obj, text_ofs_exe;
449 long sztext_obj, sztext_exe, sztext_cmn;
450 RELOC *relocs_obj, *relocs_exe;
e86c9ee6 451 long reloc_cnt_obj, reloc_cnt_exe;
452 struct my_symtab *syms_obj, *syms_exe;
453 long sym_cnt_obj, sym_cnt_exe;
cfd23479 454 uint8_t *d_obj, *d_exe;
2c605b97 455 unsigned int base = 0, addr, end;
cfd23479 456 int retval = 1;
457 int left;
458 int ret;
459 int i;
460
461 if (argc != 3) {
e86c9ee6 462 printf("usage:\n%s <a_obj> <exe>\n", argv[0]);
cfd23479 463 return 1;
464 }
465
466 f_obj = fopen(argv[1], "r+b");
467 if (f_obj == NULL) {
468 fprintf(stderr, "%s", argv[1]);
469 perror("");
470 return 1;
471 }
472
473 f_exe = fopen(argv[2], "r");
474 if (f_exe == NULL) {
e86c9ee6 475 fprintf(stderr, "%s", argv[2]);
cfd23479 476 perror("");
477 return 1;
478 }
479
e86c9ee6 480 parse_headers(f_obj, NULL, &text_ofs_obj, &d_obj, &sztext_obj,
481 &relocs_obj, &reloc_cnt_obj, &syms_obj, &sym_cnt_obj);
cfd23479 482 parse_headers(f_exe, &base, &text_ofs_exe, &d_exe, &sztext_exe,
e86c9ee6 483 &relocs_exe, &reloc_cnt_exe, &syms_exe, &sym_cnt_exe);
cfd23479 484
485 sztext_cmn = sztext_obj;
486 if (sztext_cmn > sztext_exe)
487 sztext_cmn = sztext_exe;
cfd23479 488
489 if (sztext_cmn == 0) {
490 printf("bad .text size(s): %ld, %ld\n",
491 sztext_obj, sztext_exe);
492 return 1;
493 }
494
e86c9ee6 495 for (i = 0; i < reloc_cnt_obj; i++)
cfd23479 496 {
497 unsigned int a = relocs_obj[i].r_vaddr;
498 //printf("%04x %08x\n", relocs_obj[i].r_type, a);
499
500 switch (relocs_obj[i].r_type) {
2c605b97 501 case 0x06: // RELOC_ADDR32
502 case 0x14: // RELOC_REL32
503 // must preserve stored val,
504 // so trash d_exe so that cmp passes
505 memcpy(d_exe + a, d_obj + a, 4);
cfd23479 506 break;
507 default:
2c605b97 508 printf("unknown reloc %x @%08x/%08x\n",
509 relocs_obj[i].r_type, a, base + a);
cfd23479 510 return 1;
511 }
512 }
513
514 for (i = 0; i < sztext_cmn; i++)
515 {
516 if (d_obj[i] == d_exe[i])
517 continue;
518
519 left = sztext_cmn - i;
520
521 if (d_exe[i] == 0xcc) { // padding
522 if (handle_pad(d_obj + i, d_exe + i, left))
523 continue;
524 }
525
526 ret = check_equiv(d_obj + i, d_exe + i, left);
527 if (ret >= 0) {
528 i += ret;
529 continue;
530 }
531
532 printf("%x: %02x vs %02x\n", base + i, d_obj[i], d_exe[i]);
533 goto out;
534 }
535
2c605b97 536 for (i = 0; i < sym_cnt_obj; i++) {
537 if (strncmp(syms_obj[i].name, "rm_", 3))
538 continue;
539
540 addr = syms_obj[i].addr;
541 end = (i < sym_cnt_obj - 1)
542 ? syms_obj[i + 1].addr : sztext_obj;
543 if (addr >= sztext_obj || end > sztext_obj) {
544 printf("addr OOR: %x-%x '%s'\n", addr, end,
545 syms_obj[i].name);
546 goto out;
547 }
548 memset(d_obj + addr, 0xcc, end - addr);
549 }
550
551 // parse_headers has set pos to .text
552 ret = fwrite(d_obj, 1, sztext_obj, f_obj);
553 my_assert(ret, sztext_obj);
554
555 fclose(f_obj);
556 fclose(f_exe);
557
558 retval = 0;
cfd23479 559out:
560 return retval;
561}