.text cmp tool, updates for it to pass
[ia32rtools.git] / cmpmrg_text.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <linux/coff.h>
5 #include <assert.h>
6 #include <stdint.h>
7
8 /* http://www.delorie.com/djgpp/doc/coff/ */
9
/*
 * COFF file header. parse_headers() fills it with a single fread() from the
 * start of the file (or from right after the PE signature), so the member
 * widths must match the on-disk layout exactly: 20 bytes, no padding.
 * Fixed-width types are used so the layout does not depend on the
 * platform's int/short sizes.
 */
typedef struct {
  uint16_t f_magic;         /* magic number             */
  uint16_t f_nscns;         /* number of sections       */
  uint32_t f_timdat;        /* time & date stamp        */
  uint32_t f_symptr;        /* file pointer to symtab   */
  uint32_t f_nsyms;         /* number of symtab entries */
  uint16_t f_opthdr;        /* sizeof(optional hdr)     */
  uint16_t f_flags;         /* flags                    */
} FILHDR;
19
/*
 * Optional ("a.out") header that follows the file header when
 * FILHDR.f_opthdr is nonzero. It is read straight from the file, so the
 * layout must be exactly 28 bytes; fixed-width types make that independent
 * of the platform's int/short sizes.
 */
typedef struct {
  uint16_t magic;          /* type of file                         */
  uint16_t vstamp;         /* version stamp                        */
  uint32_t tsize;          /* text size in bytes, padded to FW bdry*/
  uint32_t dsize;          /* initialized data    "  "             */
  uint32_t bsize;          /* uninitialized data  "  "             */
  uint32_t entry;          /* entry pt.                            */
  uint32_t text_start;     /* base of text used for this file      */
  uint32_t data_start;     /* base of data used for this file      */
} AOUTHDR;
30
/*
 * COFF section header; one per section, stored back to back after the
 * (optional) a.out header. Read directly from the file, so the layout must
 * be exactly 40 bytes; fixed-width types keep it independent of the
 * platform's int/short sizes.
 *
 * Note: s_name is a fixed 8-byte field and is NOT guaranteed to be
 * NUL-terminated when the name uses all 8 characters.
 */
typedef struct {
  char     s_name[8];  /* section name                     */
  uint32_t s_paddr;    /* physical address, aliased s_nlib */
  uint32_t s_vaddr;    /* virtual address                  */
  uint32_t s_size;     /* section size                     */
  uint32_t s_scnptr;   /* file ptr to raw data for section */
  uint32_t s_relptr;   /* file ptr to relocation           */
  uint32_t s_lnnoptr;  /* file ptr to line numbers         */
  uint16_t s_nreloc;   /* number of relocation entries     */
  uint16_t s_nlnno;    /* number of line number entries    */
  uint32_t s_flags;    /* flags                            */
} SCNHDR;
43
/*
 * COFF relocation entry. On disk this is 10 bytes with no padding, hence
 * the packed attribute; an array of these is read in one fread() by
 * parse_headers(). Fixed-width types keep the layout independent of the
 * platform's int/short sizes.
 */
typedef struct {
  uint32_t r_vaddr;   /* address of relocation      */
  uint32_t r_symndx;  /* symbol we're adjusting for */
  uint16_t r_type;    /* type of relocation         */
} __attribute__((packed)) RELOC;
49
/*
 * Runtime check used by the my_assert()/my_assert_not() macros.
 *
 * line    source line of the check (printed on failure)
 * name    text of the checked expression (printed on failure)
 * v       actual value
 * expect  value to compare against
 * is_eq   nonzero: v must equal expect; zero: v must differ from expect
 *
 * Returns normally when the check holds; otherwise prints the line, the
 * expression and both values in hex to stdout and terminates with exit(1).
 */
static void my_assert_(int line, const char *name, long v, long expect, int is_eq)
{
        int same = (v == expect);
        int passed = is_eq ? same : !same;

        if (passed)
                return;

        printf("%d: '%s' is %lx, need %s%lx\n", line, name,
                v, is_eq ? "" : "!", expect);
        exit(1);
}

/* Exit unless (v) == (want); both sides are converted to long first. */
#define my_assert(v, want) \
        my_assert_(__LINE__, #v, (long)(v), (long)(want), 1)

/* Exit unless (v) != (unwanted); both sides are converted to long first. */
#define my_assert_not(v, unwanted) \
        my_assert_(__LINE__, #v, (long)(v), (long)(unwanted), 0)
69
70 void parse_headers(FILE *f, unsigned int *base_out,
71         long *sect_ofs, uint8_t **sect_data, long *sect_sz,
72         RELOC **relocs, long *reloc_cnt)
73 {
74         unsigned int base = 0;
75         long opthdr_pos;
76         long reloc_size;
77         FILHDR hdr;
78         AOUTHDR opthdr;
79         SCNHDR scnhdr;
80         int s, val;
81         int ret;
82         
83         ret = fread(&hdr, 1, sizeof(hdr), f);
84         my_assert(ret, sizeof(hdr));
85
86         if (hdr.f_magic == 0x5a4d) // MZ
87         {
88                 ret = fseek(f, 0x3c, SEEK_SET);
89                 my_assert(ret, 0);
90                 ret = fread(&val, 1, sizeof(val), f);
91                 my_assert(ret, sizeof(val));
92
93                 ret = fseek(f, val, SEEK_SET);
94                 my_assert(ret, 0);
95                 ret = fread(&val, 1, sizeof(val), f);
96                 my_assert(ret, sizeof(val));
97                 my_assert(val, 0x4550); // PE
98
99                 // should be COFF now
100                 ret = fread(&hdr, 1, sizeof(hdr), f);
101                 my_assert(ret, sizeof(hdr));
102         }
103
104         my_assert(hdr.f_magic, COFF_I386MAGIC);
105
106         if (hdr.f_opthdr != 0)
107         {
108                 opthdr_pos = ftell(f);
109
110                 if (hdr.f_opthdr < sizeof(opthdr))
111                         my_assert(1, 0);
112
113                 ret = fread(&opthdr, 1, sizeof(opthdr), f);
114                 my_assert(ret, sizeof(opthdr));
115                 my_assert(opthdr.magic, COFF_ZMAGIC);
116
117                 printf("text_start: %x\n", opthdr.text_start);
118
119                 if (hdr.f_opthdr > sizeof(opthdr)) {
120                         ret = fread(&base, 1, sizeof(base), f);
121                         my_assert(ret, sizeof(base));
122                         printf("base: %x\n", base);
123                 }
124                 ret = fseek(f, opthdr_pos + hdr.f_opthdr, SEEK_SET);
125                 my_assert(ret, 0);
126         }
127
128         for (s = 0; s < hdr.f_nscns; s++) {
129                 ret = fread(&scnhdr, 1, sizeof(scnhdr), f);
130                 my_assert(ret, sizeof(scnhdr));
131
132                 if (scnhdr.s_size != 0)
133                         break;
134         }
135
136         printf("s_name:   '%s'\n", scnhdr.s_name);
137         printf("s_vaddr:  %x\n", scnhdr.s_vaddr);
138         printf("s_size:   %x\n", scnhdr.s_size);
139         printf("s_scnptr: %x\n", scnhdr.s_scnptr);
140         printf("s_nreloc: %x\n", scnhdr.s_nreloc);
141         printf("--\n");
142
143         ret = fseek(f, scnhdr.s_scnptr, SEEK_SET);
144         my_assert(ret, 0);
145
146         *sect_data = malloc(scnhdr.s_size);
147         my_assert_not(*sect_data, NULL);
148         ret = fread(*sect_data, 1, scnhdr.s_size, f);
149         my_assert(ret, scnhdr.s_size);
150
151         *sect_ofs = scnhdr.s_scnptr;
152         *sect_sz = scnhdr.s_size;
153
154         ret = fseek(f, scnhdr.s_relptr, SEEK_SET);
155         my_assert(ret, 0);
156
157         reloc_size = scnhdr.s_nreloc * sizeof((*relocs)[0]);
158         *relocs = malloc(reloc_size + 1);
159         my_assert_not(*relocs, NULL);
160         ret = fread(*relocs, 1, reloc_size, f);
161         my_assert(ret, reloc_size);
162
163         *reloc_cnt = scnhdr.s_nreloc;
164
165         if (base != 0)
166                 *base_out = base + scnhdr.s_vaddr;
167 }
168
/*
 * Reconcile alignment padding between the two images.
 *
 * d_exe is expected to start a run of 0xcc filler bytes; the run length is
 * measured in d_exe (at most maxlen bytes). The same span in d_obj must
 * consist of the multi-byte no-op sequences listed below, laid out in
 * chunks of up to 7 bytes (longest first, remainder last).
 *
 * Returns 1 when the whole span in d_obj is such padding; in that case the
 * span in d_obj is overwritten with 0xcc so it compares equal to d_exe.
 * Returns 0 otherwise, leaving d_obj completely untouched (the span is
 * verified in full before anything is written, so a partial match can not
 * leave a half-rewritten buffer behind).
 */
static int handle_pad(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
{
        static const uint8_t p7[7] = { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 };
        static const uint8_t p6[6] = { 0x8d, 0x9b, 0x00, 0x00, 0x00, 0x00 };
        static const uint8_t p5[5] = { 0x05, 0x00, 0x00, 0x00, 0x00 };
        static const uint8_t p4[4] = { 0x8d, 0x64, 0x24, 0x00 };
        static const uint8_t p3[3] = { 0x8d, 0x49, 0x00 };
        static const uint8_t p2[2] = { 0x8b, 0xff };
        static const uint8_t p1[1] = { 0x90 };
        // indexed by chunk length; slot 0 is never used
        static const uint8_t *const pads[8] = {
                NULL, p1, p2, p3, p4, p5, p6, p7
        };
        int run;
        int done;
        int chunk;

        // length of the 0xcc run in the exe image
        for (run = 0; run < maxlen; run++)
                if (d_exe[run] != 0xcc)
                        break;

        // verify every chunk first, without modifying anything
        for (done = 0; done < run; done += chunk)
        {
                chunk = run - done;
                if (chunk > 7)
                        chunk = 7;

                if (memcmp(d_obj + done, pads[chunk], chunk) != 0)
                        return 0;
        }

        // all chunks matched: make the obj image identical to the exe
        if (run > 0)
                memset(d_obj, 0xcc, run);

        return 1;
}
217
/*
 * Table of instruction encodings that differ byte-wise between the two
 * images but are accepted as equivalent by check_equiv().
 *
 * For each entry, check_equiv() compares len bytes starting at
 * (mismatch position + ofs) in both buffers: the object-file bytes against
 * v_masm under v_masm_mask, the executable bytes against v_msvc under
 * v_msvc_mask. Bits set in a mask must match the pattern; bytes whose mask
 * is not 0xff are "variable" bytes, and their unmasked bits are compared
 * between the two buffers (paired up in order of appearance).
 * (The masm/msvc naming presumably refers to the tools that produced each
 * encoding - not verifiable from this file.)
 */
struct equiv_opcode {
        signed char len;        // number of pattern bytes to compare
        signed char ofs;        // pattern start relative to the mismatch (<= 0 here)
        short cmp_rm;           // nonzero: for variable bytes with mask 0xc0, compare
                                // the two 3-bit fields swapped between the buffers
        uint8_t v_masm[8];      // pattern for the object-file side
        uint8_t v_masm_mask[8]; // mask for v_masm
        uint8_t v_msvc[8];      // pattern for the executable side
        uint8_t v_msvc_mask[8]; // mask for v_msvc
} equiv_ops[] = {
        // cmp    $0x11,%ax
        { 4, -1, 0,
         { 0x66,0x83,0xf8,0x03 }, { 0xff,0xff,0xff,0x00 },
         { 0x66,0x3d,0x03,0x00 }, { 0xff,0xff,0x00,0xff }, },
        // lea    -0x1(%ebx,%eax,1),%esi // op mod/rm sib offs
        // mov, test, imm grp 1
        { 3, -2, 1,
         { 0x8d,0x74,0x03 }, { 0xf0,0x07,0xc0 },
         { 0x8d,0x74,0x18 }, { 0xf0,0x07,0xc0 }, },
        // movzbl 0x58f24a(%eax,%ecx,1),%eax
        { 4, -3, 1,
         { 0x0f,0xb6,0x84,0x08 }, { 0xff,0xff,0x07,0xc0 },
         { 0x0f,0xb6,0x84,0x01 }, { 0xff,0xff,0x07,0xc0 }, },
        // inc/dec
        { 3, -2, 1,
         { 0xfe,0x4c,0x03 }, { 0xfe,0xff,0xc0 },
         { 0xfe,0x4c,0x18 }, { 0xfe,0xff,0xc0 }, },
        // cmp
        { 3, -2, 1,
         { 0x38,0x0c,0x0c }, { 0xff,0xff,0xc0 },
         { 0x38,0x0c,0x30 }, { 0xff,0xff,0xc0 }, },
        // test   %dl,%bl
        { 2, -1, 1,
         { 0x84,0xd3 }, { 0xfe,0xc0 },
         { 0x84,0xda }, { 0xfe,0xc0 }, },
        // cmp    r,r/m vs rm/r
        { 2, 0, 1,
         { 0x3a,0xca }, { 0xff,0xc0 },
         { 0x38,0xd1 }, { 0xff,0xc0 }, },
        // rep + 66 prefix
        { 2, 0, 0,
         { 0xf3,0x66 }, { 0xfe,0xff },
         { 0x66,0xf3 }, { 0xff,0xfe }, },
        // fadd   st, st(0) vs st(0), st
        { 2, 0, 0,
         { 0xd8,0xc0 }, { 0xff,0xf7 },
         { 0xdc,0xc0 }, { 0xff,0xf7 }, },

        // broad filters (may take too much..)
        // testb  $0x4,0x1d(%esi,%eax,1)
        // movb, push, ..
        { 3, -2, 1,
         { 0xf6,0x44,0x06 }, { 0x00,0x07,0xc0 },
         { 0xf6,0x44,0x30 }, { 0x00,0x07,0xc0 }, },
};
272
/*
 * Masked comparison of len bytes.
 *
 * Only the bits set in mask[i] take part in comparing d[i] with expect[i].
 * Returns 0 when every byte agrees under its mask (also for len <= 0),
 * 1 as soon as one byte differs.
 */
static int cmp_mask(uint8_t *d, uint8_t *expect, uint8_t *mask, int len)
{
        int pos = 0;

        while (pos < len) {
                // differing bits that survive the mask mean a mismatch
                if ((d[pos] ^ expect[pos]) & mask[pos])
                        return 1;
                pos++;
        }

        return 0;
}
283
284 static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
285 {
286         uint8_t vo, ve, vo2, ve2;
287         int i, jo, je;
288         int len, ofs;
289
290         for (i = 0; i < sizeof(equiv_ops) / sizeof(equiv_ops[0]); i++)
291         {
292                 struct equiv_opcode *op = &equiv_ops[i];
293
294                 len = op->len;
295                 if (maxlen < len)
296                         continue;
297
298                 ofs = op->ofs;
299                 if (cmp_mask(d_obj + ofs, op->v_masm,
300                              op->v_masm_mask, len))
301                         continue;
302                 if (cmp_mask(d_exe + ofs, op->v_msvc,
303                              op->v_msvc_mask, len))
304                         continue;
305
306                 jo = je = 0;
307                 d_obj += ofs;
308                 d_exe += ofs;
309                 while (1)
310                 {
311                         for (; jo < len; jo++)
312                                 if (op->v_masm_mask[jo] != 0xff)
313                                         break;
314                         for (; je < len; je++)
315                                 if (op->v_msvc_mask[je] != 0xff)
316                                         break;
317
318                         if ((jo == len && je != len) || (jo != len && je == len)) {
319                                 printf("invalid equiv_ops\n");
320                                 return -1;
321                         }
322                         if (jo == len)
323                                 return len + ofs - 1; // matched
324
325                         // var byte
326                         vo = d_obj[jo] & ~op->v_masm_mask[jo];
327                         ve = d_exe[je] & ~op->v_msvc_mask[je];
328                         if (op->cmp_rm && op->v_masm_mask[jo] == 0xc0) {
329                                 vo2 = vo >> 3;
330                                 vo &= 7;
331                                 ve2 = ve & 7;
332                                 ve >>= 3;
333                                 if (vo != ve || vo2 != ve2)
334                                         return -1;
335                         }
336                         else {
337                                 if (vo != ve)
338                                         return -1;
339                         }
340
341                         jo++;
342                         je++;
343                 }
344         }
345
346         return -1;
347 }
348
349 int main(int argc, char *argv[])
350 {
351         FILE *f_obj, *f_exe;
352         long text_ofs_obj, text_ofs_exe;
353         long sztext_obj, sztext_exe, sztext_cmn;
354         RELOC *relocs_obj, *relocs_exe;
355         long reloc_cnt_obj, reloc_cnt_exe, reloc_cnt_cmn;
356         unsigned int base = 0;
357         uint8_t *d_obj, *d_exe;
358         int retval = 1;
359         int left;
360         int ret;
361         int i;
362
363         if (argc != 3) {
364                 printf("usage:\n%s <obj> <exe>\n", argv[0]);
365                 return 1;
366         }
367
368         f_obj = fopen(argv[1], "r+b");
369         if (f_obj == NULL) {
370                 fprintf(stderr, "%s", argv[1]);
371                 perror("");
372                 return 1;
373         }
374
375         f_exe = fopen(argv[2], "r");
376         if (f_exe == NULL) {
377                 fprintf(stderr, "%s", argv[1]);
378                 perror("");
379                 return 1;
380         }
381
382         parse_headers(f_obj, &base, &text_ofs_obj, &d_obj, &sztext_obj,
383                 &relocs_obj, &reloc_cnt_obj);
384         parse_headers(f_exe, &base, &text_ofs_exe, &d_exe, &sztext_exe,
385                 &relocs_exe, &reloc_cnt_exe);
386
387         sztext_cmn = sztext_obj;
388         if (sztext_cmn > sztext_exe)
389                 sztext_cmn = sztext_exe;
390         reloc_cnt_cmn = reloc_cnt_obj;
391         if (reloc_cnt_cmn > reloc_cnt_exe)
392                 reloc_cnt_cmn = reloc_cnt_exe;
393
394         if (sztext_cmn == 0) {
395                 printf("bad .text size(s): %ld, %ld\n",
396                         sztext_obj, sztext_exe);
397                 return 1;
398         }
399
400         for (i = 0; i < reloc_cnt_obj; i++) // reloc_cnt_cmn
401         {
402                 unsigned int a = relocs_obj[i].r_vaddr;
403                 //printf("%04x %08x\n", relocs_obj[i].r_type, a);
404
405                 switch (relocs_obj[i].r_type) {
406                 case 0x06:
407                         memset(d_obj + a, 0, 4);
408                         memset(d_exe + a, 0, 4);
409                         break;
410                 default:
411                         printf("unknown reloc %x @%08x\n",
412                                 relocs_obj[i].r_type, base + a);
413                         return 1;
414                 }
415         }
416
417         for (i = 0; i < sztext_cmn; i++)
418         {
419                 if (d_obj[i] == d_exe[i])
420                         continue;
421
422                 left = sztext_cmn - i;
423
424                 if (d_exe[i] == 0xcc) { // padding
425                         if (handle_pad(d_obj + i, d_exe + i, left))
426                                 continue;
427                 }
428
429                 ret = check_equiv(d_obj + i, d_exe + i, left);
430                 if (ret >= 0) {
431                         i += ret;
432                         continue;
433                 }
434
435                 printf("%x: %02x vs %02x\n", base + i, d_obj[i], d_exe[i]);
436                 goto out;
437         }
438
439 out:
440         return retval;
441 }