/*
 * cmpmrg_text.c (ia32rtools.git)
 * Commit subject: ".text cmp tool, updates for it to pass"
 */
cfd23479 1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <linux/coff.h>
5#include <assert.h>
6#include <stdint.h>
7
/* COFF file format reference: http://www.delorie.com/djgpp/doc/coff/ */
9
/*
 * COFF file header.  parse_headers() reads it straight from the file with
 * fread(), so the member order and sizes must match the on-disk layout
 * (20 bytes when short is 16 bits and int is 32 bits).
 */
typedef struct {
  unsigned short f_magic;   /* magic number */
  unsigned short f_nscns;   /* number of sections */
  unsigned int   f_timdat;  /* time & date stamp */
  unsigned int   f_symptr;  /* file pointer to symtab */
  unsigned int   f_nsyms;   /* number of symtab entries */
  unsigned short f_opthdr;  /* sizeof(optional hdr) */
  unsigned short f_flags;   /* flags */
} FILHDR;
19
/*
 * Standard part of the COFF optional ("a.out") header.  It is read raw
 * from the file, so the layout must match the on-disk format (28 bytes
 * when short is 16 bits and int is 32 bits).
 */
typedef struct {
  unsigned short magic;       /* type of file */
  unsigned short vstamp;      /* version stamp */
  unsigned int   tsize;       /* text size in bytes, padded to FW bdry */
  unsigned int   dsize;       /* initialized data, likewise */
  unsigned int   bsize;       /* uninitialized data, likewise */
  unsigned int   entry;       /* entry pt. */
  unsigned int   text_start;  /* base of text used for this file */
  unsigned int   data_start;  /* base of data used for this file */
} AOUTHDR;
30
/*
 * COFF section header.  It is read raw from the file, so the layout must
 * match the on-disk format (40 bytes when short is 16 bits and int is
 * 32 bits).
 */
typedef struct {
  char           s_name[8];  /* section name; not NUL-terminated when all 8 bytes are used */
  unsigned int   s_paddr;    /* physical address, aliased s_nlib */
  unsigned int   s_vaddr;    /* virtual address */
  unsigned int   s_size;     /* section size */
  unsigned int   s_scnptr;   /* file ptr to raw data for section */
  unsigned int   s_relptr;   /* file ptr to relocation */
  unsigned int   s_lnnoptr;  /* file ptr to line numbers */
  unsigned short s_nreloc;   /* number of relocation entries */
  unsigned short s_nlnno;    /* number of line number entries */
  unsigned int   s_flags;    /* flags */
} SCNHDR;
43
/*
 * COFF relocation entry.  The on-disk record is 10 bytes with no trailing
 * padding, hence the packed attribute: arrays of RELOC are filled directly
 * by fread().
 */
typedef struct {
  unsigned int   r_vaddr;   /* address of relocation */
  unsigned int   r_symndx;  /* symbol we're adjusting for */
  unsigned short r_type;    /* type of relocation */
} __attribute__((packed)) RELOC;
49
/*
 * Runtime check used instead of assert(): verifies that 'v' equals
 * 'expect' (is_eq != 0) or differs from it (is_eq == 0).
 *
 * On failure, prints the source line, the stringified expression and both
 * values in hex to stdout, then terminates the process with exit(1).
 * Returns normally only when the check passes.
 */
static void my_assert_(int line, const char *name, long v, long expect, int is_eq)
{
  int ok = is_eq ? (v == expect) : (v != expect);

  if (ok)
    return;

  /* %lx expects unsigned long, so convert the signed values explicitly */
  printf("%d: '%s' is %lx, need %s%lx\n", line, name,
    (unsigned long)v, is_eq ? "" : "!", (unsigned long)expect);
  exit(1);
}

/* Exit unless (v) == (exp); both operands are converted to long. */
#define my_assert(v, exp) \
  my_assert_(__LINE__, #v, (long)(v), (long)(exp), 1)
/* Exit unless (v) != (exp); both operands are converted to long. */
#define my_assert_not(v, exp) \
  my_assert_(__LINE__, #v, (long)(v), (long)(exp), 0)
69
70void parse_headers(FILE *f, unsigned int *base_out,
71 long *sect_ofs, uint8_t **sect_data, long *sect_sz,
72 RELOC **relocs, long *reloc_cnt)
73{
74 unsigned int base = 0;
75 long opthdr_pos;
76 long reloc_size;
77 FILHDR hdr;
78 AOUTHDR opthdr;
79 SCNHDR scnhdr;
80 int s, val;
81 int ret;
82
83 ret = fread(&hdr, 1, sizeof(hdr), f);
84 my_assert(ret, sizeof(hdr));
85
86 if (hdr.f_magic == 0x5a4d) // MZ
87 {
88 ret = fseek(f, 0x3c, SEEK_SET);
89 my_assert(ret, 0);
90 ret = fread(&val, 1, sizeof(val), f);
91 my_assert(ret, sizeof(val));
92
93 ret = fseek(f, val, SEEK_SET);
94 my_assert(ret, 0);
95 ret = fread(&val, 1, sizeof(val), f);
96 my_assert(ret, sizeof(val));
97 my_assert(val, 0x4550); // PE
98
99 // should be COFF now
100 ret = fread(&hdr, 1, sizeof(hdr), f);
101 my_assert(ret, sizeof(hdr));
102 }
103
104 my_assert(hdr.f_magic, COFF_I386MAGIC);
105
106 if (hdr.f_opthdr != 0)
107 {
108 opthdr_pos = ftell(f);
109
110 if (hdr.f_opthdr < sizeof(opthdr))
111 my_assert(1, 0);
112
113 ret = fread(&opthdr, 1, sizeof(opthdr), f);
114 my_assert(ret, sizeof(opthdr));
115 my_assert(opthdr.magic, COFF_ZMAGIC);
116
117 printf("text_start: %x\n", opthdr.text_start);
118
119 if (hdr.f_opthdr > sizeof(opthdr)) {
120 ret = fread(&base, 1, sizeof(base), f);
121 my_assert(ret, sizeof(base));
122 printf("base: %x\n", base);
123 }
124 ret = fseek(f, opthdr_pos + hdr.f_opthdr, SEEK_SET);
125 my_assert(ret, 0);
126 }
127
128 for (s = 0; s < hdr.f_nscns; s++) {
129 ret = fread(&scnhdr, 1, sizeof(scnhdr), f);
130 my_assert(ret, sizeof(scnhdr));
131
132 if (scnhdr.s_size != 0)
133 break;
134 }
135
136 printf("s_name: '%s'\n", scnhdr.s_name);
137 printf("s_vaddr: %x\n", scnhdr.s_vaddr);
138 printf("s_size: %x\n", scnhdr.s_size);
139 printf("s_scnptr: %x\n", scnhdr.s_scnptr);
140 printf("s_nreloc: %x\n", scnhdr.s_nreloc);
141 printf("--\n");
142
143 ret = fseek(f, scnhdr.s_scnptr, SEEK_SET);
144 my_assert(ret, 0);
145
146 *sect_data = malloc(scnhdr.s_size);
147 my_assert_not(*sect_data, NULL);
148 ret = fread(*sect_data, 1, scnhdr.s_size, f);
149 my_assert(ret, scnhdr.s_size);
150
151 *sect_ofs = scnhdr.s_scnptr;
152 *sect_sz = scnhdr.s_size;
153
154 ret = fseek(f, scnhdr.s_relptr, SEEK_SET);
155 my_assert(ret, 0);
156
157 reloc_size = scnhdr.s_nreloc * sizeof((*relocs)[0]);
158 *relocs = malloc(reloc_size + 1);
159 my_assert_not(*relocs, NULL);
160 ret = fread(*relocs, 1, reloc_size, f);
161 my_assert(ret, reloc_size);
162
163 *reloc_cnt = scnhdr.s_nreloc;
164
165 if (base != 0)
166 *base_out = base + scnhdr.s_vaddr;
167}
168
/*
 * Reconcile alignment padding between the two images.
 *
 * Counts the run of 0xcc bytes at the start of d_exe (at most maxlen) and
 * checks that d_obj holds, over the same span, the expected filler
 * instructions: the run is split into chunks of up to 7 bytes, and each
 * chunk must equal the fixed filler sequence of that length.
 *
 * On success the whole span in d_obj is overwritten with 0xcc so that it
 * compares equal to d_exe, and 1 is returned.  On any mismatch 0 is
 * returned and d_obj is left untouched.  An empty run returns 1.
 */
static int handle_pad(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
{
  static const uint8_t p7[7] = { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 }; // lea 0x0(%esp),%esp
  static const uint8_t p6[6] = { 0x8d, 0x9b, 0x00, 0x00, 0x00, 0x00 };       // lea 0x0(%ebx),%ebx
  static const uint8_t p5[5] = { 0x05, 0x00, 0x00, 0x00, 0x00 };             // add $0x0,%eax
  static const uint8_t p4[4] = { 0x8d, 0x64, 0x24, 0x00 };                   // lea 0x0(%esp),%esp
  static const uint8_t p3[3] = { 0x8d, 0x49, 0x00 };                         // lea 0x0(%ecx),%ecx
  static const uint8_t p2[2] = { 0x8b, 0xff };                               // mov %edi,%edi
  static const uint8_t p1[1] = { 0x90 };                                     // nop
  // indexed by chunk length; slot 0 is never used
  static const uint8_t *const fillers[8] = {
    NULL, p1, p2, p3, p4, p5, p6, p7
  };
  int total;
  int pos;
  int chunk;

  // length of the 0xcc run in the exe
  for (total = 0; total < maxlen; total++)
    if (d_exe[total] != 0xcc)
      break;

  // verify every chunk before touching d_obj, so that a failure
  // cannot leave it half rewritten
  for (pos = 0; pos < total; pos += chunk)
  {
    chunk = total - pos;
    if (chunk > 7)
      chunk = 7;

    if (memcmp(d_obj + pos, fillers[chunk], chunk) != 0)
      return 0;
  }

  if (total > 0)
    memset(d_obj, 0xcc, total);

  return 1;
}
217
/*
 * One pair of instruction encodings that are accepted as equivalent when
 * they differ between the obj (v_masm) and the exe (v_msvc).
 *
 * A mask byte of 0xff marks a byte that must match the template exactly.
 * Any other mask marks a "variable" byte: the masked bits must match the
 * template, and the remaining bits are compared between obj and exe by
 * check_equiv().
 */
struct equiv_opcode {
  signed char len;        // pattern length in bytes
  signed char ofs;        // pattern start relative to the first differing byte (<= 0)
  short cmp_rm;           // non-zero: for variable bytes with mask 0xc0, the two
                          // 3-bit fields are compared swapped between obj and exe
  uint8_t v_masm[8];      // template for the obj side
  uint8_t v_masm_mask[8];
  uint8_t v_msvc[8];      // template for the exe side
  uint8_t v_msvc_mask[8];
} equiv_ops[] = {
  // cmp $imm,%ax: imm8 form (66 83 f8 ib) vs imm16 form (66 3d iw)
  { 4, -1, 0,
    { 0x66,0x83,0xf8,0x03 }, { 0xff,0xff,0xff,0x00 },
    { 0x66,0x3d,0x03,0x00 }, { 0xff,0xff,0x00,0xff }, },
  // lea -0x1(%ebx,%eax,1),%esi // op mod/rm sib offs
  // mov, test, imm grp 1
  { 3, -2, 1,
    { 0x8d,0x74,0x03 }, { 0xf0,0x07,0xc0 },
    { 0x8d,0x74,0x18 }, { 0xf0,0x07,0xc0 }, },
  // movzbl 0x58f24a(%eax,%ecx,1),%eax
  { 4, -3, 1,
    { 0x0f,0xb6,0x84,0x08 }, { 0xff,0xff,0x07,0xc0 },
    { 0x0f,0xb6,0x84,0x01 }, { 0xff,0xff,0x07,0xc0 }, },
  // inc/dec
  { 3, -2, 1,
    { 0xfe,0x4c,0x03 }, { 0xfe,0xff,0xc0 },
    { 0xfe,0x4c,0x18 }, { 0xfe,0xff,0xc0 }, },
  // cmp
  { 3, -2, 1,
    { 0x38,0x0c,0x0c }, { 0xff,0xff,0xc0 },
    { 0x38,0x0c,0x30 }, { 0xff,0xff,0xc0 }, },
  // test %dl,%bl
  { 2, -1, 1,
    { 0x84,0xd3 }, { 0xfe,0xc0 },
    { 0x84,0xda }, { 0xfe,0xc0 }, },
  // cmp r,r/m vs rm/r
  { 2, 0, 1,
    { 0x3a,0xca }, { 0xff,0xc0 },
    { 0x38,0xd1 }, { 0xff,0xc0 }, },
  // rep + 66 prefix
  { 2, 0, 0,
    { 0xf3,0x66 }, { 0xfe,0xff },
    { 0x66,0xf3 }, { 0xff,0xfe }, },
  // fadd st, st(0) vs st(0), st
  { 2, 0, 0,
    { 0xd8,0xc0 }, { 0xff,0xf7 },
    { 0xdc,0xc0 }, { 0xff,0xf7 }, },

  // broad filters (may take too much..)
  // testb $0x4,0x1d(%esi,%eax,1)
  // movb, push, ..
  { 3, -2, 1,
    { 0xf6,0x44,0x06 }, { 0x00,0x07,0xc0 },
    { 0xf6,0x44,0x30 }, { 0x00,0x07,0xc0 }, },
};
272
/*
 * Compare 'len' bytes of 'd' against 'expect', looking only at the bits
 * selected by the corresponding 'mask' byte.
 *
 * Returns 0 when every masked byte matches (also when len <= 0),
 * 1 on the first difference.  None of the buffers is modified.
 */
static int cmp_mask(const uint8_t *d, const uint8_t *expect,
  const uint8_t *mask, int len)
{
  int i;

  for (i = 0; i < len; i++) {
    // bits differ iff the XOR has a bit set inside the mask
    if (((d[i] ^ expect[i]) & mask[i]) != 0)
      return 1;
  }

  return 0;
}
283
284static int check_equiv(uint8_t *d_obj, uint8_t *d_exe, int maxlen)
285{
286 uint8_t vo, ve, vo2, ve2;
287 int i, jo, je;
288 int len, ofs;
289
290 for (i = 0; i < sizeof(equiv_ops) / sizeof(equiv_ops[0]); i++)
291 {
292 struct equiv_opcode *op = &equiv_ops[i];
293
294 len = op->len;
295 if (maxlen < len)
296 continue;
297
298 ofs = op->ofs;
299 if (cmp_mask(d_obj + ofs, op->v_masm,
300 op->v_masm_mask, len))
301 continue;
302 if (cmp_mask(d_exe + ofs, op->v_msvc,
303 op->v_msvc_mask, len))
304 continue;
305
306 jo = je = 0;
307 d_obj += ofs;
308 d_exe += ofs;
309 while (1)
310 {
311 for (; jo < len; jo++)
312 if (op->v_masm_mask[jo] != 0xff)
313 break;
314 for (; je < len; je++)
315 if (op->v_msvc_mask[je] != 0xff)
316 break;
317
318 if ((jo == len && je != len) || (jo != len && je == len)) {
319 printf("invalid equiv_ops\n");
320 return -1;
321 }
322 if (jo == len)
323 return len + ofs - 1; // matched
324
325 // var byte
326 vo = d_obj[jo] & ~op->v_masm_mask[jo];
327 ve = d_exe[je] & ~op->v_msvc_mask[je];
328 if (op->cmp_rm && op->v_masm_mask[jo] == 0xc0) {
329 vo2 = vo >> 3;
330 vo &= 7;
331 ve2 = ve & 7;
332 ve >>= 3;
333 if (vo != ve || vo2 != ve2)
334 return -1;
335 }
336 else {
337 if (vo != ve)
338 return -1;
339 }
340
341 jo++;
342 je++;
343 }
344 }
345
346 return -1;
347}
348
349int main(int argc, char *argv[])
350{
351 FILE *f_obj, *f_exe;
352 long text_ofs_obj, text_ofs_exe;
353 long sztext_obj, sztext_exe, sztext_cmn;
354 RELOC *relocs_obj, *relocs_exe;
355 long reloc_cnt_obj, reloc_cnt_exe, reloc_cnt_cmn;
356 unsigned int base = 0;
357 uint8_t *d_obj, *d_exe;
358 int retval = 1;
359 int left;
360 int ret;
361 int i;
362
363 if (argc != 3) {
364 printf("usage:\n%s <obj> <exe>\n", argv[0]);
365 return 1;
366 }
367
368 f_obj = fopen(argv[1], "r+b");
369 if (f_obj == NULL) {
370 fprintf(stderr, "%s", argv[1]);
371 perror("");
372 return 1;
373 }
374
375 f_exe = fopen(argv[2], "r");
376 if (f_exe == NULL) {
377 fprintf(stderr, "%s", argv[1]);
378 perror("");
379 return 1;
380 }
381
382 parse_headers(f_obj, &base, &text_ofs_obj, &d_obj, &sztext_obj,
383 &relocs_obj, &reloc_cnt_obj);
384 parse_headers(f_exe, &base, &text_ofs_exe, &d_exe, &sztext_exe,
385 &relocs_exe, &reloc_cnt_exe);
386
387 sztext_cmn = sztext_obj;
388 if (sztext_cmn > sztext_exe)
389 sztext_cmn = sztext_exe;
390 reloc_cnt_cmn = reloc_cnt_obj;
391 if (reloc_cnt_cmn > reloc_cnt_exe)
392 reloc_cnt_cmn = reloc_cnt_exe;
393
394 if (sztext_cmn == 0) {
395 printf("bad .text size(s): %ld, %ld\n",
396 sztext_obj, sztext_exe);
397 return 1;
398 }
399
400 for (i = 0; i < reloc_cnt_obj; i++) // reloc_cnt_cmn
401 {
402 unsigned int a = relocs_obj[i].r_vaddr;
403 //printf("%04x %08x\n", relocs_obj[i].r_type, a);
404
405 switch (relocs_obj[i].r_type) {
406 case 0x06:
407 memset(d_obj + a, 0, 4);
408 memset(d_exe + a, 0, 4);
409 break;
410 default:
411 printf("unknown reloc %x @%08x\n",
412 relocs_obj[i].r_type, base + a);
413 return 1;
414 }
415 }
416
417 for (i = 0; i < sztext_cmn; i++)
418 {
419 if (d_obj[i] == d_exe[i])
420 continue;
421
422 left = sztext_cmn - i;
423
424 if (d_exe[i] == 0xcc) { // padding
425 if (handle_pad(d_obj + i, d_exe + i, left))
426 continue;
427 }
428
429 ret = check_equiv(d_obj + i, d_exe + i, left);
430 if (ret >= 0) {
431 i += ret;
432 continue;
433 }
434
435 printf("%x: %02x vs %02x\n", base + i, d_obj[i], d_exe[i]);
436 goto out;
437 }
438
439out:
440 return retval;
441}