type, bp frame, arg tracking improvements
[ia32rtools.git] / tools / protoparse.h
... / ...
CommitLineData
1
struct parsed_proto;

/*
 * One C type as it appears in a parsed declaration
 * (return type, argument type or variable type).
 */
struct parsed_type {
	/* heap-allocated type text (strndup/strdup'ed by the parser) */
	char *name;
	/* declarator had a [...] suffix */
	unsigned int is_array : 1;
	/* known pointer typedef, LP* name (except LPARAM) or explicit '*' */
	unsigned int is_ptr : 1;
	/* struct passed by value that was split into several int args */
	unsigned int is_struct : 1;
};
10
11struct parsed_proto_arg {
12 char *reg;
13 struct parsed_type type;
14 struct parsed_proto *fptr;
15 void *datap;
16};
17
18struct parsed_proto {
19 char name[256];
20 union {
21 struct parsed_type ret_type;
22 struct parsed_type type;
23 };
24 struct parsed_proto_arg arg[16];
25 int argc;
26 int argc_stack;
27 int argc_reg;
28 unsigned int is_func:1;
29 unsigned int is_stdcall:1;
30 unsigned int is_vararg:1;
31 unsigned int is_fptr:1;
32 unsigned int is_noreturn:1;
33 unsigned int has_structarg:1;
34};
35
/* name of the header currently being parsed, used in diagnostics */
static const char *hdrfn;
/* line number within hdrfn of the declaration being parsed */
static int hdrfline;

/* defined further down; needed earlier by the parser */
static void pp_copy_arg(struct parsed_proto_arg *d,
	const struct parsed_proto_arg *s);

/* per-declaration callback invoked by do_protostrs() */
static int b_pp_c_handler(char *proto, const char *fname);
43
44static int do_protostrs(FILE *fhdr, const char *fname)
45{
46 const char *finc_name;
47 const char *hdrfn_saved;
48 char protostr[256];
49 FILE *finc;
50 int line = 0;
51 int ret;
52 char *p;
53
54 hdrfn_saved = hdrfn;
55 hdrfn = fname;
56
57 while (fgets(protostr, sizeof(protostr), fhdr))
58 {
59 line++;
60 if (strncmp(protostr, "//#include ", 11) == 0) {
61 finc_name = protostr + 11;
62 p = strpbrk(finc_name, "\r\n ");
63 if (p != NULL)
64 *p = 0;
65
66 finc = fopen(finc_name, "r");
67 if (finc == NULL) {
68 printf("%s:%d: can't open '%s'\n",
69 fname, line, finc_name);
70 continue;
71 }
72 ret = do_protostrs(finc, finc_name);
73 fclose(finc);
74 if (ret < 0)
75 break;
76 continue;
77 }
78 if (strncmp(sskip(protostr), "//", 2) == 0)
79 continue;
80
81 p = protostr + strlen(protostr);
82 for (p--; p >= protostr && my_isblank(*p); --p)
83 *p = 0;
84 if (p < protostr)
85 continue;
86
87 hdrfline = line;
88
89 ret = b_pp_c_handler(protostr, hdrfn);
90 if (ret < 0)
91 break;
92 }
93
94 hdrfn = hdrfn_saved;
95
96 if (feof(fhdr))
97 return 0;
98
99 return -1;
100}
101
/*
 * Extract a "<reg>" register spec found at p.
 *
 * dst:  receives the text between '<' and '>', NUL terminated
 * dlen: size of dst in bytes, terminator included
 * p:    input text
 *
 * Returns the number of chars consumed from p (closing '>' included),
 * or 0 when p does not start with '<', the closing '>' is missing, or
 * the spec does not fit into dst. dst is only meaningful when the
 * result is positive.
 */
static int get_regparm(char *dst, size_t dlen, char *p)
{
	size_t o = 0;
	int i;

	if (*p != '<')
		return 0;

	for (i = 1; p[i] != '>'; i++) {
		if (p[i] == 0)
			return 0;
		// always keep one byte free for the terminator
		if (o + 1 >= dlen)
			return 0;
		dst[o++] = p[i];
	}

	// only reachable with o >= dlen when dlen is 0 ("<>" input)
	if (o >= dlen)
		return 0;

	dst[o] = 0;
	return i + 1;
}
119
/*
 * Words that may precede the actual type name.
 * skip_type_mod() steps over these so that pointer detection
 * looks at the base type.
 */
static const char * const known_type_mod[] = {
	"const",
	"signed",
	"unsigned",
	"struct",
	"enum",
	"CONST",
};
129
/* Type names that check_type() treats as pointers even without a '*'. */
static const char * const known_ptr_types[] = {
	"HACCEL",
	"HANDLE",
	"HBITMAP",
	"HCURSOR",
	"HDC",
	"HFONT",
	"HGDIOBJ",
	"HGLOBAL",
	"HINSTANCE",
	"HMODULE",
	"HRGN",
	"HRSRC",
	"HKEY",
	"HMENU",
	"HWND",
	"PLONG",
	"PDWORD",
	"PVOID",
	"PCVOID",
	"DLGPROC",
	"va_list",
	"__VALIST",
};
154
/* Leading words of a declaration that parse_protostr() skips. */
static const char * const ignored_keywords[] = {
	"extern",
	"WINBASEAPI",
	"WINUSERAPI",
	"WINGDIAPI",
	"WINADVAPI",
};
162
163// returns ptr to char after type ends
164static int typecmp(const char *n, const char *t)
165{
166 for (; *t != 0; n++, t++) {
167 while (n[0] == ' ' && (n[1] == ' ' || n[1] == '*'))
168 n++;
169 while (t[0] == ' ' && (t[1] == ' ' || t[1] == '*'))
170 t++;
171 if (*n != *t)
172 return *n - *t;
173 }
174
175 return 0;
176}
177
178static const char *skip_type_mod(const char *n)
179{
180 int len;
181 int i;
182
183 for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
184 len = strlen(known_type_mod[i]);
185 if (strncmp(n, known_type_mod[i], len) != 0)
186 continue;
187 if (!my_isblank(n[len]))
188 continue;
189
190 n += len;
191 while (my_isblank(*n))
192 n++;
193 i = 0;
194 }
195
196 return n;
197}
198
199static int check_type(const char *name, struct parsed_type *type)
200{
201 const char *n, *n1;
202 int ret = -1;
203 int i;
204
205 n = skip_type_mod(name);
206
207 for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
208 if (typecmp(n, known_ptr_types[i]))
209 continue;
210
211 type->is_ptr = 1;
212 break;
213 }
214
215 if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
216 type->is_ptr = 1;
217
218 // assume single word
219 while (!my_isblank(*n) && !my_issep(*n))
220 n++;
221
222 while (1) {
223 n1 = n;
224 while (my_isblank(*n))
225 n++;
226 if (*n == '*') {
227 type->is_ptr = 1;
228 n++;
229 continue;
230 }
231 break;
232 }
233
234 ret = n1 - name;
235 type->name = strndup(name, ret);
236 return ret;
237}
238
/*
 * Args are always expanded to 32bit: translate a 16bit or 8bit register
 * name to the 32bit register containing it. Any other name is returned
 * unchanged.
 */
static const char *map_reg(const char *reg)
{
	static const char * const full[] = {
		"eax", "ebx", "ecx", "edx", "esi", "edi"
	};
	static const char * const word[] = {
		"ax", "bx", "cx", "dx", "si", "di"
	};
	static const char * const byte[] = {
		"al", "bl", "cl", "dl"
	};
	size_t k;

	for (k = 0; k < ARRAY_SIZE(word); k++) {
		if (IS(reg, word[k]))
			return full[k];
	}

	for (k = 0; k < ARRAY_SIZE(byte); k++) {
		if (IS(reg, byte[k]))
			return full[k];
	}

	return reg;
}
257
258static int check_struct_arg(struct parsed_proto_arg *arg)
259{
260 if (IS(arg->type.name, "POINT"))
261 return 2 - 1;
262
263 return 0;
264}
265
266static int parse_protostr(char *protostr, struct parsed_proto *pp)
267{
268 struct parsed_proto_arg *arg;
269 char regparm[16];
270 char buf[256];
271 char cconv[32];
272 int xarg = 0;
273 char *p, *p1;
274 int i, l;
275 int ret;
276
277 p = sskip(protostr);
278 if (p[0] == '/' && p[1] == '/') {
279 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
280 p = sskip(p + 2);
281 }
282
283 // strip unneeded stuff
284 for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
285 if ((p1[0] == '/' && p1[1] == '*')
286 || (p1[0] == '*' && p1[1] == '/'))
287 p1[0] = p1[1] = ' ';
288 }
289
290 if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
291 pp->is_noreturn = 1;
292 p = sskip(p + 18);
293 }
294
295 for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
296 l = strlen(ignored_keywords[i]);
297 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
298 p = sskip(p + l + 1);
299 }
300
301 ret = check_type(p, &pp->ret_type);
302 if (ret <= 0) {
303 printf("%s:%d:%zd: unhandled return in '%s'\n",
304 hdrfn, hdrfline, (p - protostr) + 1, protostr);
305 return -1;
306 }
307 p = sskip(p + ret);
308
309 if (!strchr(p, ')')) {
310 p = next_idt(buf, sizeof(buf), p);
311 p = sskip(p);
312 if (buf[0] == 0) {
313 printf("%s:%d:%zd: var name missing\n",
314 hdrfn, hdrfline, (p - protostr) + 1);
315 return -1;
316 }
317 strcpy(pp->name, buf);
318
319 p1 = strchr(p, ']');
320 if (p1 != NULL) {
321 p = p1 + 1;
322 pp->ret_type.is_array = 1;
323 }
324 return p - protostr;
325 }
326
327 pp->is_func = 1;
328
329 if (*p == '(') {
330 pp->is_fptr = 1;
331 p = sskip(p + 1);
332 }
333
334 p = next_word(cconv, sizeof(cconv), p);
335 p = sskip(p);
336 if (cconv[0] == 0) {
337 printf("%s:%d:%zd: cconv missing\n",
338 hdrfn, hdrfline, (p - protostr) + 1);
339 return -1;
340 }
341 if (IS(cconv, "__cdecl"))
342 pp->is_stdcall = 0;
343 else if (IS(cconv, "__stdcall"))
344 pp->is_stdcall = 1;
345 else if (IS(cconv, "__fastcall"))
346 pp->is_stdcall = 1;
347 else if (IS(cconv, "__thiscall"))
348 pp->is_stdcall = 1;
349 else if (IS(cconv, "__userpurge"))
350 pp->is_stdcall = 1; // IDA
351 else if (IS(cconv, "__usercall"))
352 pp->is_stdcall = 0; // IDA
353 else if (IS(cconv, "WINAPI"))
354 pp->is_stdcall = 1;
355 else {
356 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
357 hdrfn, hdrfline, (p - protostr) + 1, cconv);
358 return -1;
359 }
360
361 if (pp->is_fptr) {
362 if (*p != '*') {
363 printf("%s:%d:%zd: '*' expected\n",
364 hdrfn, hdrfline, (p - protostr) + 1);
365 return -1;
366 }
367 p++;
368 // XXX: skipping extra asterisks, for now
369 while (*p == '*')
370 p++;
371 p = sskip(p);
372 }
373
374 p = next_idt(buf, sizeof(buf), p);
375 p = sskip(p);
376 if (buf[0] == 0) {
377 //printf("%s:%d:%zd: func name missing\n",
378 // hdrfn, hdrfline, (p - protostr) + 1);
379 //return -1;
380 }
381 strcpy(pp->name, buf);
382
383 ret = get_regparm(regparm, sizeof(regparm), p);
384 if (ret > 0) {
385 if (!IS(regparm, "eax") && !IS(regparm, "ax")
386 && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
387 {
388 printf("%s:%d:%zd: bad regparm: %s\n",
389 hdrfn, hdrfline, (p - protostr) + 1, regparm);
390 return -1;
391 }
392 p += ret;
393 p = sskip(p);
394 }
395
396 if (pp->is_fptr) {
397 if (*p == '[') {
398 // not really ret_type is array, but ohwell
399 pp->ret_type.is_array = 1;
400 p = strchr(p + 1, ']');
401 if (p == NULL) {
402 printf("%s:%d:%zd: ']' expected\n",
403 hdrfn, hdrfline, (p - protostr) + 1);
404 return -1;
405 }
406 p = sskip(p + 1);
407 }
408 if (*p != ')') {
409 printf("%s:%d:%zd: ')' expected\n",
410 hdrfn, hdrfline, (p - protostr) + 1);
411 return -1;
412 }
413 p = sskip(p + 1);
414 }
415
416 if (*p != '(') {
417 printf("%s:%d:%zd: '(' expected, got '%c'\n",
418 hdrfn, hdrfline, (p - protostr) + 1, *p);
419 return -1;
420 }
421 p++;
422
423 // check for x(void)
424 p = sskip(p);
425 if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
426 && *sskip(p + 4) == ')')
427 p += 4;
428
429 while (1) {
430 p = sskip(p);
431 if (*p == ')') {
432 p++;
433 break;
434 }
435 if (xarg > 0) {
436 if (*p != ',') {
437 printf("%s:%d:%zd: ',' expected\n",
438 hdrfn, hdrfline, (p - protostr) + 1);
439 return -1;
440 }
441 p = sskip(p + 1);
442 }
443
444 if (!strncmp(p, "...", 3)) {
445 pp->is_vararg = 1;
446 p = sskip(p + 3);
447 if (*p == ')') {
448 p++;
449 break;
450 }
451 printf("%s:%d:%zd: ')' expected\n",
452 hdrfn, hdrfline, (p - protostr) + 1);
453 return -1;
454 }
455
456 arg = &pp->arg[xarg];
457 xarg++;
458
459 p1 = p;
460 ret = check_type(p, &arg->type);
461 if (ret <= 0) {
462 printf("%s:%d:%zd: unhandled type for arg%d\n",
463 hdrfn, hdrfline, (p - protostr) + 1, xarg);
464 return -1;
465 }
466 p = sskip(p + ret);
467
468 if (*p == '(') {
469 // func ptr
470 arg->fptr = calloc(1, sizeof(*arg->fptr));
471 ret = parse_protostr(p1, arg->fptr);
472 if (ret < 0) {
473 printf("%s:%d:%zd: funcarg parse failed\n",
474 hdrfn, hdrfline, p1 - protostr);
475 return -1;
476 }
477 // we'll treat it as void * for non-calls
478 arg->type.name = strdup("void *");
479 arg->type.is_ptr = 1;
480
481 p = p1 + ret;
482 }
483
484 p = next_idt(buf, sizeof(buf), p);
485 p = sskip(p);
486#if 0
487 if (buf[0] == 0) {
488 printf("%s:%d:%zd: idt missing for arg%d\n",
489 hdrfn, hdrfline, (p - protostr) + 1, xarg);
490 return -1;
491 }
492#endif
493 arg->reg = NULL;
494
495 ret = get_regparm(regparm, sizeof(regparm), p);
496 if (ret > 0) {
497 p += ret;
498 p = sskip(p);
499
500 arg->reg = strdup(map_reg(regparm));
501 }
502
503 ret = check_struct_arg(arg);
504 if (ret > 0) {
505 pp->has_structarg = 1;
506 arg->type.is_struct = 1;
507 free(arg->type.name);
508 arg->type.name = strdup("int");
509 for (l = 0; l < ret; l++) {
510 pp_copy_arg(&pp->arg[xarg], arg);
511 xarg++;
512 }
513 }
514 }
515
516 if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
517 if (pp->arg[0].reg != NULL) {
518 printf("%s:%d: %s with arg1 spec %s?\n",
519 hdrfn, hdrfline, cconv, pp->arg[0].reg);
520 }
521 pp->arg[0].reg = strdup("ecx");
522 }
523
524 if (xarg > 1 && IS(cconv, "__fastcall")) {
525 if (pp->arg[1].reg != NULL) {
526 printf("%s:%d: %s with arg2 spec %s?\n",
527 hdrfn, hdrfline, cconv, pp->arg[1].reg);
528 }
529 pp->arg[1].reg = strdup("edx");
530 }
531
532 if (pp->is_vararg && pp->is_stdcall) {
533 printf("%s:%d: vararg stdcall?\n", hdrfn, hdrfline);
534 return -1;
535 }
536
537 pp->argc = xarg;
538
539 for (i = 0; i < pp->argc; i++) {
540 if (pp->arg[i].reg == NULL)
541 pp->argc_stack++;
542 else
543 pp->argc_reg++;
544 }
545
546 return p - protostr;
547}
548
549static int pp_name_cmp(const void *p1, const void *p2)
550{
551 const struct parsed_proto *pp1 = p1, *pp2 = p2;
552 return strcmp(pp1->name, pp2->name);
553}
554
/* all parsed prototypes; sorted by name once build_pp_cache() is done */
static struct parsed_proto *pp_cache;
/* number of entries in use */
static int pp_cache_size;
/* number of entries allocated */
static int pp_cache_alloc;
558
559static int b_pp_c_handler(char *proto, const char *fname)
560{
561 int ret;
562
563 if (pp_cache_size >= pp_cache_alloc) {
564 pp_cache_alloc = pp_cache_alloc * 2 + 64;
565 pp_cache = realloc(pp_cache, pp_cache_alloc
566 * sizeof(pp_cache[0]));
567 my_assert_not(pp_cache, NULL);
568 memset(pp_cache + pp_cache_size, 0,
569 (pp_cache_alloc - pp_cache_size)
570 * sizeof(pp_cache[0]));
571 }
572
573 ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
574 if (ret < 0)
575 return -1;
576
577 pp_cache_size++;
578 return 0;
579}
580
581static void build_pp_cache(FILE *fhdr)
582{
583 int ret;
584
585 rewind(fhdr);
586
587 ret = do_protostrs(fhdr, hdrfn);
588 if (ret < 0)
589 exit(1);
590
591 qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
592}
593
594static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym)
595{
596 const struct parsed_proto *pp_ret;
597 struct parsed_proto pp_search;
598
599 if (pp_cache == NULL)
600 build_pp_cache(fhdr);
601
602 if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
603 sym++;
604
605 strcpy(pp_search.name, sym);
606 pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
607 sizeof(pp_cache[0]), pp_name_cmp);
608 if (pp_ret == NULL)
609 printf("%s: sym '%s' is missing\n", hdrfn, sym);
610
611 return pp_ret;
612}
613
614static void pp_copy_arg(struct parsed_proto_arg *d,
615 const struct parsed_proto_arg *s)
616{
617 memcpy(d, s, sizeof(*d));
618
619 if (s->reg != NULL) {
620 d->reg = strdup(s->reg);
621 my_assert_not(d->reg, NULL);
622 }
623 if (s->type.name != NULL) {
624 d->type.name = strdup(s->type.name);
625 my_assert_not(d->type.name, NULL);
626 }
627 if (s->fptr != NULL) {
628 d->fptr = malloc(sizeof(*d->fptr));
629 my_assert_not(d->fptr, NULL);
630 memcpy(d->fptr, s->fptr, sizeof(*d->fptr));
631 }
632}
633
634struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
635{
636 struct parsed_proto *pp;
637 int i;
638
639 pp = malloc(sizeof(*pp));
640 my_assert_not(pp, NULL);
641 memcpy(pp, pp_c, sizeof(*pp)); // lazy..
642
643 // do the actual deep copy..
644 for (i = 0; i < pp_c->argc; i++)
645 pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
646 if (pp_c->ret_type.name != NULL)
647 pp->ret_type.name = strdup(pp_c->ret_type.name);
648
649 return pp;
650}
651
652static inline void proto_release(struct parsed_proto *pp)
653{
654 int i;
655
656 for (i = 0; i < pp->argc; i++) {
657 if (pp->arg[i].reg != NULL)
658 free(pp->arg[i].reg);
659 if (pp->arg[i].type.name != NULL)
660 free(pp->arg[i].type.name);
661 if (pp->arg[i].fptr != NULL)
662 free(pp->arg[i].fptr);
663 }
664 if (pp->ret_type.name != NULL)
665 free(pp->ret_type.name);
666 free(pp);
667}