type, bp frame, arg tracking improvements
[ia32rtools.git] / tools / protoparse.h
CommitLineData
c36e914d 1
39b168b8 2struct parsed_proto;
3
/* A parsed C type: its source spelling plus classification flags. */
struct parsed_type {
  char *name;            // heap-allocated type text (strndup/strdup'ed by the parser)
  unsigned is_array:1;   // set when a ']' follows the declared name
  unsigned is_ptr:1;     // known pointer typedef, LP* name or explicit '*'
  unsigned is_struct:1;  // split for args
};
10
39b168b8 11struct parsed_proto_arg {
12 char *reg;
3ebea2cf 13 struct parsed_type type;
39b168b8 14 struct parsed_proto *fptr;
15 void *datap;
16};
17
c36e914d 18struct parsed_proto {
39b168b8 19 char name[256];
3ebea2cf 20 union {
21 struct parsed_type ret_type;
22 struct parsed_type type;
23 };
39b168b8 24 struct parsed_proto_arg arg[16];
c36e914d 25 int argc;
26 int argc_stack;
27 int argc_reg;
06c5d854 28 unsigned int is_func:1;
39b168b8 29 unsigned int is_stdcall:1;
7ba45c34 30 unsigned int is_vararg:1;
39b168b8 31 unsigned int is_fptr:1;
e56ab892 32 unsigned int is_noreturn:1;
a652aa9f 33 unsigned int has_structarg:1;
c36e914d 34};
35
// name and current line of the header being parsed, used in diagnostics
static const char *hdrfn;
static int hdrfline;
38
a652aa9f 39static void pp_copy_arg(struct parsed_proto_arg *d,
40 const struct parsed_proto_arg *s);
41
bd96f656 42static int b_pp_c_handler(char *proto, const char *fname);
43
44static int do_protostrs(FILE *fhdr, const char *fname)
c36e914d 45{
06c5d854 46 const char *finc_name;
bd96f656 47 const char *hdrfn_saved;
48 char protostr[256];
39b168b8 49 FILE *finc;
c36e914d 50 int line = 0;
39b168b8 51 int ret;
c36e914d 52 char *p;
53
bd96f656 54 hdrfn_saved = hdrfn;
55 hdrfn = fname;
39b168b8 56
bd96f656 57 while (fgets(protostr, sizeof(protostr), fhdr))
c36e914d 58 {
59 line++;
bd96f656 60 if (strncmp(protostr, "//#include ", 11) == 0) {
61 finc_name = protostr + 11;
06c5d854 62 p = strpbrk(finc_name, "\r\n ");
39b168b8 63 if (p != NULL)
64 *p = 0;
65
06c5d854 66 finc = fopen(finc_name, "r");
39b168b8 67 if (finc == NULL) {
68 printf("%s:%d: can't open '%s'\n",
06c5d854 69 fname, line, finc_name);
39b168b8 70 continue;
71 }
bd96f656 72 ret = do_protostrs(finc, finc_name);
39b168b8 73 fclose(finc);
bd96f656 74 if (ret < 0)
39b168b8 75 break;
76 continue;
77 }
bd96f656 78 if (strncmp(sskip(protostr), "//", 2) == 0)
79 continue;
80
81 p = protostr + strlen(protostr);
82 for (p--; p >= protostr && my_isblank(*p); --p)
83 *p = 0;
84 if (p < protostr)
06c5d854 85 continue;
39b168b8 86
bd96f656 87 hdrfline = line;
88
89 ret = b_pp_c_handler(protostr, hdrfn);
90 if (ret < 0)
c36e914d 91 break;
92 }
c36e914d 93
bd96f656 94 hdrfn = hdrfn_saved;
c36e914d 95
bd96f656 96 if (feof(fhdr))
97 return 0;
c36e914d 98
bd96f656 99 return -1;
c36e914d 100}
101
/*
 * Extract a "<reg>" annotation starting at p.
 *
 * On success the text between '<' and '>' is copied to dst (always
 * NUL terminated) and the number of characters consumed, including
 * both brackets, is returned.
 * Returns 0 when p does not start with '<', when the closing '>' is
 * missing, or when the text does not fit into dlen bytes; dst contents
 * are unspecified in that case.
 */
static int get_regparm(char *dst, size_t dlen, char *p)
{
  size_t o = 0;
  int i;

  if (*p != '<' || dlen == 0)
    return 0;

  for (i = 1; p[i] != '>'; i++) {
    if (p[i] == 0)
      return 0;
    // keep one byte for the terminator
    if (o + 1 >= dlen)
      return 0;
    dst[o++] = p[i];
  }
  dst[o] = 0;
  return i + 1;
}
119
// leading words skipped by skip_type_mod() before the type word itself
static const char *known_type_mod[] = {
  "const", "signed", "unsigned",
  "struct", "enum",
  "CONST",
};
129
// type names that check_type() flags as pointers even without a '*'
static const char *known_ptr_types[] = {
  // handles
  "HACCEL", "HANDLE", "HBITMAP", "HCURSOR",
  "HDC", "HFONT", "HGDIOBJ", "HGLOBAL",
  "HINSTANCE", "HMODULE", "HRGN", "HRSRC",
  "HKEY", "HMENU", "HWND",
  // P* typedefs
  "PLONG", "PDWORD", "PVOID", "PCVOID",
  // others
  "DLGPROC",
  "va_list", "__VALIST",
};
154
// words dropped from the start of a prototype; parse_protostr() makes
// a single pass over this list, in this order
static const char *ignored_keywords[] = {
  "extern",
  "WINBASEAPI", "WINUSERAPI", "WINGDIAPI", "WINADVAPI",
};
162
/*
 * Check whether n begins with type name t.
 * In both strings a space that is followed by another space or by '*'
 * is skipped, so "void  *" and "void *" compare equal.
 * Returns 0 if all of t matched (n may continue past it), otherwise
 * the difference of the first mismatching characters.
 */
static int typecmp(const char *n, const char *t)
{
  while (*t != 0) {
    while (n[0] == ' ' && (n[1] == ' ' || n[1] == '*'))
      n++;
    while (t[0] == ' ' && (t[1] == ' ' || t[1] == '*'))
      t++;

    if (*n != *t)
      return *n - *t;

    n++;
    t++;
  }

  return 0;
}
177
3ebea2cf 178static const char *skip_type_mod(const char *n)
c36e914d 179{
64c59faf 180 int len;
39b168b8 181 int i;
c36e914d 182
64c59faf 183 for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
184 len = strlen(known_type_mod[i]);
185 if (strncmp(n, known_type_mod[i], len) != 0)
186 continue;
bd96f656 187 if (!my_isblank(n[len]))
188 continue;
64c59faf 189
190 n += len;
191 while (my_isblank(*n))
192 n++;
193 i = 0;
194 }
195
3ebea2cf 196 return n;
197}
198
199static int check_type(const char *name, struct parsed_type *type)
200{
201 const char *n, *n1;
202 int ret = -1;
203 int i;
204
205 n = skip_type_mod(name);
206
207 for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
208 if (typecmp(n, known_ptr_types[i]))
64c59faf 209 continue;
210
3ebea2cf 211 type->is_ptr = 1;
212 break;
c36e914d 213 }
214
a652aa9f 215 if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
3ebea2cf 216 type->is_ptr = 1;
217
218 // assume single word
219 while (!my_isblank(*n) && !my_issep(*n))
220 n++;
221
222 while (1) {
223 n1 = n;
224 while (my_isblank(*n))
225 n++;
226 if (*n == '*') {
227 type->is_ptr = 1;
228 n++;
229 continue;
230 }
231 break;
232 }
233
234 ret = n1 - name;
235 type->name = strndup(name, ret);
236 return ret;
c36e914d 237}
238
/*
 * args are always expanded to 32bit: translate a 16 or 8 bit register
 * name to the 32 bit register containing it. Any other name is
 * returned unchanged.
 */
static const char *map_reg(const char *reg)
{
  // the three tables are index-aligned
  static const char * const full[] = { "eax", "ebx", "ecx", "edx", "esi", "edi" };
  static const char * const word[] = { "ax", "bx", "cx", "dx", "si", "di" };
  static const char * const byte[] = { "al", "bl", "cl", "dl" };
  size_t idx;

  for (idx = 0; idx < ARRAY_SIZE(word); idx++) {
    if (IS(reg, word[idx]))
      return full[idx];
  }

  for (idx = 0; idx < ARRAY_SIZE(byte); idx++) {
    if (IS(reg, byte[idx]))
      return full[idx];
  }

  return reg;
}
257
a652aa9f 258static int check_struct_arg(struct parsed_proto_arg *arg)
259{
260 if (IS(arg->type.name, "POINT"))
261 return 2 - 1;
262
263 return 0;
264}
265
c36e914d 266static int parse_protostr(char *protostr, struct parsed_proto *pp)
267{
39b168b8 268 struct parsed_proto_arg *arg;
c36e914d 269 char regparm[16];
270 char buf[256];
271 char cconv[32];
c36e914d 272 int xarg = 0;
39b168b8 273 char *p, *p1;
3ebea2cf 274 int i, l;
c36e914d 275 int ret;
c36e914d 276
06c5d854 277 p = sskip(protostr);
c36e914d 278 if (p[0] == '/' && p[1] == '/') {
06c5d854 279 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
c36e914d 280 p = sskip(p + 2);
281 }
282
06c5d854 283 // strip unneeded stuff
284 for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
285 if ((p1[0] == '/' && p1[1] == '*')
286 || (p1[0] == '*' && p1[1] == '/'))
287 p1[0] = p1[1] = ' ';
288 }
289
e56ab892 290 if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
291 pp->is_noreturn = 1;
292 p = sskip(p + 18);
293 }
294
3ebea2cf 295 for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
296 l = strlen(ignored_keywords[i]);
297 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
298 p = sskip(p + l + 1);
299 }
06c5d854 300
3ebea2cf 301 ret = check_type(p, &pp->ret_type);
302 if (ret <= 0) {
63df67be 303 printf("%s:%d:%zd: unhandled return in '%s'\n",
c36e914d 304 hdrfn, hdrfline, (p - protostr) + 1, protostr);
39b168b8 305 return -1;
c36e914d 306 }
64c59faf 307 p = sskip(p + ret);
c36e914d 308
06c5d854 309 if (!strchr(p, ')')) {
310 p = next_idt(buf, sizeof(buf), p);
311 p = sskip(p);
312 if (buf[0] == 0) {
63df67be 313 printf("%s:%d:%zd: var name missing\n",
06c5d854 314 hdrfn, hdrfline, (p - protostr) + 1);
315 return -1;
316 }
317 strcpy(pp->name, buf);
318
319 p1 = strchr(p, ']');
320 if (p1 != NULL) {
321 p = p1 + 1;
3ebea2cf 322 pp->ret_type.is_array = 1;
06c5d854 323 }
324 return p - protostr;
325 }
326
327 pp->is_func = 1;
328
39b168b8 329 if (*p == '(') {
330 pp->is_fptr = 1;
331 p = sskip(p + 1);
332 }
333
c36e914d 334 p = next_word(cconv, sizeof(cconv), p);
335 p = sskip(p);
336 if (cconv[0] == 0) {
63df67be 337 printf("%s:%d:%zd: cconv missing\n",
c36e914d 338 hdrfn, hdrfline, (p - protostr) + 1);
39b168b8 339 return -1;
c36e914d 340 }
341 if (IS(cconv, "__cdecl"))
342 pp->is_stdcall = 0;
343 else if (IS(cconv, "__stdcall"))
344 pp->is_stdcall = 1;
345 else if (IS(cconv, "__fastcall"))
346 pp->is_stdcall = 1;
347 else if (IS(cconv, "__thiscall"))
348 pp->is_stdcall = 1;
349 else if (IS(cconv, "__userpurge"))
de50b98b 350 pp->is_stdcall = 1; // IDA
c36e914d 351 else if (IS(cconv, "__usercall"))
de50b98b 352 pp->is_stdcall = 0; // IDA
64c59faf 353 else if (IS(cconv, "WINAPI"))
354 pp->is_stdcall = 1;
c36e914d 355 else {
63df67be 356 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
c36e914d 357 hdrfn, hdrfline, (p - protostr) + 1, cconv);
39b168b8 358 return -1;
359 }
360
361 if (pp->is_fptr) {
362 if (*p != '*') {
63df67be 363 printf("%s:%d:%zd: '*' expected\n",
39b168b8 364 hdrfn, hdrfline, (p - protostr) + 1);
365 return -1;
366 }
bd96f656 367 p++;
368 // XXX: skipping extra asterisks, for now
369 while (*p == '*')
370 p++;
371 p = sskip(p);
c36e914d 372 }
373
374 p = next_idt(buf, sizeof(buf), p);
375 p = sskip(p);
376 if (buf[0] == 0) {
de50b98b 377 //printf("%s:%d:%zd: func name missing\n",
378 // hdrfn, hdrfline, (p - protostr) + 1);
379 //return -1;
c36e914d 380 }
39b168b8 381 strcpy(pp->name, buf);
c36e914d 382
383 ret = get_regparm(regparm, sizeof(regparm), p);
384 if (ret > 0) {
385 if (!IS(regparm, "eax") && !IS(regparm, "ax")
2b43685d 386 && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
c36e914d 387 {
63df67be 388 printf("%s:%d:%zd: bad regparm: %s\n",
c36e914d 389 hdrfn, hdrfline, (p - protostr) + 1, regparm);
39b168b8 390 return -1;
c36e914d 391 }
392 p += ret;
393 p = sskip(p);
394 }
395
39b168b8 396 if (pp->is_fptr) {
bd96f656 397 if (*p == '[') {
398 // not really ret_type is array, but ohwell
399 pp->ret_type.is_array = 1;
400 p = strchr(p + 1, ']');
401 if (p == NULL) {
402 printf("%s:%d:%zd: ']' expected\n",
403 hdrfn, hdrfline, (p - protostr) + 1);
404 return -1;
405 }
406 p = sskip(p + 1);
407 }
39b168b8 408 if (*p != ')') {
63df67be 409 printf("%s:%d:%zd: ')' expected\n",
39b168b8 410 hdrfn, hdrfline, (p - protostr) + 1);
411 return -1;
412 }
413 p = sskip(p + 1);
414 }
415
c36e914d 416 if (*p != '(') {
63df67be 417 printf("%s:%d:%zd: '(' expected, got '%c'\n",
c36e914d 418 hdrfn, hdrfline, (p - protostr) + 1, *p);
39b168b8 419 return -1;
c36e914d 420 }
421 p++;
422
39b168b8 423 // check for x(void)
424 p = sskip(p);
06c5d854 425 if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
426 && *sskip(p + 4) == ')')
39b168b8 427 p += 4;
428
c36e914d 429 while (1) {
430 p = sskip(p);
39b168b8 431 if (*p == ')') {
432 p++;
c36e914d 433 break;
39b168b8 434 }
840257f6 435 if (xarg > 0) {
436 if (*p != ',') {
437 printf("%s:%d:%zd: ',' expected\n",
438 hdrfn, hdrfline, (p - protostr) + 1);
439 return -1;
440 }
c36e914d 441 p = sskip(p + 1);
840257f6 442 }
c36e914d 443
7ba45c34 444 if (!strncmp(p, "...", 3)) {
445 pp->is_vararg = 1;
446 p = sskip(p + 3);
447 if (*p == ')') {
448 p++;
449 break;
450 }
63df67be 451 printf("%s:%d:%zd: ')' expected\n",
7ba45c34 452 hdrfn, hdrfline, (p - protostr) + 1);
453 return -1;
454 }
455
39b168b8 456 arg = &pp->arg[xarg];
c36e914d 457 xarg++;
458
39b168b8 459 p1 = p;
3ebea2cf 460 ret = check_type(p, &arg->type);
461 if (ret <= 0) {
63df67be 462 printf("%s:%d:%zd: unhandled type for arg%d\n",
c36e914d 463 hdrfn, hdrfline, (p - protostr) + 1, xarg);
39b168b8 464 return -1;
c36e914d 465 }
64c59faf 466 p = sskip(p + ret);
c36e914d 467
39b168b8 468 if (*p == '(') {
469 // func ptr
470 arg->fptr = calloc(1, sizeof(*arg->fptr));
471 ret = parse_protostr(p1, arg->fptr);
472 if (ret < 0) {
63df67be 473 printf("%s:%d:%zd: funcarg parse failed\n",
39b168b8 474 hdrfn, hdrfline, p1 - protostr);
475 return -1;
476 }
477 // we'll treat it as void * for non-calls
a652aa9f 478 arg->type.name = strdup("void *");
3ebea2cf 479 arg->type.is_ptr = 1;
39b168b8 480
481 p = p1 + ret;
482 }
483
c36e914d 484 p = next_idt(buf, sizeof(buf), p);
485 p = sskip(p);
486#if 0
487 if (buf[0] == 0) {
63df67be 488 printf("%s:%d:%zd: idt missing for arg%d\n",
c36e914d 489 hdrfn, hdrfline, (p - protostr) + 1, xarg);
39b168b8 490 return -1;
c36e914d 491 }
492#endif
39b168b8 493 arg->reg = NULL;
c36e914d 494
495 ret = get_regparm(regparm, sizeof(regparm), p);
496 if (ret > 0) {
497 p += ret;
498 p = sskip(p);
499
39b168b8 500 arg->reg = strdup(map_reg(regparm));
c36e914d 501 }
a652aa9f 502
503 ret = check_struct_arg(arg);
504 if (ret > 0) {
505 pp->has_structarg = 1;
506 arg->type.is_struct = 1;
507 free(arg->type.name);
508 arg->type.name = strdup("int");
509 for (l = 0; l < ret; l++) {
510 pp_copy_arg(&pp->arg[xarg], arg);
511 xarg++;
512 }
513 }
c36e914d 514 }
515
516 if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
517 if (pp->arg[0].reg != NULL) {
518 printf("%s:%d: %s with arg1 spec %s?\n",
519 hdrfn, hdrfline, cconv, pp->arg[0].reg);
520 }
521 pp->arg[0].reg = strdup("ecx");
522 }
523
524 if (xarg > 1 && IS(cconv, "__fastcall")) {
525 if (pp->arg[1].reg != NULL) {
526 printf("%s:%d: %s with arg2 spec %s?\n",
527 hdrfn, hdrfline, cconv, pp->arg[1].reg);
528 }
529 pp->arg[1].reg = strdup("edx");
530 }
531
7ba45c34 532 if (pp->is_vararg && pp->is_stdcall) {
533 printf("%s:%d: vararg stdcall?\n", hdrfn, hdrfline);
534 return -1;
535 }
536
c36e914d 537 pp->argc = xarg;
538
539 for (i = 0; i < pp->argc; i++) {
540 if (pp->arg[i].reg == NULL)
541 pp->argc_stack++;
542 else
543 pp->argc_reg++;
544 }
545
39b168b8 546 return p - protostr;
c36e914d 547}
548
bd96f656 549static int pp_name_cmp(const void *p1, const void *p2)
550{
551 const struct parsed_proto *pp1 = p1, *pp2 = p2;
552 return strcmp(pp1->name, pp2->name);
553}
554
// growable array of all parsed prototypes, sorted by name once built
static struct parsed_proto *pp_cache;
static int pp_cache_size;   // entries in use
static int pp_cache_alloc;  // entries allocated
558
559static int b_pp_c_handler(char *proto, const char *fname)
560{
561 int ret;
562
563 if (pp_cache_size >= pp_cache_alloc) {
564 pp_cache_alloc = pp_cache_alloc * 2 + 64;
565 pp_cache = realloc(pp_cache, pp_cache_alloc
566 * sizeof(pp_cache[0]));
567 my_assert_not(pp_cache, NULL);
568 memset(pp_cache + pp_cache_size, 0,
569 (pp_cache_alloc - pp_cache_size)
570 * sizeof(pp_cache[0]));
571 }
572
573 ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
574 if (ret < 0)
575 return -1;
576
577 pp_cache_size++;
578 return 0;
579}
580
581static void build_pp_cache(FILE *fhdr)
c36e914d 582{
c36e914d 583 int ret;
584
bd96f656 585 rewind(fhdr);
586
587 ret = do_protostrs(fhdr, hdrfn);
588 if (ret < 0)
589 exit(1);
590
591 qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
592}
593
594static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym)
595{
596 const struct parsed_proto *pp_ret;
597 struct parsed_proto pp_search;
598
599 if (pp_cache == NULL)
600 build_pp_cache(fhdr);
601
602 if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
603 sym++;
c36e914d 604
bd96f656 605 strcpy(pp_search.name, sym);
606 pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
607 sizeof(pp_cache[0]), pp_name_cmp);
608 if (pp_ret == NULL)
c36e914d 609 printf("%s: sym '%s' is missing\n", hdrfn, sym);
bd96f656 610
611 return pp_ret;
612}
613
a652aa9f 614static void pp_copy_arg(struct parsed_proto_arg *d,
615 const struct parsed_proto_arg *s)
616{
617 memcpy(d, s, sizeof(*d));
618
619 if (s->reg != NULL) {
620 d->reg = strdup(s->reg);
621 my_assert_not(d->reg, NULL);
622 }
623 if (s->type.name != NULL) {
624 d->type.name = strdup(s->type.name);
625 my_assert_not(d->type.name, NULL);
626 }
627 if (s->fptr != NULL) {
628 d->fptr = malloc(sizeof(*d->fptr));
629 my_assert_not(d->fptr, NULL);
630 memcpy(d->fptr, s->fptr, sizeof(*d->fptr));
631 }
632}
633
bd96f656 634struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
635{
636 struct parsed_proto *pp;
637 int i;
638
639 pp = malloc(sizeof(*pp));
640 my_assert_not(pp, NULL);
641 memcpy(pp, pp_c, sizeof(*pp)); // lazy..
642
643 // do the actual deep copy..
a652aa9f 644 for (i = 0; i < pp_c->argc; i++)
645 pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
bd96f656 646 if (pp_c->ret_type.name != NULL)
647 pp->ret_type.name = strdup(pp_c->ret_type.name);
c36e914d 648
bd96f656 649 return pp;
c36e914d 650}
651
bd96f656 652static inline void proto_release(struct parsed_proto *pp)
c36e914d 653{
654 int i;
655
656 for (i = 0; i < pp->argc; i++) {
39b168b8 657 if (pp->arg[i].reg != NULL)
c36e914d 658 free(pp->arg[i].reg);
3ebea2cf 659 if (pp->arg[i].type.name != NULL)
660 free(pp->arg[i].type.name);
39b168b8 661 if (pp->arg[i].fptr != NULL)
662 free(pp->arg[i].fptr);
c36e914d 663 }
3ebea2cf 664 if (pp->ret_type.name != NULL)
665 free(pp->ret_type.name);
bd96f656 666 free(pp);
c36e914d 667}