yet more stuff for storm
[ia32rtools.git] / tools / protoparse.h
1
2 struct parsed_proto;
3
/*
 * A type as written in a prototype, together with the classification
 * flags the parser derives from it.
 */
struct parsed_type {
        char *name;              // type text; allocated by check_type()
        unsigned is_array : 1;   // a '[...]' followed the declarator
        unsigned is_ptr : 1;     // known pointer typedef, LP* name or has '*'
        unsigned is_struct : 1;  // split for args
};
10
11 struct parsed_proto_arg {
12         char *reg;
13         struct parsed_type type;
14         struct parsed_proto *fptr;
15         void *datap;
16 };
17
18 struct parsed_proto {
19         char name[256];
20         union {
21                 struct parsed_type ret_type;
22                 struct parsed_type type;
23         };
24         struct parsed_proto_arg arg[16];
25         int argc;
26         int argc_stack;
27         int argc_reg;
28         unsigned int is_func:1;
29         unsigned int is_stdcall:1;
30         unsigned int is_fastcall:1;
31         unsigned int is_vararg:1;     // vararg func
32         unsigned int is_fptr:1;
33         unsigned int is_noreturn:1;
34         unsigned int is_unresolved:1;
35         unsigned int is_arg:1;        // decl in func arg
36         unsigned int has_structarg:1;
37 };
38
// name of the header being parsed; prefix of every diagnostic
static const char *hdrfn;
// line number within that header of the declaration being parsed
static int hdrfline;

// defined further down, needed earlier
static void pp_copy_arg(struct parsed_proto_arg *d, const struct parsed_proto_arg *s);
static int b_pp_c_handler(char *proto, const char *fname);
46
47 static int do_protostrs(FILE *fhdr, const char *fname)
48 {
49         const char *finc_name;
50         const char *hdrfn_saved;
51         char protostr[256];
52         char path[256];
53         char fname_inc[256];
54         FILE *finc;
55         int line = 0;
56         int ret;
57         char *p;
58
59         hdrfn_saved = hdrfn;
60         hdrfn = fname;
61
62         while (fgets(protostr, sizeof(protostr), fhdr))
63         {
64                 line++;
65                 if (strncmp(protostr, "//#include ", 11) == 0) {
66                         finc_name = protostr + 11;
67                         p = strpbrk(finc_name, "\r\n ");
68                         if (p != NULL)
69                                 *p = 0;
70
71                         path[0] = 0;
72                         p = strrchr(hdrfn_saved, '/');
73                         if (p) {
74                                 memcpy(path, hdrfn_saved,
75                                         p - hdrfn_saved + 1);
76                                 path[p - hdrfn_saved + 1] = 0;
77                         }
78                         snprintf(fname_inc, sizeof(fname_inc), "%s%s", 
79                                 path, finc_name);
80                         finc = fopen(fname_inc, "r");
81                         if (finc == NULL) {
82                                 printf("%s:%d: can't open '%s'\n",
83                                         fname_inc, line, finc_name);
84                                 continue;
85                         }
86                         ret = do_protostrs(finc, finc_name);
87                         fclose(finc);
88                         if (ret < 0)
89                                 break;
90                         continue;
91                 }
92                 if (strncmp(sskip(protostr), "//", 2) == 0)
93                         continue;
94
95                 p = protostr + strlen(protostr);
96                 for (p--; p >= protostr && my_isblank(*p); --p)
97                         *p = 0;
98                 if (p < protostr)
99                         continue;
100
101                 hdrfline = line;
102
103                 ret = b_pp_c_handler(protostr, hdrfn);
104                 if (ret < 0)
105                         break;
106         }
107
108         hdrfn = hdrfn_saved;
109
110         if (feof(fhdr))
111                 return 0;
112
113         return -1;
114 }
115
/*
 * Extract the text of a "<...>" annotation starting at p.
 *
 * dst, dlen - output buffer and its size; receives the text between the
 *             brackets, NUL terminated, on success only
 * p         - input, must point at the '<' for anything to be parsed
 *
 * Returns the number of input characters consumed including both
 * brackets, or 0 when p does not start with '<', the closing '>' is
 * missing, or the text does not fit in dst (dst is never overrun).
 */
static int get_regparm(char *dst, size_t dlen, char *p)
{
        size_t o = 0;
        int i;

        if (*p != '<' || dlen == 0)
                return 0;

        for (i = 1; p[i] != '>'; i++) {
                if (p[i] == 0)
                        return 0;
                // keep one byte for the terminator
                if (o + 1 >= dlen)
                        return 0;
                dst[o++] = p[i];
        }
        dst[o] = 0;
        return i + 1;
}
133
// words that may precede the actual type name; skip_type_mod() steps
// over them when they are followed by a blank
static const char *known_type_mod[] = {
        "const", "signed", "unsigned",
        "struct", "enum",
        "CONST", "volatile",
};
144
// typedef names that check_type() classifies as pointers even though
// no '*' is written
static const char *known_ptr_types[] = {
        "FARPROC", "WNDPROC",
        "HACCEL", "HANDLE", "HBITMAP", "HCURSOR", "HDC", "HFONT",
        "HGDIOBJ", "HGLOBAL", "HICON", "HINSTANCE",
        //"HIMC", // DWORD
        "HMODULE", "HPALETTE", "HRGN", "HRSRC", "HKEY", "HMENU", "HWND",
        "PBYTE", "PCRITICAL_SECTION", "PDWORD", "PFILETIME", "PHKEY",
        "PLONG", "PMEMORY_BASIC_INFORMATION", "PUINT", "PVOID", "PCVOID",
        "PWORD",
        "DLGPROC", "TIMERPROC", "WNDENUMPROC",
        "va_list", "__VALIST",
};
183
// leading words parse_protostr() drops before looking for the type
static const char *ignored_keywords[] = {
        "extern",
        "WINBASEAPI", "WINUSERAPI", "WINGDIAPI", "WINADVAPI",
};
191
/*
 * Check whether n starts with type name t.  On both sides a space that
 * is followed by another space or by '*' is skipped, so "char  *" and
 * "char *" compare equal.
 *
 * Returns 0 when all of t was matched (n may continue past it),
 * otherwise the difference of the first mismatching characters.
 */
static int typecmp(const char *n, const char *t)
{
        while (*t != 0) {
                for (; n[0] == ' '; n++)
                        if (n[1] != ' ' && n[1] != '*')
                                break;
                for (; t[0] == ' '; t++)
                        if (t[1] != ' ' && t[1] != '*')
                                break;

                if (*n != *t)
                        return *n - *t;

                n++;
                t++;
        }

        return 0;
}
206
207 static const char *skip_type_mod(const char *n)
208 {
209         int len;
210         int i;
211
212         for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
213                 len = strlen(known_type_mod[i]);
214                 if (strncmp(n, known_type_mod[i], len) != 0)
215                         continue;
216                 if (!my_isblank(n[len]))
217                         continue;
218
219                 n += len;
220                 while (my_isblank(*n))
221                         n++;
222                 i = 0;
223         }
224
225         return n;
226 }
227
228 static int check_type(const char *name, struct parsed_type *type)
229 {
230         const char *n, *n1;
231         int ret = -1;
232         int i;
233
234         n = skip_type_mod(name);
235
236         for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
237                 if (typecmp(n, known_ptr_types[i]))
238                         continue;
239
240                 type->is_ptr = 1;
241                 break;
242         }
243
244         if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
245                 type->is_ptr = 1;
246
247         // assume single word
248         while (!my_isblank(*n) && !my_issep(*n))
249                 n++;
250
251         while (1) {
252                 n1 = n;
253                 while (my_isblank(*n))
254                         n++;
255                 if (*n == '*') {
256                         type->is_ptr = 1;
257                         n++;
258                         continue;
259                 }
260                 break;
261         }
262
263         ret = n1 - name;
264         type->name = strndup(name, ret);
265         if (IS(type->name, "VOID"))
266                 memcpy(type->name, "void", 4);
267
268         return ret;
269 }
270
/*
 * args are always expanded to 32bit: translate a 16 or 8 bit register
 * name to the 32 bit register containing it.  Any other string is
 * returned unchanged (the very pointer that was passed in).
 */
static const char *map_reg(const char *reg)
{
        static const struct {
                const char *narrow;
                const char *wide;
        } widen[] = {
                { "ax", "eax" }, { "bx", "ebx" }, { "cx", "ecx" },
                { "dx", "edx" }, { "si", "esi" }, { "di", "edi" },
                { "al", "eax" }, { "bl", "ebx" }, { "cl", "ecx" },
                { "dl", "edx" },
        };
        int k;

        for (k = 0; k < ARRAY_SIZE(widen); k++) {
                if (IS(reg, widen[k].narrow))
                        return widen[k].wide;
        }

        return reg;
}
289
290 static int check_struct_arg(struct parsed_proto_arg *arg)
291 {
292         if (IS(arg->type.name, "POINT"))
293                 return 2 - 1;
294
295         return 0;
296 }
297
298 static int parse_protostr(char *protostr, struct parsed_proto *pp)
299 {
300         struct parsed_proto_arg *arg;
301         char regparm[16];
302         char buf[256];
303         char cconv[32];
304         int xarg = 0;
305         char *p, *p1;
306         int i, l;
307         int ret;
308
309         p = sskip(protostr);
310         if (p[0] == '/' && p[1] == '/') {
311                 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
312                 p = sskip(p + 2);
313         }
314
315         // strip unneeded stuff
316         for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
317                 if ((p1[0] == '/' && p1[1] == '*')
318                  || (p1[0] == '*' && p1[1] == '/'))
319                         p1[0] = p1[1] = ' ';
320         }
321
322         if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
323                 pp->is_noreturn = 1;
324                 p = sskip(p + 18);
325         }
326
327         for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
328                 l = strlen(ignored_keywords[i]);
329                 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
330                         p = sskip(p + l + 1);
331         }
332
333         ret = check_type(p, &pp->ret_type);
334         if (ret <= 0) {
335                 printf("%s:%d:%zd: unhandled return in '%s'\n",
336                         hdrfn, hdrfline, (p - protostr) + 1, protostr);
337                 return -1;
338         }
339         p = sskip(p + ret);
340
341         if (!strncmp(p, "noreturn ", 9)) {
342                 pp->is_noreturn = 1;
343                 p = sskip(p + 9);
344         }
345
346         if (!strchr(p, ')')) {
347                 p = next_idt(buf, sizeof(buf), p);
348                 p = sskip(p);
349                 if (buf[0] == 0) {
350                         printf("%s:%d:%zd: var name missing\n",
351                                 hdrfn, hdrfline, (p - protostr) + 1);
352                         return -1;
353                 }
354                 strcpy(pp->name, buf);
355
356                 p1 = strchr(p, ']');
357                 if (p1 != NULL) {
358                         p = p1 + 1;
359                         pp->ret_type.is_array = 1;
360                 }
361                 return p - protostr;
362         }
363
364         pp->is_func = 1;
365
366         if (*p == '(') {
367                 pp->is_fptr = 1;
368                 p = sskip(p + 1);
369         }
370
371         p = next_word(cconv, sizeof(cconv), p);
372         p = sskip(p);
373         if (cconv[0] == 0) {
374                 printf("%s:%d:%zd: cconv missing\n",
375                         hdrfn, hdrfline, (p - protostr) + 1);
376                 return -1;
377         }
378         if      (IS(cconv, "__cdecl"))
379                 pp->is_stdcall = 0;
380         else if (IS(cconv, "__stdcall"))
381                 pp->is_stdcall = 1;
382         else if (IS(cconv, "__fastcall")) {
383                 pp->is_fastcall = 1;
384                 pp->is_stdcall = 1; // sort of..
385         }
386         else if (IS(cconv, "__thiscall"))
387                 pp->is_stdcall = 1;
388         else if (IS(cconv, "__userpurge"))
389                 pp->is_stdcall = 1; // IDA
390         else if (IS(cconv, "__usercall"))
391                 pp->is_stdcall = 0; // IDA
392         else if (IS(cconv, "WINAPI"))
393                 pp->is_stdcall = 1;
394         else {
395                 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
396                         hdrfn, hdrfline, (p - protostr) + 1, cconv);
397                 return -1;
398         }
399
400         if (pp->is_fptr) {
401                 if (*p != '*') {
402                         printf("%s:%d:%zd: '*' expected\n",
403                                 hdrfn, hdrfline, (p - protostr) + 1);
404                         return -1;
405                 }
406                 p++;
407                 // XXX: skipping extra asterisks, for now
408                 while (*p == '*')
409                         p++;
410                 p = sskip(p);
411         }
412
413         p = next_idt(buf, sizeof(buf), p);
414         p = sskip(p);
415         if (buf[0] == 0) {
416                 //printf("%s:%d:%zd: func name missing\n",
417                 //      hdrfn, hdrfline, (p - protostr) + 1);
418                 //return -1;
419         }
420         strcpy(pp->name, buf);
421
422         ret = get_regparm(regparm, sizeof(regparm), p);
423         if (ret > 0) {
424                 if (!IS(regparm, "eax") && !IS(regparm, "ax")
425                  && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
426                 {
427                         printf("%s:%d:%zd: bad regparm: %s\n",
428                                 hdrfn, hdrfline, (p - protostr) + 1, regparm);
429                         return -1;
430                 }
431                 p += ret;
432                 p = sskip(p);
433         }
434
435         if (pp->is_fptr) {
436                 if (*p == '[') {
437                         // not really ret_type is array, but ohwell
438                         pp->ret_type.is_array = 1;
439                         p = strchr(p + 1, ']');
440                         if (p == NULL) {
441                                 printf("%s:%d:%zd: ']' expected\n",
442                                  hdrfn, hdrfline, (p - protostr) + 1);
443                                 return -1;
444                         }
445                         p = sskip(p + 1);
446                 }
447                 if (*p != ')') {
448                         printf("%s:%d:%zd: ')' expected\n",
449                                 hdrfn, hdrfline, (p - protostr) + 1);
450                         return -1;
451                 }
452                 p = sskip(p + 1);
453         }
454
455         if (*p != '(') {
456                 printf("%s:%d:%zd: '(' expected, got '%c'\n",
457                                 hdrfn, hdrfline, (p - protostr) + 1, *p);
458                 return -1;
459         }
460         p++;
461
462         // check for x(void)
463         p = sskip(p);
464         if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
465            && *sskip(p + 4) == ')')
466                 p += 4;
467
468         while (1) {
469                 p = sskip(p);
470                 if (*p == ')') {
471                         p++;
472                         break;
473                 }
474                 if (xarg > 0) {
475                         if (*p != ',') {
476                                 printf("%s:%d:%zd: ',' expected\n",
477                                  hdrfn, hdrfline, (p - protostr) + 1);
478                                 return -1;
479                         }
480                         p = sskip(p + 1);
481                 }
482
483                 if (!strncmp(p, "...", 3)) {
484                         pp->is_vararg = 1;
485                         p = sskip(p + 3);
486                         if (*p == ')') {
487                                 p++;
488                                 break;
489                         }
490                         printf("%s:%d:%zd: ')' expected\n",
491                                 hdrfn, hdrfline, (p - protostr) + 1);
492                         return -1;
493                 }
494
495                 arg = &pp->arg[xarg];
496                 xarg++;
497
498                 p1 = p;
499                 ret = check_type(p, &arg->type);
500                 if (ret <= 0) {
501                         printf("%s:%d:%zd: unhandled type for arg%d\n",
502                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
503                         return -1;
504                 }
505                 p = sskip(p + ret);
506
507                 if (*p == '(') {
508                         // func ptr
509                         arg->fptr = calloc(1, sizeof(*arg->fptr));
510                         ret = parse_protostr(p1, arg->fptr);
511                         if (ret < 0) {
512                                 printf("%s:%d:%zd: funcarg parse failed\n",
513                                         hdrfn, hdrfline, p1 - protostr);
514                                 return -1;
515                         }
516                         arg->fptr->is_arg = 1;
517                         // we don't use actual names right now..
518                         snprintf(arg->fptr->name,
519                                 sizeof(arg->fptr->name), "a%d", xarg);
520                         // we'll treat it as void * for non-calls
521                         arg->type.name = strdup("void *");
522                         arg->type.is_ptr = 1;
523
524                         p = p1 + ret;
525                 }
526
527                 p = next_idt(buf, sizeof(buf), p);
528                 p = sskip(p);
529 #if 0
530                 if (buf[0] == 0) {
531                         printf("%s:%d:%zd: idt missing for arg%d\n",
532                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
533                         return -1;
534                 }
535 #endif
536                 arg->reg = NULL;
537
538                 ret = get_regparm(regparm, sizeof(regparm), p);
539                 if (ret > 0) {
540                         p += ret;
541                         p = sskip(p);
542
543                         arg->reg = strdup(map_reg(regparm));
544                 }
545
546                 if (strstr(arg->type.name, "int64")
547                     || IS(arg->type.name, "double"))
548                 {
549                         // hack..
550                         free(arg->type.name);
551                         arg->type.name = strdup("int");
552                         pp_copy_arg(&pp->arg[xarg], arg);
553                         xarg++;
554                 }
555
556                 ret = check_struct_arg(arg);
557                 if (ret > 0) {
558                         pp->has_structarg = 1;
559                         arg->type.is_struct = 1;
560                         free(arg->type.name);
561                         arg->type.name = strdup("int");
562                         for (l = 0; l < ret; l++) {
563                                 pp_copy_arg(&pp->arg[xarg], arg);
564                                 xarg++;
565                         }
566                 }
567         }
568
569         if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
570                 if (pp->arg[0].reg != NULL) {
571                         printf("%s:%d: %s with arg1 spec %s?\n",
572                                 hdrfn, hdrfline, cconv, pp->arg[0].reg);
573                 }
574                 pp->arg[0].reg = strdup("ecx");
575         }
576
577         if (xarg > 1 && IS(cconv, "__fastcall")) {
578                 if (pp->arg[1].reg != NULL) {
579                         printf("%s:%d: %s with arg2 spec %s?\n",
580                                 hdrfn, hdrfline, cconv, pp->arg[1].reg);
581                 }
582                 pp->arg[1].reg = strdup("edx");
583         }
584
585         pp->argc = xarg;
586
587         for (i = 0; i < pp->argc; i++) {
588                 if (pp->arg[i].reg == NULL)
589                         pp->argc_stack++;
590                 else
591                         pp->argc_reg++;
592         }
593
594         if (pp->argc == 1 && pp->arg[0].reg != NULL
595             && IS(pp->arg[0].reg, "ecx"))
596         {
597                 pp->is_fastcall = 1;
598         }
599         else if (pp->argc_reg == 2
600           && pp->arg[0].reg != NULL && IS(pp->arg[0].reg, "ecx")
601           && pp->arg[1].reg != NULL && IS(pp->arg[1].reg, "edx"))
602         {
603                 pp->is_fastcall = 1;
604         }
605
606         if (pp->is_vararg && (pp->is_stdcall || pp->is_fastcall)) {
607                 printf("%s:%d: vararg %s?\n", hdrfn, hdrfline, cconv);
608                 return -1;
609         }
610
611         return p - protostr;
612 }
613
614 static int pp_name_cmp(const void *p1, const void *p2)
615 {
616         const struct parsed_proto *pp1 = p1, *pp2 = p2;
617         return strcmp(pp1->name, pp2->name);
618 }
619
620 static struct parsed_proto *pp_cache;
621 static int pp_cache_size;
622 static int pp_cache_alloc;
623
624 static int b_pp_c_handler(char *proto, const char *fname)
625 {
626         int ret;
627
628         if (pp_cache_size >= pp_cache_alloc) {
629                 pp_cache_alloc = pp_cache_alloc * 2 + 64;
630                 pp_cache = realloc(pp_cache, pp_cache_alloc
631                                 * sizeof(pp_cache[0]));
632                 my_assert_not(pp_cache, NULL);
633                 memset(pp_cache + pp_cache_size, 0,
634                         (pp_cache_alloc - pp_cache_size)
635                          * sizeof(pp_cache[0]));
636         }
637
638         ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
639         if (ret < 0)
640                 return -1;
641
642         pp_cache_size++;
643         return 0;
644 }
645
646 static void build_pp_cache(FILE *fhdr)
647 {
648         long pos;
649         int ret;
650
651         pos = ftell(fhdr);
652         rewind(fhdr);
653
654         ret = do_protostrs(fhdr, hdrfn);
655         if (ret < 0)
656                 exit(1);
657
658         qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
659         fseek(fhdr, pos, SEEK_SET);
660 }
661
662 static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym,
663         int quiet)
664 {
665         const struct parsed_proto *pp_ret;
666         struct parsed_proto pp_search;
667
668         if (pp_cache == NULL)
669                 build_pp_cache(fhdr);
670
671         if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
672                 sym++;
673
674         strcpy(pp_search.name, sym);
675         pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
676                         sizeof(pp_cache[0]), pp_name_cmp);
677         if (pp_ret == NULL && !quiet)
678                 printf("%s: sym '%s' is missing\n", hdrfn, sym);
679
680         return pp_ret;
681 }
682
683 static void pp_copy_arg(struct parsed_proto_arg *d,
684         const struct parsed_proto_arg *s)
685 {
686         memcpy(d, s, sizeof(*d));
687
688         if (s->reg != NULL) {
689                 d->reg = strdup(s->reg);
690                 my_assert_not(d->reg, NULL);
691         }
692         if (s->type.name != NULL) {
693                 d->type.name = strdup(s->type.name);
694                 my_assert_not(d->type.name, NULL);
695         }
696         if (s->fptr != NULL) {
697                 d->fptr = malloc(sizeof(*d->fptr));
698                 my_assert_not(d->fptr, NULL);
699                 memcpy(d->fptr, s->fptr, sizeof(*d->fptr));
700         }
701 }
702
703 struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
704 {
705         struct parsed_proto *pp;
706         int i;
707
708         pp = malloc(sizeof(*pp));
709         my_assert_not(pp, NULL);
710         memcpy(pp, pp_c, sizeof(*pp)); // lazy..
711
712         // do the actual deep copy..
713         for (i = 0; i < pp_c->argc; i++)
714                 pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
715         if (pp_c->ret_type.name != NULL)
716                 pp->ret_type.name = strdup(pp_c->ret_type.name);
717
718         return pp;
719 }
720
721 static inline void pp_print(char *buf, size_t buf_size,
722   const struct parsed_proto *pp)
723 {
724   size_t l;
725   int i;
726
727   snprintf(buf, buf_size, "%s %s(", pp->ret_type.name, pp->name);
728   l = strlen(buf);
729
730   for (i = 0; i < pp->argc_reg; i++) {
731     snprintf(buf + l, buf_size - l, "%s%s",
732       i == 0 ? "" : ", ", pp->arg[i].reg);
733     l = strlen(buf);
734   }
735   if (pp->argc_stack > 0) {
736     snprintf(buf + l, buf_size - l, "%s{%d stack}",
737       i == 0 ? "" : ", ", pp->argc_stack);
738     l = strlen(buf);
739   }
740   snprintf(buf + l, buf_size - l, ")");
741 }
742
743 static inline void proto_release(struct parsed_proto *pp)
744 {
745         int i;
746
747         for (i = 0; i < pp->argc; i++) {
748                 if (pp->arg[i].reg != NULL)
749                         free(pp->arg[i].reg);
750                 if (pp->arg[i].type.name != NULL)
751                         free(pp->arg[i].type.name);
752                 if (pp->arg[i].fptr != NULL)
753                         free(pp->arg[i].fptr);
754         }
755         if (pp->ret_type.name != NULL)
756                 free(pp->ret_type.name);
757         free(pp);
758 }