handle decorated symbols better
[ia32rtools.git] / tools / protoparse.h
1
struct parsed_proto;

/* A type as it was spelled in a prototype, plus what the parser learned. */
struct parsed_type {
	char *name;              /* heap copy of the type text */
	unsigned is_array : 1;   /* declaration carried a [...] suffix */
	unsigned is_ptr : 1;     /* known pointer typedef, LP* name or '*' */
	unsigned is_struct : 1;  /* struct arg that was split into int slots */
};
10
11 struct parsed_proto_arg {
12         char *reg;
13         struct parsed_type type;
14         struct parsed_proto *fptr;
15         void *datap;
16 };
17
18 struct parsed_proto {
19         char name[256];
20         union {
21                 struct parsed_type ret_type;
22                 struct parsed_type type;
23         };
24         struct parsed_proto_arg arg[16];
25         int argc;
26         int argc_stack;
27         int argc_reg;
28         unsigned int is_func:1;
29         unsigned int is_stdcall:1;
30         unsigned int is_fastcall:1;
31         unsigned int is_vararg:1;     // vararg func
32         unsigned int is_fptr:1;
33         unsigned int is_noreturn:1;
34         unsigned int is_unresolved:1;
35         unsigned int is_arg:1;        // decl in func arg
36         unsigned int has_structarg:1;
37 };
38
// file name and line number used as the location prefix in diagnostics
static const char *hdrfn;
static int hdrfline;

// forward declarations
static void pp_copy_arg(struct parsed_proto_arg *d, const struct parsed_proto_arg *s);
static int b_pp_c_handler(char *proto, const char *fname);
46
47 static int do_protostrs(FILE *fhdr, const char *fname)
48 {
49         const char *finc_name;
50         const char *hdrfn_saved;
51         char protostr[256];
52         char path[256];
53         char fname_inc[256];
54         FILE *finc;
55         int line = 0;
56         int ret;
57         char *p;
58
59         hdrfn_saved = hdrfn;
60         hdrfn = fname;
61
62         while (fgets(protostr, sizeof(protostr), fhdr))
63         {
64                 line++;
65                 if (strncmp(protostr, "//#include ", 11) == 0) {
66                         finc_name = protostr + 11;
67                         p = strpbrk(finc_name, "\r\n ");
68                         if (p != NULL)
69                                 *p = 0;
70
71                         path[0] = 0;
72                         p = strrchr(hdrfn_saved, '/');
73                         if (p) {
74                                 memcpy(path, hdrfn_saved,
75                                         p - hdrfn_saved + 1);
76                                 path[p - hdrfn_saved + 1] = 0;
77                         }
78                         snprintf(fname_inc, sizeof(fname_inc), "%s%s", 
79                                 path, finc_name);
80                         finc = fopen(fname_inc, "r");
81                         if (finc == NULL) {
82                                 printf("%s:%d: can't open '%s'\n",
83                                         fname_inc, line, finc_name);
84                                 continue;
85                         }
86                         ret = do_protostrs(finc, finc_name);
87                         fclose(finc);
88                         if (ret < 0)
89                                 break;
90                         continue;
91                 }
92                 if (strncmp(sskip(protostr), "//", 2) == 0)
93                         continue;
94
95                 p = protostr + strlen(protostr);
96                 for (p--; p >= protostr && my_isblank(*p); --p)
97                         *p = 0;
98                 if (p < protostr)
99                         continue;
100
101                 hdrfline = line;
102
103                 ret = b_pp_c_handler(protostr, hdrfn);
104                 if (ret < 0)
105                         break;
106         }
107
108         hdrfn = hdrfn_saved;
109
110         if (feof(fhdr))
111                 return 0;
112
113         return -1;
114 }
115
/*
 * Extract an IDA-style "<reg>" annotation.
 *
 * dst  - receives the text between '<' and '>' as a NUL-terminated string
 * dlen - size of dst in bytes
 * p    - input; must point at '<' for anything to be extracted
 *
 * Returns the number of input chars consumed (including both brackets),
 * or 0 when p does not start with '<', the closing '>' is missing, or
 * the text does not fit into dst together with its terminator.  dst is
 * never written past dst[dlen - 1].
 */
static int get_regparm(char *dst, size_t dlen, const char *p)
{
	size_t i, o = 0;

	if (dlen == 0 || *p != '<')
		return 0;

	for (i = 1; p[i] != '>'; i++) {
		// unterminated, or no room left for this char plus the NUL
		if (p[i] == 0 || o + 1 >= dlen) {
			dst[0] = 0;
			return 0;
		}
		dst[o++] = p[i];
	}
	dst[o] = 0;

	return (int)(i + 1);
}
133
// Keywords that may precede the actual type name; skip_type_mod() steps
// over them.  The table itself is read-only.
static const char * const known_type_mod[] = {
	"const",
	"signed",
	"unsigned",
	"struct",
	"enum",
	"CONST",
	"volatile",
};
144
// Type names that check_type() marks as pointers even though their
// spelling has no '*'.  The table itself is read-only.
static const char * const known_ptr_types[] = {
	"FARPROC",
	"WNDPROC",
	"HACCEL",
	"HANDLE",
	"HBITMAP",
	"HCURSOR",
	"HDC",
	"HFONT",
	"HGDIOBJ",
	"HGLOBAL",
	"HICON",
	"HINSTANCE",
	//"HIMC", // DWORD
	"HMODULE",
	"HPALETTE",
	"HRGN",
	"HRSRC",
	"HKEY",
	"HMENU",
	"HWAVEOUT",
	"HWND",
	"PBYTE",
	"PCRITICAL_SECTION",
	"PDWORD",
	"PFILETIME",
	"PHKEY",
	"PLONG",
	"PMEMORY_BASIC_INFORMATION",
	"PUINT",
	"PVOID",
	"PCVOID",
	"PWORD",
	"DLGPROC",
	"TIMERPROC",
	"WNDENUMPROC",
	"va_list",
	"__VALIST",
};
184
// Leading keywords that parse_protostr() drops before looking at the
// return type.  The table itself is read-only.
static const char * const ignored_keywords[] = {
	"extern",
	"WINBASEAPI",
	"WINUSERAPI",
	"WINGDIAPI",
	"WINADVAPI",
};
192
// step over a space that is followed by another space or by '*'
static const char *typecmp_squeeze(const char *s)
{
	while (s[0] == ' ' && (s[1] == ' ' || s[1] == '*'))
		s++;
	return s;
}

// Prefix compare of n against type name t, ignoring redundant spaces and
// spaces in front of '*' on both sides.  Returns 0 when all of t was
// matched, otherwise the difference of the first mismatching chars.
static int typecmp(const char *n, const char *t)
{
	while (*t != 0) {
		n = typecmp_squeeze(n);
		t = typecmp_squeeze(t);
		if (*n != *t)
			return *n - *t;
		n++;
		t++;
	}

	return 0;
}
207
208 static const char *skip_type_mod(const char *n)
209 {
210         int len;
211         int i;
212
213         for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
214                 len = strlen(known_type_mod[i]);
215                 if (strncmp(n, known_type_mod[i], len) != 0)
216                         continue;
217                 if (!my_isblank(n[len]))
218                         continue;
219
220                 n += len;
221                 while (my_isblank(*n))
222                         n++;
223                 i = 0;
224         }
225
226         return n;
227 }
228
229 static int check_type(const char *name, struct parsed_type *type)
230 {
231         const char *n, *n1;
232         int ret = -1;
233         int i;
234
235         n = skip_type_mod(name);
236
237         for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
238                 if (typecmp(n, known_ptr_types[i]))
239                         continue;
240
241                 type->is_ptr = 1;
242                 break;
243         }
244
245         if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
246                 type->is_ptr = 1;
247
248         // assume single word
249         while (!my_isblank(*n) && !my_issep(*n))
250                 n++;
251
252         while (1) {
253                 n1 = n;
254                 while (my_isblank(*n))
255                         n++;
256                 if (*n == '*') {
257                         type->is_ptr = 1;
258                         n++;
259                         continue;
260                 }
261                 break;
262         }
263
264         ret = n1 - name;
265         type->name = strndup(name, ret);
266         if (IS(type->name, "VOID"))
267                 memcpy(type->name, "void", 4);
268
269         return ret;
270 }
271
/*
 * args are always expanded to 32bit: map a 16bit or 8bit register name
 * to its 32bit counterpart, anything else is returned as-is
 */
static const char *map_reg(const char *reg)
{
	// { narrow name, 32bit name }; 16bit names first, then 8bit ones
	static const char * const widen[][2] = {
		{ "ax", "eax" }, { "bx", "ebx" }, { "cx", "ecx" },
		{ "dx", "edx" }, { "si", "esi" }, { "di", "edi" },
		{ "al", "eax" }, { "bl", "ebx" }, { "cl", "ecx" },
		{ "dl", "edx" },
	};
	int k;

	for (k = 0; k < ARRAY_SIZE(widen); k++) {
		if (IS(reg, widen[k][0]))
			return widen[k][1];
	}

	return reg;
}
290
291 static int check_struct_arg(struct parsed_proto_arg *arg)
292 {
293         if (IS(arg->type.name, "POINT"))
294                 return 2 - 1;
295
296         return 0;
297 }
298
299 static int parse_protostr(char *protostr, struct parsed_proto *pp)
300 {
301         struct parsed_proto_arg *arg;
302         char regparm[16];
303         char buf[256];
304         char cconv[32];
305         int xarg = 0;
306         char *p, *p1;
307         int i, l;
308         int ret;
309
310         p = sskip(protostr);
311         if (p[0] == '/' && p[1] == '/') {
312                 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
313                 p = sskip(p + 2);
314         }
315
316         // strip unneeded stuff
317         for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
318                 if ((p1[0] == '/' && p1[1] == '*')
319                  || (p1[0] == '*' && p1[1] == '/'))
320                         p1[0] = p1[1] = ' ';
321         }
322
323         if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
324                 pp->is_noreturn = 1;
325                 p = sskip(p + 18);
326         }
327
328         for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
329                 l = strlen(ignored_keywords[i]);
330                 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
331                         p = sskip(p + l + 1);
332         }
333
334         ret = check_type(p, &pp->ret_type);
335         if (ret <= 0) {
336                 printf("%s:%d:%zd: unhandled return in '%s'\n",
337                         hdrfn, hdrfline, (p - protostr) + 1, protostr);
338                 return -1;
339         }
340         p = sskip(p + ret);
341
342         if (!strncmp(p, "noreturn ", 9)) {
343                 pp->is_noreturn = 1;
344                 p = sskip(p + 9);
345         }
346
347         if (!strchr(p, ')')) {
348                 p = next_idt(buf, sizeof(buf), p);
349                 p = sskip(p);
350                 if (buf[0] == 0) {
351                         printf("%s:%d:%zd: var name missing\n",
352                                 hdrfn, hdrfline, (p - protostr) + 1);
353                         return -1;
354                 }
355                 strcpy(pp->name, buf);
356
357                 p1 = strchr(p, ']');
358                 if (p1 != NULL) {
359                         p = p1 + 1;
360                         pp->ret_type.is_array = 1;
361                 }
362                 return p - protostr;
363         }
364
365         pp->is_func = 1;
366
367         if (*p == '(') {
368                 pp->is_fptr = 1;
369                 p = sskip(p + 1);
370         }
371
372         p = next_word(cconv, sizeof(cconv), p);
373         p = sskip(p);
374         if (cconv[0] == 0) {
375                 printf("%s:%d:%zd: cconv missing\n",
376                         hdrfn, hdrfline, (p - protostr) + 1);
377                 return -1;
378         }
379         if      (IS(cconv, "__cdecl"))
380                 pp->is_stdcall = 0;
381         else if (IS(cconv, "__stdcall"))
382                 pp->is_stdcall = 1;
383         else if (IS(cconv, "__fastcall")) {
384                 pp->is_fastcall = 1;
385                 pp->is_stdcall = 1; // sort of..
386         }
387         else if (IS(cconv, "__thiscall"))
388                 pp->is_stdcall = 1;
389         else if (IS(cconv, "__userpurge"))
390                 pp->is_stdcall = 1; // IDA
391         else if (IS(cconv, "__usercall"))
392                 pp->is_stdcall = 0; // IDA
393         else if (IS(cconv, "WINAPI"))
394                 pp->is_stdcall = 1;
395         else {
396                 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
397                         hdrfn, hdrfline, (p - protostr) + 1, cconv);
398                 return -1;
399         }
400
401         if (pp->is_fptr) {
402                 if (*p != '*') {
403                         printf("%s:%d:%zd: '*' expected\n",
404                                 hdrfn, hdrfline, (p - protostr) + 1);
405                         return -1;
406                 }
407                 p++;
408                 // XXX: skipping extra asterisks, for now
409                 while (*p == '*')
410                         p++;
411                 p = sskip(p);
412         }
413
414         p = next_idt(buf, sizeof(buf), p);
415         p = sskip(p);
416         if (buf[0] == 0) {
417                 //printf("%s:%d:%zd: func name missing\n",
418                 //      hdrfn, hdrfline, (p - protostr) + 1);
419                 //return -1;
420         }
421         strcpy(pp->name, buf);
422
423         ret = get_regparm(regparm, sizeof(regparm), p);
424         if (ret > 0) {
425                 if (!IS(regparm, "eax") && !IS(regparm, "ax")
426                  && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
427                 {
428                         printf("%s:%d:%zd: bad regparm: %s\n",
429                                 hdrfn, hdrfline, (p - protostr) + 1, regparm);
430                         return -1;
431                 }
432                 p += ret;
433                 p = sskip(p);
434         }
435
436         if (pp->is_fptr) {
437                 if (*p == '[') {
438                         // not really ret_type is array, but ohwell
439                         pp->ret_type.is_array = 1;
440                         p = strchr(p + 1, ']');
441                         if (p == NULL) {
442                                 printf("%s:%d:%zd: ']' expected\n",
443                                  hdrfn, hdrfline, (p - protostr) + 1);
444                                 return -1;
445                         }
446                         p = sskip(p + 1);
447                 }
448                 if (*p != ')') {
449                         printf("%s:%d:%zd: ')' expected\n",
450                                 hdrfn, hdrfline, (p - protostr) + 1);
451                         return -1;
452                 }
453                 p = sskip(p + 1);
454         }
455
456         if (*p != '(') {
457                 printf("%s:%d:%zd: '(' expected, got '%c'\n",
458                                 hdrfn, hdrfline, (p - protostr) + 1, *p);
459                 return -1;
460         }
461         p++;
462
463         // check for x(void)
464         p = sskip(p);
465         if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
466            && *sskip(p + 4) == ')')
467                 p += 4;
468
469         while (1) {
470                 p = sskip(p);
471                 if (*p == ')') {
472                         p++;
473                         break;
474                 }
475                 if (xarg > 0) {
476                         if (*p != ',') {
477                                 printf("%s:%d:%zd: ',' expected\n",
478                                  hdrfn, hdrfline, (p - protostr) + 1);
479                                 return -1;
480                         }
481                         p = sskip(p + 1);
482                 }
483
484                 if (!strncmp(p, "...", 3)) {
485                         pp->is_vararg = 1;
486                         p = sskip(p + 3);
487                         if (*p == ')') {
488                                 p++;
489                                 break;
490                         }
491                         printf("%s:%d:%zd: ')' expected\n",
492                                 hdrfn, hdrfline, (p - protostr) + 1);
493                         return -1;
494                 }
495
496                 arg = &pp->arg[xarg];
497                 xarg++;
498
499                 p1 = p;
500                 ret = check_type(p, &arg->type);
501                 if (ret <= 0) {
502                         printf("%s:%d:%zd: unhandled type for arg%d\n",
503                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
504                         return -1;
505                 }
506                 p = sskip(p + ret);
507
508                 if (*p == '(') {
509                         // func ptr
510                         arg->fptr = calloc(1, sizeof(*arg->fptr));
511                         ret = parse_protostr(p1, arg->fptr);
512                         if (ret < 0) {
513                                 printf("%s:%d:%zd: funcarg parse failed\n",
514                                         hdrfn, hdrfline, p1 - protostr);
515                                 return -1;
516                         }
517                         arg->fptr->is_arg = 1;
518                         // we don't use actual names right now..
519                         snprintf(arg->fptr->name,
520                                 sizeof(arg->fptr->name), "a%d", xarg);
521                         // we'll treat it as void * for non-calls
522                         arg->type.name = strdup("void *");
523                         arg->type.is_ptr = 1;
524
525                         p = p1 + ret;
526                 }
527
528                 p = next_idt(buf, sizeof(buf), p);
529                 p = sskip(p);
530 #if 0
531                 if (buf[0] == 0) {
532                         printf("%s:%d:%zd: idt missing for arg%d\n",
533                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
534                         return -1;
535                 }
536 #endif
537                 arg->reg = NULL;
538
539                 ret = get_regparm(regparm, sizeof(regparm), p);
540                 if (ret > 0) {
541                         p += ret;
542                         p = sskip(p);
543
544                         arg->reg = strdup(map_reg(regparm));
545                 }
546
547                 if (strstr(arg->type.name, "int64")
548                     || IS(arg->type.name, "double"))
549                 {
550                         // hack..
551                         free(arg->type.name);
552                         arg->type.name = strdup("int");
553                         pp_copy_arg(&pp->arg[xarg], arg);
554                         xarg++;
555                 }
556
557                 ret = check_struct_arg(arg);
558                 if (ret > 0) {
559                         pp->has_structarg = 1;
560                         arg->type.is_struct = 1;
561                         free(arg->type.name);
562                         arg->type.name = strdup("int");
563                         for (l = 0; l < ret; l++) {
564                                 pp_copy_arg(&pp->arg[xarg], arg);
565                                 xarg++;
566                         }
567                 }
568         }
569
570         if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
571                 if (pp->arg[0].reg != NULL) {
572                         printf("%s:%d: %s with arg1 spec %s?\n",
573                                 hdrfn, hdrfline, cconv, pp->arg[0].reg);
574                 }
575                 pp->arg[0].reg = strdup("ecx");
576         }
577
578         if (xarg > 1 && IS(cconv, "__fastcall")) {
579                 if (pp->arg[1].reg != NULL) {
580                         printf("%s:%d: %s with arg2 spec %s?\n",
581                                 hdrfn, hdrfline, cconv, pp->arg[1].reg);
582                 }
583                 pp->arg[1].reg = strdup("edx");
584         }
585
586         pp->argc = xarg;
587
588         for (i = 0; i < pp->argc; i++) {
589                 if (pp->arg[i].reg == NULL)
590                         pp->argc_stack++;
591                 else
592                         pp->argc_reg++;
593         }
594
595         if (pp->argc == 1 && pp->arg[0].reg != NULL
596             && IS(pp->arg[0].reg, "ecx"))
597         {
598                 pp->is_fastcall = 1;
599         }
600         else if (pp->argc_reg == 2
601           && pp->arg[0].reg != NULL && IS(pp->arg[0].reg, "ecx")
602           && pp->arg[1].reg != NULL && IS(pp->arg[1].reg, "edx"))
603         {
604                 pp->is_fastcall = 1;
605         }
606
607         if (pp->is_vararg && (pp->is_stdcall || pp->is_fastcall)) {
608                 printf("%s:%d: vararg %s?\n", hdrfn, hdrfline, cconv);
609                 return -1;
610         }
611
612         return p - protostr;
613 }
614
615 static int pp_name_cmp(const void *p1, const void *p2)
616 {
617         const struct parsed_proto *pp1 = p1, *pp2 = p2;
618         return strcmp(pp1->name, pp2->name);
619 }
620
621 static struct parsed_proto *pp_cache;
622 static int pp_cache_size;
623 static int pp_cache_alloc;
624
625 static int b_pp_c_handler(char *proto, const char *fname)
626 {
627         int ret;
628
629         if (pp_cache_size >= pp_cache_alloc) {
630                 pp_cache_alloc = pp_cache_alloc * 2 + 64;
631                 pp_cache = realloc(pp_cache, pp_cache_alloc
632                                 * sizeof(pp_cache[0]));
633                 my_assert_not(pp_cache, NULL);
634                 memset(pp_cache + pp_cache_size, 0,
635                         (pp_cache_alloc - pp_cache_size)
636                          * sizeof(pp_cache[0]));
637         }
638
639         ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
640         if (ret < 0)
641                 return -1;
642
643         pp_cache_size++;
644         return 0;
645 }
646
647 static void build_pp_cache(FILE *fhdr)
648 {
649         long pos;
650         int ret;
651
652         pos = ftell(fhdr);
653         rewind(fhdr);
654
655         ret = do_protostrs(fhdr, hdrfn);
656         if (ret < 0)
657                 exit(1);
658
659         qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
660         fseek(fhdr, pos, SEEK_SET);
661 }
662
663 static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym,
664         int quiet)
665 {
666         const struct parsed_proto *pp_ret;
667         struct parsed_proto pp_search;
668         char *p;
669
670         if (pp_cache == NULL)
671                 build_pp_cache(fhdr);
672
673         if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
674                 sym++;
675
676         strcpy(pp_search.name, sym);
677         p = strchr(pp_search.name, '@');
678         if (p != NULL)
679                 *p = 0;
680
681         pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
682                         sizeof(pp_cache[0]), pp_name_cmp);
683         if (pp_ret == NULL && !quiet)
684                 printf("%s: sym '%s' is missing\n", hdrfn, sym);
685
686         return pp_ret;
687 }
688
689 static void pp_copy_arg(struct parsed_proto_arg *d,
690         const struct parsed_proto_arg *s)
691 {
692         memcpy(d, s, sizeof(*d));
693
694         if (s->reg != NULL) {
695                 d->reg = strdup(s->reg);
696                 my_assert_not(d->reg, NULL);
697         }
698         if (s->type.name != NULL) {
699                 d->type.name = strdup(s->type.name);
700                 my_assert_not(d->type.name, NULL);
701         }
702         if (s->fptr != NULL) {
703                 d->fptr = malloc(sizeof(*d->fptr));
704                 my_assert_not(d->fptr, NULL);
705                 memcpy(d->fptr, s->fptr, sizeof(*d->fptr));
706         }
707 }
708
709 struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
710 {
711         struct parsed_proto *pp;
712         int i;
713
714         pp = malloc(sizeof(*pp));
715         my_assert_not(pp, NULL);
716         memcpy(pp, pp_c, sizeof(*pp)); // lazy..
717
718         // do the actual deep copy..
719         for (i = 0; i < pp_c->argc; i++)
720                 pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
721         if (pp_c->ret_type.name != NULL)
722                 pp->ret_type.name = strdup(pp_c->ret_type.name);
723
724         return pp;
725 }
726
727 static inline void pp_print(char *buf, size_t buf_size,
728   const struct parsed_proto *pp)
729 {
730   size_t l;
731   int i;
732
733   snprintf(buf, buf_size, "%s %s(", pp->ret_type.name, pp->name);
734   l = strlen(buf);
735
736   for (i = 0; i < pp->argc_reg; i++) {
737     snprintf(buf + l, buf_size - l, "%s%s",
738       i == 0 ? "" : ", ", pp->arg[i].reg);
739     l = strlen(buf);
740   }
741   if (pp->argc_stack > 0) {
742     snprintf(buf + l, buf_size - l, "%s{%d stack}",
743       i == 0 ? "" : ", ", pp->argc_stack);
744     l = strlen(buf);
745   }
746   snprintf(buf + l, buf_size - l, ")");
747 }
748
749 static inline void proto_release(struct parsed_proto *pp)
750 {
751         int i;
752
753         for (i = 0; i < pp->argc; i++) {
754                 if (pp->arg[i].reg != NULL)
755                         free(pp->arg[i].reg);
756                 if (pp->arg[i].type.name != NULL)
757                         free(pp->arg[i].type.name);
758                 if (pp->arg[i].fptr != NULL)
759                         free(pp->arg[i].fptr);
760         }
761         if (pp->ret_type.name != NULL)
762                 free(pp->ret_type.name);
763         free(pp);
764 }