more stuff storm needs
[ia32rtools.git] / tools / protoparse.h
1
2 struct parsed_proto;
3
/* Description of one parsed C type (return type, variable type or arg type). */
struct parsed_type {
        char *name;               // type text; heap-allocated by check_type() (strndup)
        unsigned int is_array:1;  // set by parse_protostr() when a '[...]' declarator is seen
        unsigned int is_ptr:1;    // set by check_type() for '*', known pointer typedefs and LP* names
        unsigned int is_struct:1; // split for args
};
10
/* One argument slot of a parsed prototype. */
struct parsed_proto_arg {
        char *reg;                  // register name (strdup'd) from a "<reg>" annotation; NULL if none was given
        struct parsed_type type;    // argument type as filled in by check_type()
        struct parsed_proto *fptr;  // heap-allocated prototype when the arg is a function pointer, else NULL
        void *datap;                // not touched by the code in this file
};
17
/* Result of parsing one prototype or variable declaration line. */
struct parsed_proto {
        char name[256];                   // function / variable identifier
        union {
                struct parsed_type ret_type;  // return type when is_func is set
                struct parsed_type type;      // same storage, viewed as a variable type
        };
        struct parsed_proto_arg arg[16];  // argument slots; the first argc are in use
        int argc;                         // number of used slots in arg[]
        int argc_stack;                   // count of args whose reg is NULL
        int argc_reg;                     // count of args whose reg is non-NULL
        unsigned int is_func:1;           // declaration contains ')' and was parsed as a function
        unsigned int is_stdcall:1;        // set for __stdcall, __fastcall, __thiscall, __userpurge, WINAPI
        unsigned int is_fastcall:1;       // __fastcall, or register args matching ecx / ecx+edx
        unsigned int is_vararg:1;         // argument list ends with "..."
        unsigned int is_fptr:1;           // declared as "(cconv *name)(...)"
        unsigned int is_noreturn:1;       // DECLSPEC_NORETURN or "noreturn" seen
        unsigned int is_unresolved:1;     // not set anywhere in this file
        unsigned int has_structarg:1;     // at least one arg was expanded by check_struct_arg()
};
37
// name of the header currently being parsed; used as the prefix of diagnostics
static const char *hdrfn;
// line number within hdrfn of the prototype currently being parsed
static int hdrfline = 0;

// copies *s into *d, duplicating the strings and the fptr allocation it owns
static void pp_copy_arg(struct parsed_proto_arg *d,
        const struct parsed_proto_arg *s);

// parses one prototype line and appends the result to the prototype cache
static int b_pp_c_handler(char *proto, const char *fname);
45
46 static int do_protostrs(FILE *fhdr, const char *fname)
47 {
48         const char *finc_name;
49         const char *hdrfn_saved;
50         char protostr[256];
51         char path[256];
52         char fname_inc[256];
53         FILE *finc;
54         int line = 0;
55         int ret;
56         char *p;
57
58         hdrfn_saved = hdrfn;
59         hdrfn = fname;
60
61         while (fgets(protostr, sizeof(protostr), fhdr))
62         {
63                 line++;
64                 if (strncmp(protostr, "//#include ", 11) == 0) {
65                         finc_name = protostr + 11;
66                         p = strpbrk(finc_name, "\r\n ");
67                         if (p != NULL)
68                                 *p = 0;
69
70                         path[0] = 0;
71                         p = strrchr(hdrfn_saved, '/');
72                         if (p) {
73                                 memcpy(path, hdrfn_saved,
74                                         p - hdrfn_saved + 1);
75                                 path[p - hdrfn_saved + 1] = 0;
76                         }
77                         snprintf(fname_inc, sizeof(fname_inc), "%s%s", 
78                                 path, finc_name);
79                         finc = fopen(fname_inc, "r");
80                         if (finc == NULL) {
81                                 printf("%s:%d: can't open '%s'\n",
82                                         fname_inc, line, finc_name);
83                                 continue;
84                         }
85                         ret = do_protostrs(finc, finc_name);
86                         fclose(finc);
87                         if (ret < 0)
88                                 break;
89                         continue;
90                 }
91                 if (strncmp(sskip(protostr), "//", 2) == 0)
92                         continue;
93
94                 p = protostr + strlen(protostr);
95                 for (p--; p >= protostr && my_isblank(*p); --p)
96                         *p = 0;
97                 if (p < protostr)
98                         continue;
99
100                 hdrfline = line;
101
102                 ret = b_pp_c_handler(protostr, hdrfn);
103                 if (ret < 0)
104                         break;
105         }
106
107         hdrfn = hdrfn_saved;
108
109         if (feof(fhdr))
110                 return 0;
111
112         return -1;
113 }
114
/*
 * Extract a "<reg>" annotation that starts at p.
 *
 * dst  - receives the text between '<' and '>' as a NUL-terminated string
 * dlen - size of dst in bytes, including room for the terminator
 * p    - input text
 *
 * Returns the number of characters consumed (both brackets included), or 0
 * when p does not start with '<', the closing '>' is missing, or the name
 * does not fit into dst.  On a 0 return the contents of dst are unspecified.
 */
static int get_regparm(char *dst, size_t dlen, char *p)
{
        size_t i, o;

        if (*p != '<')
                return 0;
        if (dlen == 0)
                return 0;

        for (o = 0, i = 1; ; i++) {
                if (p[i] == 0)
                        return 0;
                if (p[i] == '>')
                        break;
                // always keep one byte free for the terminator
                if (o + 1 >= dlen)
                        return 0;
                dst[o++] = p[i];
        }
        dst[o] = 0;
        return (int)(i + 1);
}
132
// type qualifiers / keywords that may precede the actual type word;
// skip_type_mod() steps over these
static const char *const known_type_mod[] = {
        "const",
        "signed",
        "unsigned",
        "struct",
        "enum",
        "CONST",
        "volatile",
};
143
// typedef names that check_type() treats as pointer types even without a '*'
static const char *const known_ptr_types[] = {
        "FARPROC",
        "WNDPROC",
        "HACCEL",
        "HANDLE",
        "HBITMAP",
        "HCURSOR",
        "HDC",
        "HFONT",
        "HGDIOBJ",
        "HGLOBAL",
        "HICON",
        "HINSTANCE",
        //"HIMC", // DWORD
        "HMODULE",
        "HPALETTE",
        "HRGN",
        "HRSRC",
        "HKEY",
        "HMENU",
        "HWND",
        "PBYTE",
        "PCRITICAL_SECTION",
        "PDWORD",
        "PFILETIME",
        "PHKEY",
        "PLONG",
        "PMEMORY_BASIC_INFORMATION",
        "PUINT",
        "PVOID",
        "PCVOID",
        "PWORD",
        "DLGPROC",
        "TIMERPROC",
        "WNDENUMPROC",
        "va_list",
        "__VALIST",
};
182
// leading keywords that parse_protostr() skips before the return type
static const char *const ignored_keywords[] = {
        "extern",
        "WINBASEAPI",
        "WINUSERAPI",
        "WINGDIAPI",
        "WINADVAPI",
};
190
/*
 * Check whether the text at n starts with the type name t.
 *
 * A space that is followed by another space or by '*' is skipped on both
 * sides, so "void  *" and "void *" compare equal.  Only the length of t is
 * compared; whatever follows in n is ignored.
 *
 * Returns 0 on a match, otherwise the difference of the first pair of
 * characters that differ.
 */
static int typecmp(const char *n, const char *t)
{
        while (*t != 0) {
                while (*n == ' ' && (n[1] == ' ' || n[1] == '*'))
                        n++;
                while (*t == ' ' && (t[1] == ' ' || t[1] == '*'))
                        t++;

                if (*n != *t)
                        return *n - *t;

                n++;
                t++;
        }

        return 0;
}
205
206 static const char *skip_type_mod(const char *n)
207 {
208         int len;
209         int i;
210
211         for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
212                 len = strlen(known_type_mod[i]);
213                 if (strncmp(n, known_type_mod[i], len) != 0)
214                         continue;
215                 if (!my_isblank(n[len]))
216                         continue;
217
218                 n += len;
219                 while (my_isblank(*n))
220                         n++;
221                 i = 0;
222         }
223
224         return n;
225 }
226
227 static int check_type(const char *name, struct parsed_type *type)
228 {
229         const char *n, *n1;
230         int ret = -1;
231         int i;
232
233         n = skip_type_mod(name);
234
235         for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
236                 if (typecmp(n, known_ptr_types[i]))
237                         continue;
238
239                 type->is_ptr = 1;
240                 break;
241         }
242
243         if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
244                 type->is_ptr = 1;
245
246         // assume single word
247         while (!my_isblank(*n) && !my_issep(*n))
248                 n++;
249
250         while (1) {
251                 n1 = n;
252                 while (my_isblank(*n))
253                         n++;
254                 if (*n == '*') {
255                         type->is_ptr = 1;
256                         n++;
257                         continue;
258                 }
259                 break;
260         }
261
262         ret = n1 - name;
263         type->name = strndup(name, ret);
264         if (IS(type->name, "VOID"))
265                 memcpy(type->name, "void", 4);
266
267         return ret;
268 }
269
/*
 * Map a 16-bit or 8-bit register name to the 32-bit register containing it
 * (args are always expanded to 32bit).  Names that are not in the table are
 * returned unchanged.
 */
static const char *map_reg(const char *reg)
{
        static const struct {
                const char *narrow;
                const char *full;
        } widen[] = {
                { "ax", "eax" }, { "bx", "ebx" }, { "cx", "ecx" },
                { "dx", "edx" }, { "si", "esi" }, { "di", "edi" },
                { "al", "eax" }, { "bl", "ebx" }, { "cl", "ecx" },
                { "dl", "edx" },
        };
        size_t k;

        for (k = 0; k < ARRAY_SIZE(widen); k++) {
                if (IS(reg, widen[k].narrow))
                        return widen[k].full;
        }

        return reg;
}
288
289 static int check_struct_arg(struct parsed_proto_arg *arg)
290 {
291         if (IS(arg->type.name, "POINT"))
292                 return 2 - 1;
293
294         return 0;
295 }
296
297 static int parse_protostr(char *protostr, struct parsed_proto *pp)
298 {
299         struct parsed_proto_arg *arg;
300         char regparm[16];
301         char buf[256];
302         char cconv[32];
303         int xarg = 0;
304         char *p, *p1;
305         int i, l;
306         int ret;
307
308         p = sskip(protostr);
309         if (p[0] == '/' && p[1] == '/') {
310                 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
311                 p = sskip(p + 2);
312         }
313
314         // strip unneeded stuff
315         for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
316                 if ((p1[0] == '/' && p1[1] == '*')
317                  || (p1[0] == '*' && p1[1] == '/'))
318                         p1[0] = p1[1] = ' ';
319         }
320
321         if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
322                 pp->is_noreturn = 1;
323                 p = sskip(p + 18);
324         }
325
326         for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
327                 l = strlen(ignored_keywords[i]);
328                 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
329                         p = sskip(p + l + 1);
330         }
331
332         ret = check_type(p, &pp->ret_type);
333         if (ret <= 0) {
334                 printf("%s:%d:%zd: unhandled return in '%s'\n",
335                         hdrfn, hdrfline, (p - protostr) + 1, protostr);
336                 return -1;
337         }
338         p = sskip(p + ret);
339
340         if (!strncmp(p, "noreturn ", 9)) {
341                 pp->is_noreturn = 1;
342                 p = sskip(p + 9);
343         }
344
345         if (!strchr(p, ')')) {
346                 p = next_idt(buf, sizeof(buf), p);
347                 p = sskip(p);
348                 if (buf[0] == 0) {
349                         printf("%s:%d:%zd: var name missing\n",
350                                 hdrfn, hdrfline, (p - protostr) + 1);
351                         return -1;
352                 }
353                 strcpy(pp->name, buf);
354
355                 p1 = strchr(p, ']');
356                 if (p1 != NULL) {
357                         p = p1 + 1;
358                         pp->ret_type.is_array = 1;
359                 }
360                 return p - protostr;
361         }
362
363         pp->is_func = 1;
364
365         if (*p == '(') {
366                 pp->is_fptr = 1;
367                 p = sskip(p + 1);
368         }
369
370         p = next_word(cconv, sizeof(cconv), p);
371         p = sskip(p);
372         if (cconv[0] == 0) {
373                 printf("%s:%d:%zd: cconv missing\n",
374                         hdrfn, hdrfline, (p - protostr) + 1);
375                 return -1;
376         }
377         if      (IS(cconv, "__cdecl"))
378                 pp->is_stdcall = 0;
379         else if (IS(cconv, "__stdcall"))
380                 pp->is_stdcall = 1;
381         else if (IS(cconv, "__fastcall")) {
382                 pp->is_fastcall = 1;
383                 pp->is_stdcall = 1; // sort of..
384         }
385         else if (IS(cconv, "__thiscall"))
386                 pp->is_stdcall = 1;
387         else if (IS(cconv, "__userpurge"))
388                 pp->is_stdcall = 1; // IDA
389         else if (IS(cconv, "__usercall"))
390                 pp->is_stdcall = 0; // IDA
391         else if (IS(cconv, "WINAPI"))
392                 pp->is_stdcall = 1;
393         else {
394                 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
395                         hdrfn, hdrfline, (p - protostr) + 1, cconv);
396                 return -1;
397         }
398
399         if (pp->is_fptr) {
400                 if (*p != '*') {
401                         printf("%s:%d:%zd: '*' expected\n",
402                                 hdrfn, hdrfline, (p - protostr) + 1);
403                         return -1;
404                 }
405                 p++;
406                 // XXX: skipping extra asterisks, for now
407                 while (*p == '*')
408                         p++;
409                 p = sskip(p);
410         }
411
412         p = next_idt(buf, sizeof(buf), p);
413         p = sskip(p);
414         if (buf[0] == 0) {
415                 //printf("%s:%d:%zd: func name missing\n",
416                 //      hdrfn, hdrfline, (p - protostr) + 1);
417                 //return -1;
418         }
419         strcpy(pp->name, buf);
420
421         ret = get_regparm(regparm, sizeof(regparm), p);
422         if (ret > 0) {
423                 if (!IS(regparm, "eax") && !IS(regparm, "ax")
424                  && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
425                 {
426                         printf("%s:%d:%zd: bad regparm: %s\n",
427                                 hdrfn, hdrfline, (p - protostr) + 1, regparm);
428                         return -1;
429                 }
430                 p += ret;
431                 p = sskip(p);
432         }
433
434         if (pp->is_fptr) {
435                 if (*p == '[') {
436                         // not really ret_type is array, but ohwell
437                         pp->ret_type.is_array = 1;
438                         p = strchr(p + 1, ']');
439                         if (p == NULL) {
440                                 printf("%s:%d:%zd: ']' expected\n",
441                                  hdrfn, hdrfline, (p - protostr) + 1);
442                                 return -1;
443                         }
444                         p = sskip(p + 1);
445                 }
446                 if (*p != ')') {
447                         printf("%s:%d:%zd: ')' expected\n",
448                                 hdrfn, hdrfline, (p - protostr) + 1);
449                         return -1;
450                 }
451                 p = sskip(p + 1);
452         }
453
454         if (*p != '(') {
455                 printf("%s:%d:%zd: '(' expected, got '%c'\n",
456                                 hdrfn, hdrfline, (p - protostr) + 1, *p);
457                 return -1;
458         }
459         p++;
460
461         // check for x(void)
462         p = sskip(p);
463         if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
464            && *sskip(p + 4) == ')')
465                 p += 4;
466
467         while (1) {
468                 p = sskip(p);
469                 if (*p == ')') {
470                         p++;
471                         break;
472                 }
473                 if (xarg > 0) {
474                         if (*p != ',') {
475                                 printf("%s:%d:%zd: ',' expected\n",
476                                  hdrfn, hdrfline, (p - protostr) + 1);
477                                 return -1;
478                         }
479                         p = sskip(p + 1);
480                 }
481
482                 if (!strncmp(p, "...", 3)) {
483                         pp->is_vararg = 1;
484                         p = sskip(p + 3);
485                         if (*p == ')') {
486                                 p++;
487                                 break;
488                         }
489                         printf("%s:%d:%zd: ')' expected\n",
490                                 hdrfn, hdrfline, (p - protostr) + 1);
491                         return -1;
492                 }
493
494                 arg = &pp->arg[xarg];
495                 xarg++;
496
497                 p1 = p;
498                 ret = check_type(p, &arg->type);
499                 if (ret <= 0) {
500                         printf("%s:%d:%zd: unhandled type for arg%d\n",
501                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
502                         return -1;
503                 }
504                 p = sskip(p + ret);
505
506                 if (*p == '(') {
507                         // func ptr
508                         arg->fptr = calloc(1, sizeof(*arg->fptr));
509                         ret = parse_protostr(p1, arg->fptr);
510                         if (ret < 0) {
511                                 printf("%s:%d:%zd: funcarg parse failed\n",
512                                         hdrfn, hdrfline, p1 - protostr);
513                                 return -1;
514                         }
515                         // we'll treat it as void * for non-calls
516                         arg->type.name = strdup("void *");
517                         arg->type.is_ptr = 1;
518
519                         p = p1 + ret;
520                 }
521
522                 p = next_idt(buf, sizeof(buf), p);
523                 p = sskip(p);
524 #if 0
525                 if (buf[0] == 0) {
526                         printf("%s:%d:%zd: idt missing for arg%d\n",
527                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
528                         return -1;
529                 }
530 #endif
531                 arg->reg = NULL;
532
533                 ret = get_regparm(regparm, sizeof(regparm), p);
534                 if (ret > 0) {
535                         p += ret;
536                         p = sskip(p);
537
538                         arg->reg = strdup(map_reg(regparm));
539                 }
540
541                 if (strstr(arg->type.name, "int64")
542                     || IS(arg->type.name, "double"))
543                 {
544                         // hack..
545                         free(arg->type.name);
546                         arg->type.name = strdup("int");
547                         pp_copy_arg(&pp->arg[xarg], arg);
548                         xarg++;
549                 }
550
551                 ret = check_struct_arg(arg);
552                 if (ret > 0) {
553                         pp->has_structarg = 1;
554                         arg->type.is_struct = 1;
555                         free(arg->type.name);
556                         arg->type.name = strdup("int");
557                         for (l = 0; l < ret; l++) {
558                                 pp_copy_arg(&pp->arg[xarg], arg);
559                                 xarg++;
560                         }
561                 }
562         }
563
564         if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
565                 if (pp->arg[0].reg != NULL) {
566                         printf("%s:%d: %s with arg1 spec %s?\n",
567                                 hdrfn, hdrfline, cconv, pp->arg[0].reg);
568                 }
569                 pp->arg[0].reg = strdup("ecx");
570         }
571
572         if (xarg > 1 && IS(cconv, "__fastcall")) {
573                 if (pp->arg[1].reg != NULL) {
574                         printf("%s:%d: %s with arg2 spec %s?\n",
575                                 hdrfn, hdrfline, cconv, pp->arg[1].reg);
576                 }
577                 pp->arg[1].reg = strdup("edx");
578         }
579
580         pp->argc = xarg;
581
582         for (i = 0; i < pp->argc; i++) {
583                 if (pp->arg[i].reg == NULL)
584                         pp->argc_stack++;
585                 else
586                         pp->argc_reg++;
587         }
588
589         if (pp->argc == 1 && pp->arg[0].reg != NULL
590             && IS(pp->arg[0].reg, "ecx"))
591         {
592                 pp->is_fastcall = 1;
593         }
594         else if (pp->argc_reg == 2
595           && pp->arg[0].reg != NULL && IS(pp->arg[0].reg, "ecx")
596           && pp->arg[1].reg != NULL && IS(pp->arg[1].reg, "edx"))
597         {
598                 pp->is_fastcall = 1;
599         }
600
601         if (pp->is_vararg && (pp->is_stdcall || pp->is_fastcall)) {
602                 printf("%s:%d: vararg %s?\n", hdrfn, hdrfline, cconv);
603                 return -1;
604         }
605
606         return p - protostr;
607 }
608
609 static int pp_name_cmp(const void *p1, const void *p2)
610 {
611         const struct parsed_proto *pp1 = p1, *pp2 = p2;
612         return strcmp(pp1->name, pp2->name);
613 }
614
// growable array of parsed prototypes; sorted by name once build_pp_cache() is done
static struct parsed_proto *pp_cache;
// number of valid entries in pp_cache
static int pp_cache_size;
// number of entries pp_cache has room for
static int pp_cache_alloc;
618
619 static int b_pp_c_handler(char *proto, const char *fname)
620 {
621         int ret;
622
623         if (pp_cache_size >= pp_cache_alloc) {
624                 pp_cache_alloc = pp_cache_alloc * 2 + 64;
625                 pp_cache = realloc(pp_cache, pp_cache_alloc
626                                 * sizeof(pp_cache[0]));
627                 my_assert_not(pp_cache, NULL);
628                 memset(pp_cache + pp_cache_size, 0,
629                         (pp_cache_alloc - pp_cache_size)
630                          * sizeof(pp_cache[0]));
631         }
632
633         ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
634         if (ret < 0)
635                 return -1;
636
637         pp_cache_size++;
638         return 0;
639 }
640
641 static void build_pp_cache(FILE *fhdr)
642 {
643         long pos;
644         int ret;
645
646         pos = ftell(fhdr);
647         rewind(fhdr);
648
649         ret = do_protostrs(fhdr, hdrfn);
650         if (ret < 0)
651                 exit(1);
652
653         qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
654         fseek(fhdr, pos, SEEK_SET);
655 }
656
657 static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym,
658         int quiet)
659 {
660         const struct parsed_proto *pp_ret;
661         struct parsed_proto pp_search;
662
663         if (pp_cache == NULL)
664                 build_pp_cache(fhdr);
665
666         if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
667                 sym++;
668
669         strcpy(pp_search.name, sym);
670         pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
671                         sizeof(pp_cache[0]), pp_name_cmp);
672         if (pp_ret == NULL && !quiet)
673                 printf("%s: sym '%s' is missing\n", hdrfn, sym);
674
675         return pp_ret;
676 }
677
678 static void pp_copy_arg(struct parsed_proto_arg *d,
679         const struct parsed_proto_arg *s)
680 {
681         memcpy(d, s, sizeof(*d));
682
683         if (s->reg != NULL) {
684                 d->reg = strdup(s->reg);
685                 my_assert_not(d->reg, NULL);
686         }
687         if (s->type.name != NULL) {
688                 d->type.name = strdup(s->type.name);
689                 my_assert_not(d->type.name, NULL);
690         }
691         if (s->fptr != NULL) {
692                 d->fptr = malloc(sizeof(*d->fptr));
693                 my_assert_not(d->fptr, NULL);
694                 memcpy(d->fptr, s->fptr, sizeof(*d->fptr));
695         }
696 }
697
698 struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
699 {
700         struct parsed_proto *pp;
701         int i;
702
703         pp = malloc(sizeof(*pp));
704         my_assert_not(pp, NULL);
705         memcpy(pp, pp_c, sizeof(*pp)); // lazy..
706
707         // do the actual deep copy..
708         for (i = 0; i < pp_c->argc; i++)
709                 pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
710         if (pp_c->ret_type.name != NULL)
711                 pp->ret_type.name = strdup(pp_c->ret_type.name);
712
713         return pp;
714 }
715
716 static inline void pp_print(char *buf, size_t buf_size,
717   const struct parsed_proto *pp)
718 {
719   size_t l;
720   int i;
721
722   snprintf(buf, buf_size, "%s %s(", pp->ret_type.name, pp->name);
723   l = strlen(buf);
724
725   for (i = 0; i < pp->argc_reg; i++) {
726     snprintf(buf + l, buf_size - l, "%s%s",
727       i == 0 ? "" : ", ", pp->arg[i].reg);
728     l = strlen(buf);
729   }
730   if (pp->argc_stack > 0) {
731     snprintf(buf + l, buf_size - l, "%s{%d stack}",
732       i == 0 ? "" : ", ", pp->argc_stack);
733     l = strlen(buf);
734   }
735   snprintf(buf + l, buf_size - l, ")");
736 }
737
738 static inline void proto_release(struct parsed_proto *pp)
739 {
740         int i;
741
742         for (i = 0; i < pp->argc; i++) {
743                 if (pp->arg[i].reg != NULL)
744                         free(pp->arg[i].reg);
745                 if (pp->arg[i].type.name != NULL)
746                         free(pp->arg[i].type.name);
747                 if (pp->arg[i].fptr != NULL)
748                         free(pp->arg[i].fptr);
749         }
750         if (pp->ret_type.name != NULL)
751                 free(pp->ret_type.name);
752         free(pp);
753 }