more work on cvt_data, works mostly
[ia32rtools.git] / tools / protoparse.h
1
2 struct parsed_proto;
3
/*
 * Type of a return value, variable or argument as found in a prototype.
 */
struct parsed_type {
        // type text as written in the header; allocated by check_type()
        char *name;
        // set when the declaration carries a '[...]' suffix
        unsigned int is_array:1;
        // set for '*' declarators and for known pointer typedefs (see check_type())
        unsigned int is_ptr:1;
        // by-value struct argument that parse_protostr() expanded into
        // several "int" arguments
        unsigned int is_struct:1; // split for args
};
10
/*
 * One argument of a parsed function prototype.
 */
struct parsed_proto_arg {
        // 32bit register name from a '<reg>' annotation (or forced by
        // __fastcall/__thiscall); NULL means the argument is counted as a
        // stack argument
        char *reg;
        struct parsed_type type;
        // nested prototype when the argument is a function pointer, else NULL
        struct parsed_proto *fptr;
        // not touched by the parser in this file; copied as-is by pp_copy_arg()
        void *datap;
};
17
/*
 * Result of parsing one header line: either a function prototype
 * (is_func set) or a plain variable declaration.
 */
struct parsed_proto {
        // function or variable name; pp_name_cmp() sorts and searches by it
        char name[256];
        // same storage under two names: return type for functions,
        // variable type otherwise
        union {
                struct parsed_type ret_type;
                struct parsed_type type;
        };
        struct parsed_proto_arg arg[16];
        // number of used entries in arg[]
        int argc;
        // number of args with reg == NULL
        int argc_stack;
        // number of args with reg != NULL
        int argc_reg;
        // set when the line contains ')' and was parsed as a function
        unsigned int is_func:1;
        // derived from the calling convention keyword, see parse_protostr()
        unsigned int is_stdcall:1;
        // argument list ends with "..."
        unsigned int is_vararg:1;
        // declaration has the form "ret (cconv *name)(args)"
        unsigned int is_fptr:1;
        // line started with DECLSPEC_NORETURN
        unsigned int is_noreturn:1;
        // at least one argument was a by-value struct that got split
        unsigned int has_structarg:1;
};
35
// name of the header being parsed, used as the "file" part of diagnostics;
// do_protostrs() switches it while reading and restores it afterwards.
// NOTE(review): build_pp_cache() reads it before anything in this file sets
// it, so presumably the including source assigns it first - confirm.
static const char *hdrfn;
// line number within hdrfn of the prototype currently being parsed
static int hdrfline = 0;

// defined near the end of this file; declared early for parse_protostr()
static void pp_copy_arg(struct parsed_proto_arg *d,
        const struct parsed_proto_arg *s);

// defined further down; declared early for do_protostrs()
static int b_pp_c_handler(char *proto, const char *fname);
43
44 static int do_protostrs(FILE *fhdr, const char *fname)
45 {
46         const char *finc_name;
47         const char *hdrfn_saved;
48         char protostr[256];
49         char path[256];
50         char fname_inc[256];
51         FILE *finc;
52         int line = 0;
53         int ret;
54         char *p;
55
56         hdrfn_saved = hdrfn;
57         hdrfn = fname;
58
59         while (fgets(protostr, sizeof(protostr), fhdr))
60         {
61                 line++;
62                 if (strncmp(protostr, "//#include ", 11) == 0) {
63                         finc_name = protostr + 11;
64                         p = strpbrk(finc_name, "\r\n ");
65                         if (p != NULL)
66                                 *p = 0;
67
68                         path[0] = 0;
69                         p = strrchr(hdrfn_saved, '/');
70                         if (p) {
71                                 memcpy(path, hdrfn_saved,
72                                         p - hdrfn_saved + 1);
73                                 path[p - hdrfn_saved + 1] = 0;
74                         }
75                         snprintf(fname_inc, sizeof(fname_inc), "%s%s", 
76                                 path, finc_name);
77                         finc = fopen(fname_inc, "r");
78                         if (finc == NULL) {
79                                 printf("%s:%d: can't open '%s'\n",
80                                         fname_inc, line, finc_name);
81                                 continue;
82                         }
83                         ret = do_protostrs(finc, finc_name);
84                         fclose(finc);
85                         if (ret < 0)
86                                 break;
87                         continue;
88                 }
89                 if (strncmp(sskip(protostr), "//", 2) == 0)
90                         continue;
91
92                 p = protostr + strlen(protostr);
93                 for (p--; p >= protostr && my_isblank(*p); --p)
94                         *p = 0;
95                 if (p < protostr)
96                         continue;
97
98                 hdrfline = line;
99
100                 ret = b_pp_c_handler(protostr, hdrfn);
101                 if (ret < 0)
102                         break;
103         }
104
105         hdrfn = hdrfn_saved;
106
107         if (feof(fhdr))
108                 return 0;
109
110         return -1;
111 }
112
/*
 * Extract a "<reg>" annotation located at p.
 *
 * dst/dlen: output buffer for the text between '<' and '>' (NUL terminated).
 * p:        text to examine.
 *
 * Returns the number of chars consumed including both brackets, or 0 when
 * p doesn't start with '<', the closing '>' is missing, or the text does
 * not fit into dst (dst contents are unspecified in that case).
 */
static int get_regparm(char *dst, size_t dlen, char *p)
{
        size_t o = 0;
        int i;

        if (*p != '<')
                return 0;
        if (dlen == 0)
                return 0;

        for (i = 1; p[i] != '>'; i++) {
                if (p[i] == 0)
                        return 0;
                // always keep room for the terminator
                if (o + 1 >= dlen)
                        return 0;
                dst[o++] = p[i];
        }
        dst[o] = 0;
        return i + 1;
}
130
// hmh..
// leading words that skip_type_mod() steps over before the actual type word
static const char *known_type_mod[] = {
        "const",
        "signed",
        "unsigned",
        "struct",
        "enum",
        "CONST",
};
140
// type names that check_type() flags as pointers even without a '*';
// matching is done with typecmp(), i.e. as a prefix of the parsed text.
// (names starting with "LP", except LPARAM, are handled separately there)
static const char *known_ptr_types[] = {
        "HACCEL",
        "HANDLE",
        "HBITMAP",
        "HCURSOR",
        "HDC",
        "HFONT",
        "HGDIOBJ",
        "HGLOBAL",
        "HINSTANCE",
        "HIMC",
        "HMODULE",
        "HRGN",
        "HRSRC",
        "HKEY",
        "HMENU",
        "HWND",
        "PCRITICAL_SECTION",
        "PDWORD",
        "PHKEY",
        "PLONG",
        "PMEMORY_BASIC_INFORMATION",
        "PUINT",
        "PVOID",
        "PCVOID",
        "DLGPROC",
        "TIMERPROC",
        "WNDENUMPROC",
        "va_list",
        "__VALIST",
};
172
// words parse_protostr() drops from the start of a declaration
// (only when followed by a blank)
static const char *ignored_keywords[] = {
        "extern",
        "WINBASEAPI",
        "WINUSERAPI",
        "WINGDIAPI",
        "WINADVAPI",
};
180
// step over spaces that are directly followed by another space or a '*'
static const char *typecmp_squeeze(const char *s)
{
        while (s[0] == ' ' && (s[1] == ' ' || s[1] == '*'))
                s++;
        return s;
}

// Compare text n against type name t over the length of t only, so n may
// continue after the type. Spaces in front of another space or a '*' are
// ignored on both sides.
// Returns 0 on match, otherwise the difference of the first mismatching
// chars (strcmp-like sign).
static int typecmp(const char *n, const char *t)
{
        while (*t != 0) {
                n = typecmp_squeeze(n);
                t = typecmp_squeeze(t);
                if (*n != *t)
                        return *n - *t;
                n++;
                t++;
        }

        return 0;
}
195
196 static const char *skip_type_mod(const char *n)
197 {
198         int len;
199         int i;
200
201         for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
202                 len = strlen(known_type_mod[i]);
203                 if (strncmp(n, known_type_mod[i], len) != 0)
204                         continue;
205                 if (!my_isblank(n[len]))
206                         continue;
207
208                 n += len;
209                 while (my_isblank(*n))
210                         n++;
211                 i = 0;
212         }
213
214         return n;
215 }
216
217 static int check_type(const char *name, struct parsed_type *type)
218 {
219         const char *n, *n1;
220         int ret = -1;
221         int i;
222
223         n = skip_type_mod(name);
224
225         for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
226                 if (typecmp(n, known_ptr_types[i]))
227                         continue;
228
229                 type->is_ptr = 1;
230                 break;
231         }
232
233         if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
234                 type->is_ptr = 1;
235
236         // assume single word
237         while (!my_isblank(*n) && !my_issep(*n))
238                 n++;
239
240         while (1) {
241                 n1 = n;
242                 while (my_isblank(*n))
243                         n++;
244                 if (*n == '*') {
245                         type->is_ptr = 1;
246                         n++;
247                         continue;
248                 }
249                 break;
250         }
251
252         ret = n1 - name;
253         type->name = strndup(name, ret);
254         if (IS(type->name, "VOID"))
255                 memcpy(type->name, "void", 4);
256
257         return ret;
258 }
259
/*
 * args are always expanded to 32bit:
 * translate a 16bit or 8bit register name to the 32bit register it maps
 * to here; any other name is returned unchanged.
 */
static const char *map_reg(const char *reg)
{
        static const struct {
                const char *narrow;
                const char *full;
        } reg_map[] = {
                { "ax", "eax" }, { "bx", "ebx" }, { "cx", "ecx" },
                { "dx", "edx" }, { "si", "esi" }, { "di", "edi" },
                { "al", "eax" }, { "bl", "ebx" }, { "cl", "ecx" },
                { "dl", "edx" },
        };
        int k;

        for (k = 0; k < ARRAY_SIZE(reg_map); k++) {
                if (IS(reg, reg_map[k].narrow))
                        return reg_map[k].full;
        }

        return reg;
}
278
279 static int check_struct_arg(struct parsed_proto_arg *arg)
280 {
281         if (IS(arg->type.name, "POINT"))
282                 return 2 - 1;
283
284         return 0;
285 }
286
287 static int parse_protostr(char *protostr, struct parsed_proto *pp)
288 {
289         struct parsed_proto_arg *arg;
290         char regparm[16];
291         char buf[256];
292         char cconv[32];
293         int xarg = 0;
294         char *p, *p1;
295         int i, l;
296         int ret;
297
298         p = sskip(protostr);
299         if (p[0] == '/' && p[1] == '/') {
300                 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
301                 p = sskip(p + 2);
302         }
303
304         // strip unneeded stuff
305         for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
306                 if ((p1[0] == '/' && p1[1] == '*')
307                  || (p1[0] == '*' && p1[1] == '/'))
308                         p1[0] = p1[1] = ' ';
309         }
310
311         if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
312                 pp->is_noreturn = 1;
313                 p = sskip(p + 18);
314         }
315
316         for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
317                 l = strlen(ignored_keywords[i]);
318                 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
319                         p = sskip(p + l + 1);
320         }
321
322         ret = check_type(p, &pp->ret_type);
323         if (ret <= 0) {
324                 printf("%s:%d:%zd: unhandled return in '%s'\n",
325                         hdrfn, hdrfline, (p - protostr) + 1, protostr);
326                 return -1;
327         }
328         p = sskip(p + ret);
329
330         if (!strchr(p, ')')) {
331                 p = next_idt(buf, sizeof(buf), p);
332                 p = sskip(p);
333                 if (buf[0] == 0) {
334                         printf("%s:%d:%zd: var name missing\n",
335                                 hdrfn, hdrfline, (p - protostr) + 1);
336                         return -1;
337                 }
338                 strcpy(pp->name, buf);
339
340                 p1 = strchr(p, ']');
341                 if (p1 != NULL) {
342                         p = p1 + 1;
343                         pp->ret_type.is_array = 1;
344                 }
345                 return p - protostr;
346         }
347
348         pp->is_func = 1;
349
350         if (*p == '(') {
351                 pp->is_fptr = 1;
352                 p = sskip(p + 1);
353         }
354
355         p = next_word(cconv, sizeof(cconv), p);
356         p = sskip(p);
357         if (cconv[0] == 0) {
358                 printf("%s:%d:%zd: cconv missing\n",
359                         hdrfn, hdrfline, (p - protostr) + 1);
360                 return -1;
361         }
362         if      (IS(cconv, "__cdecl"))
363                 pp->is_stdcall = 0;
364         else if (IS(cconv, "__stdcall"))
365                 pp->is_stdcall = 1;
366         else if (IS(cconv, "__fastcall"))
367                 pp->is_stdcall = 1;
368         else if (IS(cconv, "__thiscall"))
369                 pp->is_stdcall = 1;
370         else if (IS(cconv, "__userpurge"))
371                 pp->is_stdcall = 1; // IDA
372         else if (IS(cconv, "__usercall"))
373                 pp->is_stdcall = 0; // IDA
374         else if (IS(cconv, "WINAPI"))
375                 pp->is_stdcall = 1;
376         else {
377                 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
378                         hdrfn, hdrfline, (p - protostr) + 1, cconv);
379                 return -1;
380         }
381
382         if (pp->is_fptr) {
383                 if (*p != '*') {
384                         printf("%s:%d:%zd: '*' expected\n",
385                                 hdrfn, hdrfline, (p - protostr) + 1);
386                         return -1;
387                 }
388                 p++;
389                 // XXX: skipping extra asterisks, for now
390                 while (*p == '*')
391                         p++;
392                 p = sskip(p);
393         }
394
395         p = next_idt(buf, sizeof(buf), p);
396         p = sskip(p);
397         if (buf[0] == 0) {
398                 //printf("%s:%d:%zd: func name missing\n",
399                 //      hdrfn, hdrfline, (p - protostr) + 1);
400                 //return -1;
401         }
402         strcpy(pp->name, buf);
403
404         ret = get_regparm(regparm, sizeof(regparm), p);
405         if (ret > 0) {
406                 if (!IS(regparm, "eax") && !IS(regparm, "ax")
407                  && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
408                 {
409                         printf("%s:%d:%zd: bad regparm: %s\n",
410                                 hdrfn, hdrfline, (p - protostr) + 1, regparm);
411                         return -1;
412                 }
413                 p += ret;
414                 p = sskip(p);
415         }
416
417         if (pp->is_fptr) {
418                 if (*p == '[') {
419                         // not really ret_type is array, but ohwell
420                         pp->ret_type.is_array = 1;
421                         p = strchr(p + 1, ']');
422                         if (p == NULL) {
423                                 printf("%s:%d:%zd: ']' expected\n",
424                                  hdrfn, hdrfline, (p - protostr) + 1);
425                                 return -1;
426                         }
427                         p = sskip(p + 1);
428                 }
429                 if (*p != ')') {
430                         printf("%s:%d:%zd: ')' expected\n",
431                                 hdrfn, hdrfline, (p - protostr) + 1);
432                         return -1;
433                 }
434                 p = sskip(p + 1);
435         }
436
437         if (*p != '(') {
438                 printf("%s:%d:%zd: '(' expected, got '%c'\n",
439                                 hdrfn, hdrfline, (p - protostr) + 1, *p);
440                 return -1;
441         }
442         p++;
443
444         // check for x(void)
445         p = sskip(p);
446         if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
447            && *sskip(p + 4) == ')')
448                 p += 4;
449
450         while (1) {
451                 p = sskip(p);
452                 if (*p == ')') {
453                         p++;
454                         break;
455                 }
456                 if (xarg > 0) {
457                         if (*p != ',') {
458                                 printf("%s:%d:%zd: ',' expected\n",
459                                  hdrfn, hdrfline, (p - protostr) + 1);
460                                 return -1;
461                         }
462                         p = sskip(p + 1);
463                 }
464
465                 if (!strncmp(p, "...", 3)) {
466                         pp->is_vararg = 1;
467                         p = sskip(p + 3);
468                         if (*p == ')') {
469                                 p++;
470                                 break;
471                         }
472                         printf("%s:%d:%zd: ')' expected\n",
473                                 hdrfn, hdrfline, (p - protostr) + 1);
474                         return -1;
475                 }
476
477                 arg = &pp->arg[xarg];
478                 xarg++;
479
480                 p1 = p;
481                 ret = check_type(p, &arg->type);
482                 if (ret <= 0) {
483                         printf("%s:%d:%zd: unhandled type for arg%d\n",
484                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
485                         return -1;
486                 }
487                 p = sskip(p + ret);
488
489                 if (*p == '(') {
490                         // func ptr
491                         arg->fptr = calloc(1, sizeof(*arg->fptr));
492                         ret = parse_protostr(p1, arg->fptr);
493                         if (ret < 0) {
494                                 printf("%s:%d:%zd: funcarg parse failed\n",
495                                         hdrfn, hdrfline, p1 - protostr);
496                                 return -1;
497                         }
498                         // we'll treat it as void * for non-calls
499                         arg->type.name = strdup("void *");
500                         arg->type.is_ptr = 1;
501
502                         p = p1 + ret;
503                 }
504
505                 p = next_idt(buf, sizeof(buf), p);
506                 p = sskip(p);
507 #if 0
508                 if (buf[0] == 0) {
509                         printf("%s:%d:%zd: idt missing for arg%d\n",
510                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
511                         return -1;
512                 }
513 #endif
514                 arg->reg = NULL;
515
516                 ret = get_regparm(regparm, sizeof(regparm), p);
517                 if (ret > 0) {
518                         p += ret;
519                         p = sskip(p);
520
521                         arg->reg = strdup(map_reg(regparm));
522                 }
523
524                 ret = check_struct_arg(arg);
525                 if (ret > 0) {
526                         pp->has_structarg = 1;
527                         arg->type.is_struct = 1;
528                         free(arg->type.name);
529                         arg->type.name = strdup("int");
530                         for (l = 0; l < ret; l++) {
531                                 pp_copy_arg(&pp->arg[xarg], arg);
532                                 xarg++;
533                         }
534                 }
535         }
536
537         if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
538                 if (pp->arg[0].reg != NULL) {
539                         printf("%s:%d: %s with arg1 spec %s?\n",
540                                 hdrfn, hdrfline, cconv, pp->arg[0].reg);
541                 }
542                 pp->arg[0].reg = strdup("ecx");
543         }
544
545         if (xarg > 1 && IS(cconv, "__fastcall")) {
546                 if (pp->arg[1].reg != NULL) {
547                         printf("%s:%d: %s with arg2 spec %s?\n",
548                                 hdrfn, hdrfline, cconv, pp->arg[1].reg);
549                 }
550                 pp->arg[1].reg = strdup("edx");
551         }
552
553         if (pp->is_vararg && pp->is_stdcall) {
554                 printf("%s:%d: vararg stdcall?\n", hdrfn, hdrfline);
555                 return -1;
556         }
557
558         pp->argc = xarg;
559
560         for (i = 0; i < pp->argc; i++) {
561                 if (pp->arg[i].reg == NULL)
562                         pp->argc_stack++;
563                 else
564                         pp->argc_reg++;
565         }
566
567         return p - protostr;
568 }
569
570 static int pp_name_cmp(const void *p1, const void *p2)
571 {
572         const struct parsed_proto *pp1 = p1, *pp2 = p2;
573         return strcmp(pp1->name, pp2->name);
574 }
575
// all parsed prototypes; grown by b_pp_c_handler(), sorted by name in
// build_pp_cache() so that proto_parse() can bsearch() it
static struct parsed_proto *pp_cache;
// number of valid entries in pp_cache
static int pp_cache_size;
// number of entries pp_cache has room for
static int pp_cache_alloc;
579
580 static int b_pp_c_handler(char *proto, const char *fname)
581 {
582         int ret;
583
584         if (pp_cache_size >= pp_cache_alloc) {
585                 pp_cache_alloc = pp_cache_alloc * 2 + 64;
586                 pp_cache = realloc(pp_cache, pp_cache_alloc
587                                 * sizeof(pp_cache[0]));
588                 my_assert_not(pp_cache, NULL);
589                 memset(pp_cache + pp_cache_size, 0,
590                         (pp_cache_alloc - pp_cache_size)
591                          * sizeof(pp_cache[0]));
592         }
593
594         ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
595         if (ret < 0)
596                 return -1;
597
598         pp_cache_size++;
599         return 0;
600 }
601
602 static void build_pp_cache(FILE *fhdr)
603 {
604         int ret;
605
606         rewind(fhdr);
607
608         ret = do_protostrs(fhdr, hdrfn);
609         if (ret < 0)
610                 exit(1);
611
612         qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
613 }
614
615 static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym,
616         int quiet)
617 {
618         const struct parsed_proto *pp_ret;
619         struct parsed_proto pp_search;
620
621         if (pp_cache == NULL)
622                 build_pp_cache(fhdr);
623
624         if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
625                 sym++;
626
627         strcpy(pp_search.name, sym);
628         pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
629                         sizeof(pp_cache[0]), pp_name_cmp);
630         if (pp_ret == NULL && !quiet)
631                 printf("%s: sym '%s' is missing\n", hdrfn, sym);
632
633         return pp_ret;
634 }
635
636 static void pp_copy_arg(struct parsed_proto_arg *d,
637         const struct parsed_proto_arg *s)
638 {
639         memcpy(d, s, sizeof(*d));
640
641         if (s->reg != NULL) {
642                 d->reg = strdup(s->reg);
643                 my_assert_not(d->reg, NULL);
644         }
645         if (s->type.name != NULL) {
646                 d->type.name = strdup(s->type.name);
647                 my_assert_not(d->type.name, NULL);
648         }
649         if (s->fptr != NULL) {
650                 d->fptr = malloc(sizeof(*d->fptr));
651                 my_assert_not(d->fptr, NULL);
652                 memcpy(d->fptr, s->fptr, sizeof(*d->fptr));
653         }
654 }
655
656 struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
657 {
658         struct parsed_proto *pp;
659         int i;
660
661         pp = malloc(sizeof(*pp));
662         my_assert_not(pp, NULL);
663         memcpy(pp, pp_c, sizeof(*pp)); // lazy..
664
665         // do the actual deep copy..
666         for (i = 0; i < pp_c->argc; i++)
667                 pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
668         if (pp_c->ret_type.name != NULL)
669                 pp->ret_type.name = strdup(pp_c->ret_type.name);
670
671         return pp;
672 }
673
674 static inline void proto_release(struct parsed_proto *pp)
675 {
676         int i;
677
678         for (i = 0; i < pp->argc; i++) {
679                 if (pp->arg[i].reg != NULL)
680                         free(pp->arg[i].reg);
681                 if (pp->arg[i].type.name != NULL)
682                         free(pp->arg[i].type.name);
683                 if (pp->arg[i].fptr != NULL)
684                         free(pp->arg[i].fptr);
685         }
686         if (pp->ret_type.name != NULL)
687                 free(pp->ret_type.name);
688         free(pp);
689 }