header pre-parsing
[ia32rtools.git] / tools / protoparse.h
1
struct parsed_proto;

/* A parsed C type: its spelling plus the properties the parser detected. */
struct parsed_type {
	char *name;			// type text as copied by check_type() (heap string)
	unsigned int is_array:1;	// set by parse_protostr() when a ']' follows the name
	unsigned int is_ptr:1;		// set for '*' types, "LP..." names and known_ptr_types[]
};
9
/* One argument of a parsed function prototype. */
struct parsed_proto_arg {
	char *reg;			// register given as "<reg>" (32bit name), NULL if none
	struct parsed_type type;	// argument type
	struct parsed_proto *fptr;	// prototype of a function pointer argument, else NULL
	void *datap;			// not touched by the parser in this file
};
16
/* A parsed declaration: either a variable or a function prototype. */
struct parsed_proto {
	char name[256];			// symbol name; key used by pp_name_cmp()
	union {
		struct parsed_type ret_type;	// return type (functions)
		struct parsed_type type;	// same storage under the name used for variables
	};
	struct parsed_proto_arg arg[16];	// arguments in declaration order
	int argc;			// number of entries used in arg[]
	int argc_stack;			// arguments with no register assigned
	int argc_reg;			// arguments with a register assigned
	unsigned int is_func:1;		// declaration contains ')', i.e. is a function
	unsigned int is_stdcall:1;	// set for __stdcall, __fastcall, __thiscall,
					// __userpurge and WINAPI
	unsigned int is_vararg:1;	// argument list ends with "..."
	unsigned int is_fptr:1;		// declarator starts with '(' (function pointer)
	unsigned int is_noreturn:1;	// declaration starts with DECLSPEC_NORETURN
};
33
// name of the header currently being processed; prefix of every diagnostic
static const char *hdrfn;
// line number (within hdrfn) of the declaration last handed to the parser
static int hdrfline = 0;

// parses one declaration line and appends it to the prototype cache
static int b_pp_c_handler(char *proto, const char *fname);
38
39 static int do_protostrs(FILE *fhdr, const char *fname)
40 {
41         const char *finc_name;
42         const char *hdrfn_saved;
43         char protostr[256];
44         FILE *finc;
45         int line = 0;
46         int ret;
47         char *p;
48
49         hdrfn_saved = hdrfn;
50         hdrfn = fname;
51
52         while (fgets(protostr, sizeof(protostr), fhdr))
53         {
54                 line++;
55                 if (strncmp(protostr, "//#include ", 11) == 0) {
56                         finc_name = protostr + 11;
57                         p = strpbrk(finc_name, "\r\n ");
58                         if (p != NULL)
59                                 *p = 0;
60
61                         finc = fopen(finc_name, "r");
62                         if (finc == NULL) {
63                                 printf("%s:%d: can't open '%s'\n",
64                                         fname, line, finc_name);
65                                 continue;
66                         }
67                         ret = do_protostrs(finc, finc_name);
68                         fclose(finc);
69                         if (ret < 0)
70                                 break;
71                         continue;
72                 }
73                 if (strncmp(sskip(protostr), "//", 2) == 0)
74                         continue;
75
76                 p = protostr + strlen(protostr);
77                 for (p--; p >= protostr && my_isblank(*p); --p)
78                         *p = 0;
79                 if (p < protostr)
80                         continue;
81
82                 hdrfline = line;
83
84                 ret = b_pp_c_handler(protostr, hdrfn);
85                 if (ret < 0)
86                         break;
87         }
88
89         hdrfn = hdrfn_saved;
90
91         if (feof(fhdr))
92                 return 0;
93
94         return -1;
95 }
96
/*
 * Extract a "<reg>" annotation that starts at 'p'.
 *
 * dst, dlen - output buffer and its size; receives the text between
 *             '<' and '>' as a NUL terminated string
 * p         - input position
 *
 * Returns the number of input characters consumed (including both
 * brackets), or 0 when 'p' doesn't start with '<', the closing '>' is
 * missing, or the text doesn't fit into 'dst'. On a 0 return the contents
 * of 'dst' are unspecified.
 */
static int get_regparm(char *dst, size_t dlen, char *p)
{
	size_t o = 0;
	int i;

	if (*p != '<' || dlen == 0)
		return 0;

	for (i = 1; p[i] != '>'; i++) {
		if (p[i] == 0)
			return 0;
		// always keep one byte for the terminator
		if (o + 1 >= dlen)
			return 0;
		dst[o++] = p[i];
	}
	dst[o] = 0;
	return i + 1;
}
114
// hmh..
// leading words that skip_type_mod() steps over before the type name proper
static const char *known_type_mod[] = {
	"const",
	"signed",
	"unsigned",
	"struct",
	"enum",
};
123
// type names that check_type() marks as pointers even without a '*';
// matched with typecmp(), i.e. as a prefix of the type text
static const char *known_ptr_types[] = {
	"HACCEL",
	"HANDLE",
	"HBITMAP",
	"HCURSOR",
	"HDC",
	"HGDIOBJ",
	"HGLOBAL",
	"HINSTANCE",
	"HMODULE",
	"HRGN",
	"HRSRC",
	"HKEY",
	"HMENU",
	"HWND",
	"PLONG",
	"PDWORD",
	"PVOID",
	"PCVOID",
	"DLGPROC",
	"va_list",
	"__VALIST",
};
147
// keywords that parse_protostr() drops from the start of a declaration
// when they are followed by a blank
static const char *ignored_keywords[] = {
	"extern",
	"WINBASEAPI",
	"WINUSERAPI",
	"WINGDIAPI",
	"WINADVAPI",
};
155
// step over a space that is followed by another space or by '*'
static const char *typecmp_squeeze(const char *s)
{
	while (s[0] == ' ' && (s[1] == ' ' || s[1] == '*'))
		s++;
	return s;
}

// Check whether type text 'n' starts with type 't', ignoring redundant
// spaces and spaces in front of '*' on both sides.
// Returns 0 when all of 't' was matched, otherwise the difference of the
// first pair of characters that differ.
static int typecmp(const char *n, const char *t)
{
	while (*t != 0) {
		n = typecmp_squeeze(n);
		t = typecmp_squeeze(t);
		if (*n != *t)
			return *n - *t;
		n++;
		t++;
	}

	return 0;
}
170
171 static const char *skip_type_mod(const char *n)
172 {
173         int len;
174         int i;
175
176         for (i = 0; i < ARRAY_SIZE(known_type_mod); i++) {
177                 len = strlen(known_type_mod[i]);
178                 if (strncmp(n, known_type_mod[i], len) != 0)
179                         continue;
180                 if (!my_isblank(n[len]))
181                         continue;
182
183                 n += len;
184                 while (my_isblank(*n))
185                         n++;
186                 i = 0;
187         }
188
189         return n;
190 }
191
192 static int check_type(const char *name, struct parsed_type *type)
193 {
194         const char *n, *n1;
195         int ret = -1;
196         int i;
197
198         n = skip_type_mod(name);
199
200         for (i = 0; i < ARRAY_SIZE(known_ptr_types); i++) {
201                 if (typecmp(n, known_ptr_types[i]))
202                         continue;
203
204                 type->is_ptr = 1;
205                 break;
206         }
207
208         if (n[0] == 'L' && n[1] == 'P')
209                 type->is_ptr = 1;
210
211         // assume single word
212         while (!my_isblank(*n) && !my_issep(*n))
213                 n++;
214
215         while (1) {
216                 n1 = n;
217                 while (my_isblank(*n))
218                         n++;
219                 if (*n == '*') {
220                         type->is_ptr = 1;
221                         n++;
222                         continue;
223                 }
224                 break;
225         }
226
227         ret = n1 - name;
228         type->name = strndup(name, ret);
229         return ret;
230 }
231
/*
 * args are always expanded to 32bit: translate a 16bit or 8bit register
 * name to the name of its 32bit register; any other string is returned
 * unchanged
 */
static const char *map_reg(const char *reg)
{
	static const struct {
		const char *part;
		const char *full;
	} reg_map[] = {
		{ "ax", "eax" }, { "bx", "ebx" }, { "cx", "ecx" },
		{ "dx", "edx" }, { "si", "esi" }, { "di", "edi" },
		{ "al", "eax" }, { "bl", "ebx" }, { "cl", "ecx" },
		{ "dl", "edx" },
	};
	int k;

	for (k = 0; k < ARRAY_SIZE(reg_map); k++) {
		if (IS(reg, reg_map[k].part))
			return reg_map[k].full;
	}

	return reg;
}
250
251 static int parse_protostr(char *protostr, struct parsed_proto *pp)
252 {
253         struct parsed_proto_arg *arg;
254         char regparm[16];
255         char buf[256];
256         char cconv[32];
257         int xarg = 0;
258         char *p, *p1;
259         int i, l;
260         int ret;
261
262         p = sskip(protostr);
263         if (p[0] == '/' && p[1] == '/') {
264                 printf("%s:%d: commented out?\n", hdrfn, hdrfline);
265                 p = sskip(p + 2);
266         }
267
268         // strip unneeded stuff
269         for (p1 = p; p1[0] != 0 && p1[1] != 0; p1++) {
270                 if ((p1[0] == '/' && p1[1] == '*')
271                  || (p1[0] == '*' && p1[1] == '/'))
272                         p1[0] = p1[1] = ' ';
273         }
274
275         if (!strncmp(p, "DECLSPEC_NORETURN ", 18)) {
276                 pp->is_noreturn = 1;
277                 p = sskip(p + 18);
278         }
279
280         for (i = 0; i < ARRAY_SIZE(ignored_keywords); i++) {
281                 l = strlen(ignored_keywords[i]);
282                 if (!strncmp(p, ignored_keywords[i], l) && my_isblank(p[l]))
283                         p = sskip(p + l + 1);
284         }
285
286         ret = check_type(p, &pp->ret_type);
287         if (ret <= 0) {
288                 printf("%s:%d:%zd: unhandled return in '%s'\n",
289                         hdrfn, hdrfline, (p - protostr) + 1, protostr);
290                 return -1;
291         }
292         p = sskip(p + ret);
293
294         if (!strchr(p, ')')) {
295                 p = next_idt(buf, sizeof(buf), p);
296                 p = sskip(p);
297                 if (buf[0] == 0) {
298                         printf("%s:%d:%zd: var name missing\n",
299                                 hdrfn, hdrfline, (p - protostr) + 1);
300                         return -1;
301                 }
302                 strcpy(pp->name, buf);
303
304                 p1 = strchr(p, ']');
305                 if (p1 != NULL) {
306                         p = p1 + 1;
307                         pp->ret_type.is_array = 1;
308                 }
309                 return p - protostr;
310         }
311
312         pp->is_func = 1;
313
314         if (*p == '(') {
315                 pp->is_fptr = 1;
316                 p = sskip(p + 1);
317         }
318
319         p = next_word(cconv, sizeof(cconv), p);
320         p = sskip(p);
321         if (cconv[0] == 0) {
322                 printf("%s:%d:%zd: cconv missing\n",
323                         hdrfn, hdrfline, (p - protostr) + 1);
324                 return -1;
325         }
326         if      (IS(cconv, "__cdecl"))
327                 pp->is_stdcall = 0;
328         else if (IS(cconv, "__stdcall"))
329                 pp->is_stdcall = 1;
330         else if (IS(cconv, "__fastcall"))
331                 pp->is_stdcall = 1;
332         else if (IS(cconv, "__thiscall"))
333                 pp->is_stdcall = 1;
334         else if (IS(cconv, "__userpurge"))
335                 pp->is_stdcall = 1; // IDA
336         else if (IS(cconv, "__usercall"))
337                 pp->is_stdcall = 0; // IDA
338         else if (IS(cconv, "WINAPI"))
339                 pp->is_stdcall = 1;
340         else {
341                 printf("%s:%d:%zd: unhandled cconv: '%s'\n",
342                         hdrfn, hdrfline, (p - protostr) + 1, cconv);
343                 return -1;
344         }
345
346         if (pp->is_fptr) {
347                 if (*p != '*') {
348                         printf("%s:%d:%zd: '*' expected\n",
349                                 hdrfn, hdrfline, (p - protostr) + 1);
350                         return -1;
351                 }
352                 p++;
353                 // XXX: skipping extra asterisks, for now
354                 while (*p == '*')
355                         p++;
356                 p = sskip(p);
357         }
358
359         p = next_idt(buf, sizeof(buf), p);
360         p = sskip(p);
361         if (buf[0] == 0) {
362                 //printf("%s:%d:%zd: func name missing\n",
363                 //      hdrfn, hdrfline, (p - protostr) + 1);
364                 //return -1;
365         }
366         strcpy(pp->name, buf);
367
368         ret = get_regparm(regparm, sizeof(regparm), p);
369         if (ret > 0) {
370                 if (!IS(regparm, "eax") && !IS(regparm, "ax")
371                  && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
372                 {
373                         printf("%s:%d:%zd: bad regparm: %s\n",
374                                 hdrfn, hdrfline, (p - protostr) + 1, regparm);
375                         return -1;
376                 }
377                 p += ret;
378                 p = sskip(p);
379         }
380
381         if (pp->is_fptr) {
382                 if (*p == '[') {
383                         // not really ret_type is array, but ohwell
384                         pp->ret_type.is_array = 1;
385                         p = strchr(p + 1, ']');
386                         if (p == NULL) {
387                                 printf("%s:%d:%zd: ']' expected\n",
388                                  hdrfn, hdrfline, (p - protostr) + 1);
389                                 return -1;
390                         }
391                         p = sskip(p + 1);
392                 }
393                 if (*p != ')') {
394                         printf("%s:%d:%zd: ')' expected\n",
395                                 hdrfn, hdrfline, (p - protostr) + 1);
396                         return -1;
397                 }
398                 p = sskip(p + 1);
399         }
400
401         if (*p != '(') {
402                 printf("%s:%d:%zd: '(' expected, got '%c'\n",
403                                 hdrfn, hdrfline, (p - protostr) + 1, *p);
404                 return -1;
405         }
406         p++;
407
408         // check for x(void)
409         p = sskip(p);
410         if ((!strncmp(p, "void", 4) || !strncmp(p, "VOID", 4))
411            && *sskip(p + 4) == ')')
412                 p += 4;
413
414         while (1) {
415                 p = sskip(p);
416                 if (*p == ')') {
417                         p++;
418                         break;
419                 }
420                 if (*p == ',')
421                         p = sskip(p + 1);
422
423                 if (!strncmp(p, "...", 3)) {
424                         pp->is_vararg = 1;
425                         p = sskip(p + 3);
426                         if (*p == ')') {
427                                 p++;
428                                 break;
429                         }
430                         printf("%s:%d:%zd: ')' expected\n",
431                                 hdrfn, hdrfline, (p - protostr) + 1);
432                         return -1;
433                 }
434
435                 arg = &pp->arg[xarg];
436                 xarg++;
437
438                 p1 = p;
439                 ret = check_type(p, &arg->type);
440                 if (ret <= 0) {
441                         printf("%s:%d:%zd: unhandled type for arg%d\n",
442                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
443                         return -1;
444                 }
445                 p = sskip(p + ret);
446
447                 if (*p == '(') {
448                         // func ptr
449                         arg->fptr = calloc(1, sizeof(*arg->fptr));
450                         ret = parse_protostr(p1, arg->fptr);
451                         if (ret < 0) {
452                                 printf("%s:%d:%zd: funcarg parse failed\n",
453                                         hdrfn, hdrfline, p1 - protostr);
454                                 return -1;
455                         }
456                         // we'll treat it as void * for non-calls
457                         arg->type.name = "void *";
458                         arg->type.is_ptr = 1;
459
460                         p = p1 + ret;
461                 }
462
463                 p = next_idt(buf, sizeof(buf), p);
464                 p = sskip(p);
465 #if 0
466                 if (buf[0] == 0) {
467                         printf("%s:%d:%zd: idt missing for arg%d\n",
468                                 hdrfn, hdrfline, (p - protostr) + 1, xarg);
469                         return -1;
470                 }
471 #endif
472                 arg->reg = NULL;
473
474                 ret = get_regparm(regparm, sizeof(regparm), p);
475                 if (ret > 0) {
476                         p += ret;
477                         p = sskip(p);
478
479                         arg->reg = strdup(map_reg(regparm));
480                 }
481         }
482
483         if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) {
484                 if (pp->arg[0].reg != NULL) {
485                         printf("%s:%d: %s with arg1 spec %s?\n",
486                                 hdrfn, hdrfline, cconv, pp->arg[0].reg);
487                 }
488                 pp->arg[0].reg = strdup("ecx");
489         }
490
491         if (xarg > 1 && IS(cconv, "__fastcall")) {
492                 if (pp->arg[1].reg != NULL) {
493                         printf("%s:%d: %s with arg2 spec %s?\n",
494                                 hdrfn, hdrfline, cconv, pp->arg[1].reg);
495                 }
496                 pp->arg[1].reg = strdup("edx");
497         }
498
499         if (pp->is_vararg && pp->is_stdcall) {
500                 printf("%s:%d: vararg stdcall?\n", hdrfn, hdrfline);
501                 return -1;
502         }
503
504         pp->argc = xarg;
505
506         for (i = 0; i < pp->argc; i++) {
507                 if (pp->arg[i].reg == NULL)
508                         pp->argc_stack++;
509                 else
510                         pp->argc_reg++;
511         }
512
513         return p - protostr;
514 }
515
516 static int pp_name_cmp(const void *p1, const void *p2)
517 {
518         const struct parsed_proto *pp1 = p1, *pp2 = p2;
519         return strcmp(pp1->name, pp2->name);
520 }
521
// all parsed declarations; sorted by name once build_pp_cache() is done
static struct parsed_proto *pp_cache;
// number of entries in use
static int pp_cache_size;
// number of entries allocated
static int pp_cache_alloc;
525
526 static int b_pp_c_handler(char *proto, const char *fname)
527 {
528         int ret;
529
530         if (pp_cache_size >= pp_cache_alloc) {
531                 pp_cache_alloc = pp_cache_alloc * 2 + 64;
532                 pp_cache = realloc(pp_cache, pp_cache_alloc
533                                 * sizeof(pp_cache[0]));
534                 my_assert_not(pp_cache, NULL);
535                 memset(pp_cache + pp_cache_size, 0,
536                         (pp_cache_alloc - pp_cache_size)
537                          * sizeof(pp_cache[0]));
538         }
539
540         ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
541         if (ret < 0)
542                 return -1;
543
544         pp_cache_size++;
545         return 0;
546 }
547
548 static void build_pp_cache(FILE *fhdr)
549 {
550         int ret;
551
552         rewind(fhdr);
553
554         ret = do_protostrs(fhdr, hdrfn);
555         if (ret < 0)
556                 exit(1);
557
558         qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp);
559 }
560
561 static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym)
562 {
563         const struct parsed_proto *pp_ret;
564         struct parsed_proto pp_search;
565
566         if (pp_cache == NULL)
567                 build_pp_cache(fhdr);
568
569         if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
570                 sym++;
571
572         strcpy(pp_search.name, sym);
573         pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
574                         sizeof(pp_cache[0]), pp_name_cmp);
575         if (pp_ret == NULL)
576                 printf("%s: sym '%s' is missing\n", hdrfn, sym);
577
578         return pp_ret;
579 }
580
581 struct parsed_proto *proto_clone(const struct parsed_proto *pp_c)
582 {
583         struct parsed_proto *pp;
584         int i;
585
586         pp = malloc(sizeof(*pp));
587         my_assert_not(pp, NULL);
588         memcpy(pp, pp_c, sizeof(*pp)); // lazy..
589
590         // do the actual deep copy..
591         for (i = 0; i < pp_c->argc; i++) {
592                 if (pp_c->arg[i].reg != NULL) {
593                         pp->arg[i].reg = strdup(pp_c->arg[i].reg);
594                         my_assert_not(pp->arg[i].reg, NULL);
595                 }
596                 if (pp_c->arg[i].type.name != NULL) {
597                         pp->arg[i].type.name = strdup(pp_c->arg[i].type.name);
598                         my_assert_not(pp->arg[i].type.name, NULL);
599                 }
600                 if (pp_c->arg[i].fptr != NULL) {
601                         pp->arg[i].fptr = malloc(sizeof(*pp->arg[i].fptr));
602                         my_assert_not(pp->arg[i].fptr, NULL);
603                         memcpy(pp->arg[i].fptr, pp_c->arg[i].fptr,
604                                 sizeof(*pp->arg[i].fptr));
605                 }
606         }
607         if (pp_c->ret_type.name != NULL)
608                 pp->ret_type.name = strdup(pp_c->ret_type.name);
609
610         return pp;
611 }
612
613 static inline void proto_release(struct parsed_proto *pp)
614 {
615         int i;
616
617         for (i = 0; i < pp->argc; i++) {
618                 if (pp->arg[i].reg != NULL)
619                         free(pp->arg[i].reg);
620                 if (pp->arg[i].type.name != NULL)
621                         free(pp->arg[i].type.name);
622                 if (pp->arg[i].fptr != NULL)
623                         free(pp->arg[i].fptr);
624         }
625         if (pp->ret_type.name != NULL)
626                 free(pp->ret_type.name);
627         free(pp);
628 }