translate: hdrgen: detect strings, skip std funcs
[ia32rtools.git] / tools / protoparse.h
index 0197214..3d70d86 100644 (file)
@@ -1,3 +1,10 @@
+/*
+ * ia32rtools
+ * (C) notaz, 2013,2014
+ *
+ * This work is licensed under the terms of 3-clause BSD license.
+ * See COPYING file in the top-level directory.
+ */
 
 struct parsed_proto;
 
@@ -5,6 +12,9 @@ struct parsed_type {
        char *name;
        unsigned int is_array:1;
        unsigned int is_ptr:1;
+       unsigned int is_struct:1; // split for args
+       unsigned int is_retreg:1; // register to return to caller
+       unsigned int is_va_list:1;
 };
 
 struct parsed_proto_arg {
@@ -26,82 +36,118 @@ struct parsed_proto {
        int argc_reg;
        unsigned int is_func:1;
        unsigned int is_stdcall:1;
-       unsigned int is_vararg:1;
+       unsigned int is_fastcall:1;
+       unsigned int is_vararg:1;     // vararg func
        unsigned int is_fptr:1;
        unsigned int is_noreturn:1;
+       unsigned int is_unresolved:1;
+       unsigned int is_userstack:1;
+       unsigned int is_include:1;    // not from top-level header
+       unsigned int is_osinc:1;      // OS/system library func
+       unsigned int is_arg:1;        // declared in some func arg
+       unsigned int has_structarg:1;
+       unsigned int has_retreg:1;
 };
 
 static const char *hdrfn;
 static int hdrfline = 0;
 
-static int find_protostr(char *dst, size_t dlen, FILE *fhdr,
-       const char *fname, const char *sym_)
+static void pp_copy_arg(struct parsed_proto_arg *d, // forward declaration: defined further down, called from parse_protostr()
+       const struct parsed_proto_arg *s);
+
+static int b_pp_c_handler(char *proto, const char *fname, // forward declaration: per-line callback called from do_protostrs()
+       int is_include, int is_osinc);
+
+static int do_protostrs(FILE *fhdr, const char *fname, int is_include) // feed every prototype line of fhdr to b_pp_c_handler(), recursing into "//#include" files; 0 if the loop ended at EOF, -1 otherwise
 {
-       const char *sym = sym_;
        const char *finc_name;
+       const char *hdrfn_saved; // value of the global hdrfn on entry, restored before returning
+       char protostr[256]; // current line read from fhdr
+       char path[256]; // directory prefix (up to and including the last '/') for resolving includes
+       char fname_inc[256]; // path + name of the included file
+       int is_osinc; // non-zero when fname contains "stdc.hlist" or "win32.hlist"
        FILE *finc;
-       int symlen;
        int line = 0;
        int ret;
        char *p;
 
-       if (sym[0] == '_' && strncmp(fname, "stdc", 4) == 0)
-               sym++;
-       symlen = strlen(sym);
+       hdrfn_saved = hdrfn;
+       hdrfn = fname; // parse_protostr() reports errors using the globals hdrfn/hdrfline
 
-       rewind(fhdr);
+       is_osinc = strstr(fname, "stdc.hlist")
+                || strstr(fname, "win32.hlist");
 
-       while (fgets(dst, dlen, fhdr))
+       while (fgets(protostr, sizeof(protostr), fhdr))
        {
                line++;
-               if (strncmp(dst, "//#include ", 11) == 0) {
-                       finc_name = dst + 11;
+               if (strncmp(protostr, "//#include ", 11) == 0) {
+                       finc_name = protostr + 11;
                        p = strpbrk(finc_name, "\r\n ");
                        if (p != NULL)
                                *p = 0;
 
-                       finc = fopen(finc_name, "r");
+                       path[0] = 0;
+                       p = strrchr(hdrfn_saved, '/'); // NOTE(review): directory is taken from the caller's file name (hdrfn_saved), not from fname - confirm this is intended for nested includes
+                       if (p) {
+                               memcpy(path, hdrfn_saved, // NOTE(review): copied length is not checked against sizeof(path)
+                                       p - hdrfn_saved + 1);
+                               path[p - hdrfn_saved + 1] = 0;
+                       }
+                       snprintf(fname_inc, sizeof(fname_inc), "%s%s", 
+                               path, finc_name);
+                       finc = fopen(fname_inc, "r");
                        if (finc == NULL) {
                                printf("%s:%d: can't open '%s'\n",
-                                       fname, line, finc_name);
+                                       fname_inc, line, finc_name); // NOTE(review): the printed file name is the include's path while line counts lines of fname - confirm
                                continue;
                        }
-                       ret = find_protostr(dst, dlen, finc,
-                               finc_name, sym_);
+                       ret = do_protostrs(finc, finc_name, 1); // recurse; entries from the included file get is_include = 1
                        fclose(finc);
-                       if (ret == 0)
+                       if (ret < 0)
                                break;
                        continue;
                }
-               if (strncmp(sskip(dst), "//", 2) == 0)
+               if (strncmp(sskip(protostr), "//", 2) == 0) // "//" comment line: nothing to parse
+                       continue;
+
+               p = protostr + strlen(protostr);
+               for (p--; p >= protostr && my_isblank(*p); --p) // strip trailing characters for which my_isblank() is true
+                       *p = 0;
+               if (p < protostr) // nothing left of the line
                        continue;
 
-               p = strstr(dst, sym);
-               if (p != NULL && p > dst
-                  && (my_isblank(p[-1]) || my_issep(p[-1]))
-                  && (my_isblank(p[symlen]) || my_issep(p[symlen])))
+               hdrfline = line;
+
+               ret = b_pp_c_handler(protostr, hdrfn, is_include,
+                       is_osinc);
+               if (ret < 0)
                        break;
        }
-       hdrfline = line;
 
-       if (feof(fhdr))
-               return -1;
+       hdrfn = hdrfn_saved;
 
-       p = dst + strlen(dst);
-       for (p--; p > dst && my_isblank(*p); --p)
-               *p = 0;
+       if (feof(fhdr)) // the loop ran until end of file
+               return 0;
 
-       return 0;
+       return -1; // the loop was left before EOF (a handler/include failed, or fgets() stopped early)
 }
 
-static int get_regparm(char *dst, size_t dlen, char *p)
+static int get_regparm(char *dst, size_t dlen, char *p, int *retreg)
 {
-       int i, o;
+       int i = 0, o;
+
+       *retreg = 0;
 
        if (*p != '<')
                return 0;
 
-       for (o = 0, i = 1; o < dlen; i++) {
+       i++;
+       if (p[i] == '*') {
+               *retreg = 1;
+               i++;
+       }
+
+       for (o = 0; o < dlen; i++) {
                if (p[i] == 0)
                        return 0;
                if (p[i] == '>')
@@ -119,28 +165,52 @@ static const char *known_type_mod[] = {
        "unsigned",
        "struct",
        "enum",
+       "CONST",
+       "volatile",
 };
 
 static const char *known_ptr_types[] = {
+       "FARPROC",
+       "WNDPROC",
+       "LINECALLBACK",
        "HACCEL",
        "HANDLE",
        "HBITMAP",
+       "HCALL",
        "HCURSOR",
        "HDC",
+       "HFONT",
        "HGDIOBJ",
        "HGLOBAL",
+       "HICON",
        "HINSTANCE",
+       "HIMC", // DWORD in mingw, pointer in wine; listed in this table, i.e. handled as a pointer type
+       "HLINE",
+       "HLINEAPP",
+       "HLOCAL",
        "HMODULE",
+       "HPALETTE",
        "HRGN",
        "HRSRC",
        "HKEY",
        "HMENU",
+       "HWAVEOUT",
        "HWND",
-       "PLONG",
+       "PBYTE",
+       "PCRITICAL_SECTION",
        "PDWORD",
+       "PFILETIME",
+       "PLARGE_INTEGER",
+       "PHKEY",
+       "PLONG",
+       "PMEMORY_BASIC_INFORMATION",
+       "PUINT",
        "PVOID",
        "PCVOID",
+       "PWORD",
        "DLGPROC",
+       "TIMERPROC",
+       "WNDENUMPROC",
        "va_list",
        "__VALIST",
 };
@@ -177,6 +247,8 @@ static const char *skip_type_mod(const char *n)
                len = strlen(known_type_mod[i]);
                if (strncmp(n, known_type_mod[i], len) != 0)
                        continue;
+               if (!my_isblank(n[len]))
+                       continue;
 
                n += len;
                while (my_isblank(*n))
@@ -203,7 +275,7 @@ static int check_type(const char *name, struct parsed_type *type)
                break;
        }
 
-       if (n[0] == 'L' && n[1] == 'P')
+       if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6))
                type->is_ptr = 1;
 
        // assume single word
@@ -224,6 +296,11 @@ static int check_type(const char *name, struct parsed_type *type)
 
        ret = n1 - name;
        type->name = strndup(name, ret);
+       if (IS(type->name, "__VALIST") || IS(type->name, "va_list"))
+               type->is_va_list = 1;
+       if (IS(type->name, "VOID"))
+               memcpy(type->name, "void", 4);
+
        return ret;
 }
 
@@ -246,12 +323,21 @@ static const char *map_reg(const char *reg)
        return reg;
 }
 
+static int check_struct_arg(struct parsed_proto_arg *arg) // returns how many extra arg slots parse_protostr() must append for this arg; 0 if it is not a known struct type
+{
+       if (IS(arg->type.name, "POINT"))
+               return 2 - 1; // presumably 2 int-sized members minus the slot the arg already occupies - TODO confirm
+
+       return 0;
+}
+
 static int parse_protostr(char *protostr, struct parsed_proto *pp)
 {
        struct parsed_proto_arg *arg;
        char regparm[16];
        char buf[256];
        char cconv[32];
+       int is_retreg;
        int xarg = 0;
        char *p, *p1;
        int i, l;
@@ -289,6 +375,11 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
        }
        p = sskip(p + ret);
 
+       if (!strncmp(p, "noreturn ", 9)) {
+               pp->is_noreturn = 1;
+               p = sskip(p + 9);
+       }
+
        if (!strchr(p, ')')) {
                p = next_idt(buf, sizeof(buf), p);
                p = sskip(p);
@@ -325,14 +416,20 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
                pp->is_stdcall = 0;
        else if (IS(cconv, "__stdcall"))
                pp->is_stdcall = 1;
-       else if (IS(cconv, "__fastcall"))
-               pp->is_stdcall = 1;
+       else if (IS(cconv, "__fastcall")) {
+               pp->is_fastcall = 1;
+               pp->is_stdcall = 1; // sort of..
+       }
        else if (IS(cconv, "__thiscall"))
                pp->is_stdcall = 1;
        else if (IS(cconv, "__userpurge"))
                pp->is_stdcall = 1; // IDA
        else if (IS(cconv, "__usercall"))
                pp->is_stdcall = 0; // IDA
+       else if (IS(cconv, "__userstack")) {
+               pp->is_stdcall = 0; // custom
+               pp->is_userstack = 1;
+       }
        else if (IS(cconv, "WINAPI"))
                pp->is_stdcall = 1;
        else {
@@ -347,7 +444,11 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
                                hdrfn, hdrfline, (p - protostr) + 1);
                        return -1;
                }
-               p = sskip(p + 1);
+               p++;
+               // XXX: skipping extra asterisks, for now
+               while (*p == '*')
+                       p++;
+               p = sskip(p);
        }
 
        p = next_idt(buf, sizeof(buf), p);
@@ -359,10 +460,10 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
        }
        strcpy(pp->name, buf);
 
-       ret = get_regparm(regparm, sizeof(regparm), p);
+       ret = get_regparm(regparm, sizeof(regparm), p, &is_retreg);
        if (ret > 0) {
                if (!IS(regparm, "eax") && !IS(regparm, "ax")
-                && !IS(regparm, "al"))
+                && !IS(regparm, "al") && !IS(regparm, "edx:eax"))
                {
                        printf("%s:%d:%zd: bad regparm: %s\n",
                                hdrfn, hdrfline, (p - protostr) + 1, regparm);
@@ -373,6 +474,17 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
        }
 
        if (pp->is_fptr) {
+               if (*p == '[') {
+                       // not really ret_type is array, but ohwell
+                       pp->ret_type.is_array = 1;
+                       p = strchr(p + 1, ']');
+                       if (p == NULL) {
+                               printf("%s:%d:%zd: ']' expected\n",
+                                hdrfn, hdrfline, (p - protostr) + 1);
+                               return -1;
+                       }
+                       p = sskip(p + 1);
+               }
                if (*p != ')') {
                        printf("%s:%d:%zd: ')' expected\n",
                                hdrfn, hdrfline, (p - protostr) + 1);
@@ -400,8 +512,14 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
                        p++;
                        break;
                }
-               if (*p == ',')
+               if (xarg > 0) {
+                       if (*p != ',') {
+                               printf("%s:%d:%zd: ',' expected\n",
+                                hdrfn, hdrfline, (p - protostr) + 1);
+                               return -1;
+                       }
                        p = sskip(p + 1);
+               }
 
                if (!strncmp(p, "...", 3)) {
                        pp->is_vararg = 1;
@@ -436,8 +554,12 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
                                        hdrfn, hdrfline, p1 - protostr);
                                return -1;
                        }
+                       arg->fptr->is_arg = 1;
+                       // we don't use actual names right now..
+                       snprintf(arg->fptr->name,
+                               sizeof(arg->fptr->name), "a%d", xarg);
                        // we'll treat it as void * for non-calls
-                       arg->type.name = "void *";
+                       arg->type.name = strdup("void *");
                        arg->type.is_ptr = 1;
 
                        p = p1 + ret;
@@ -454,12 +576,36 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
 #endif
                arg->reg = NULL;
 
-               ret = get_regparm(regparm, sizeof(regparm), p);
+               ret = get_regparm(regparm, sizeof(regparm), p, &is_retreg);
                if (ret > 0) {
                        p += ret;
                        p = sskip(p);
 
                        arg->reg = strdup(map_reg(regparm));
+                       arg->type.is_retreg = is_retreg;
+                       pp->has_retreg |= is_retreg;
+               }
+
+               if (strstr(arg->type.name, "int64")
+                   || IS(arg->type.name, "double"))
+               {
+                       // hack..
+                       free(arg->type.name);
+                       arg->type.name = strdup("int");
+                       pp_copy_arg(&pp->arg[xarg], arg);
+                       xarg++;
+               }
+
+               ret = check_struct_arg(arg);
+               if (ret > 0) {
+                       pp->has_structarg = 1;
+                       arg->type.is_struct = 1;
+                       free(arg->type.name);
+                       arg->type.name = strdup("int");
+                       for (l = 0; l < ret; l++) {
+                               pp_copy_arg(&pp->arg[xarg], arg);
+                               xarg++;
+                       }
                }
        }
 
@@ -479,11 +625,6 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
                pp->arg[1].reg = strdup("edx");
        }
 
-       if (pp->is_vararg && pp->is_stdcall) {
-               printf("%s:%d: vararg stdcall?\n", hdrfn, hdrfline);
-               return -1;
-       }
-
        pp->argc = xarg;
 
        for (i = 0; i < pp->argc; i++) {
@@ -493,26 +634,188 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp)
                        pp->argc_reg++;
        }
 
+       if (pp->argc == 1 && pp->arg[0].reg != NULL
+           && IS(pp->arg[0].reg, "ecx"))
+       {
+               pp->is_fastcall = 1;
+       }
+       else if (pp->argc_reg == 2
+         && pp->arg[0].reg != NULL && IS(pp->arg[0].reg, "ecx")
+         && pp->arg[1].reg != NULL && IS(pp->arg[1].reg, "edx"))
+       {
+               pp->is_fastcall = 1;
+       }
+
+       if (pp->is_vararg && (pp->is_stdcall || pp->is_fastcall)) {
+               printf("%s:%d: vararg %s?\n", hdrfn, hdrfline, cconv);
+               return -1;
+       }
+
        return p - protostr;
 }
 
-static int proto_parse(FILE *fhdr, const char *sym, struct parsed_proto *pp)
+static int pp_name_cmp(const void *p1, const void *p2) // comparator passed to qsort()/bsearch() over struct parsed_proto elements
+{
+       const struct parsed_proto *pp1 = p1, *pp2 = p2;
+       return strcmp(pp1->name, pp2->name); // only the name field takes part in the ordering
+}
+
+static struct parsed_proto *pp_cache; // growable array of parsed prototypes; sorted by name at the end of build_pp_cache()
+static int pp_cache_size; // number of entries in use
+static int pp_cache_alloc; // number of entries allocated
+
+static int b_pp_c_handler(char *proto, const char *fname, // parse one prototype line into the next free pp_cache slot; 0 on success, -1 if parse_protostr() fails
+       int is_include, int is_osinc) // fname is not referenced in the body; the two flags are stored into the new entry
 {
-       char protostr[256];
        int ret;
 
-       memset(pp, 0, sizeof(*pp));
+       if (pp_cache_size >= pp_cache_alloc) {
+               pp_cache_alloc = pp_cache_alloc * 2 + 64; // grow: double the capacity plus 64
+               pp_cache = realloc(pp_cache, pp_cache_alloc
+                               * sizeof(pp_cache[0]));
+               my_assert_not(pp_cache, NULL);
+               memset(pp_cache + pp_cache_size, 0, // zero only the entries past the ones in use
+                       (pp_cache_alloc - pp_cache_size)
+                        * sizeof(pp_cache[0]));
+       }
+
+       ret = parse_protostr(proto, &pp_cache[pp_cache_size]);
+       if (ret < 0)
+               return -1; // pp_cache_size is not advanced, so the slot is not counted as an entry
+
+       pp_cache[pp_cache_size].is_include = is_include;
+       pp_cache[pp_cache_size].is_osinc = is_osinc;
+       pp_cache_size++;
+       return 0;
+}
+
+static void build_pp_cache(FILE *fhdr) // fill pp_cache from fhdr (from its start) and sort it by name; calls exit(1) if do_protostrs() fails
+{
+       long pos;
+       int ret;
+
+       pos = ftell(fhdr); // remember the current file position
+       rewind(fhdr);
+
+       ret = do_protostrs(fhdr, hdrfn, 0); // 0: entries read directly from fhdr are not marked is_include
+       if (ret < 0)
+               exit(1);
+
+       qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp); // same comparator as the bsearch() in proto_parse()
+       fseek(fhdr, pos, SEEK_SET); // put the file position back
+}
+
+static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym, // look up sym in pp_cache (built on demand from fhdr); returns the cache entry or NULL
+       int quiet) // non-zero suppresses the "is missing" message
+{
+       const struct parsed_proto *pp_ret;
+       struct parsed_proto pp_search; // search key: only .name is set, which is all pp_name_cmp() reads
+       char *p;
+
+       if (pp_cache == NULL) // no cache yet
+               build_pp_cache(fhdr);
+
+       if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0)
+               sym++;
+
+       strcpy(pp_search.name, sym); // NOTE(review): unbounded copy - assumes sym fits in pp_search.name
+       p = strchr(pp_search.name, '@'); // cut the key at the first '@', if any
+       if (p != NULL)
+               *p = 0;
 
-       ret = find_protostr(protostr, sizeof(protostr), fhdr, hdrfn, sym);
-       if (ret != 0) {
+       pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size,
+                       sizeof(pp_cache[0]), pp_name_cmp);
+       if (pp_ret == NULL && !quiet)
                printf("%s: sym '%s' is missing\n", hdrfn, sym);
-               return ret;
+
+       return pp_ret;
+}
+
+static void pp_copy_arg(struct parsed_proto_arg *d, // copy *s into *d, giving d its own reg string, type.name string and fptr struct
+       const struct parsed_proto_arg *s)
+{
+       memcpy(d, s, sizeof(*d)); // bitwise copy first; the non-NULL pointers are replaced with duplicates below
+
+       if (s->reg != NULL) {
+               d->reg = strdup(s->reg);
+               my_assert_not(d->reg, NULL);
+       }
+       if (s->type.name != NULL) {
+               d->type.name = strdup(s->type.name);
+               my_assert_not(d->type.name, NULL);
+       }
+       if (s->fptr != NULL) {
+               d->fptr = malloc(sizeof(*d->fptr));
+               my_assert_not(d->fptr, NULL);
+               memcpy(d->fptr, s->fptr, sizeof(*d->fptr)); // NOTE(review): bitwise copy only - any pointers inside *fptr stay shared with the source
        }
+}
+
+struct parsed_proto *proto_clone(const struct parsed_proto *pp_c) // returns a malloc()ed copy of *pp_c with its args and ret_type.name duplicated
+{
+       struct parsed_proto *pp;
+       int i;
 
-       return parse_protostr(protostr, pp) < 0 ? -1 : 0;
+       pp = malloc(sizeof(*pp));
+       my_assert_not(pp, NULL);
+       memcpy(pp, pp_c, sizeof(*pp)); // lazy..
+
+       // do the actual deep copy..
+       for (i = 0; i < pp_c->argc; i++)
+               pp_copy_arg(&pp->arg[i], &pp_c->arg[i]);
+       if (pp_c->ret_type.name != NULL)
+               pp->ret_type.name = strdup(pp_c->ret_type.name); // NOTE(review): result is not checked, unlike the strdup() calls in pp_copy_arg()
+
+       return pp;
+}
+
+
+static inline int pp_cmp_func(const struct parsed_proto *pp1, // returns 0 when both prototypes have the same arg counts and the same register per arg position, 1 otherwise; arg types are not compared
+  const struct parsed_proto *pp2)
+{
+  int i;
+
+  if (pp1->argc != pp2->argc || pp1->argc_reg != pp2->argc_reg)
+    return 1;
+  else {
+    for (i = 0; i < pp1->argc; i++) {
+      if ((pp1->arg[i].reg != NULL) != (pp2->arg[i].reg != NULL)) // only one of the two has a register for this arg
+        return 1;
+
+      if ((pp1->arg[i].reg != NULL)
+        && !IS(pp1->arg[i].reg, pp2->arg[i].reg)) // both have a register, but IS() says the names differ
+      {
+        return 1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+static inline void pp_print(char *buf, size_t buf_size, // write a one-line summary into buf: "<ret type> <name>(<regs>, {N stack})"
+  const struct parsed_proto *pp)
+{
+  size_t l; // current length of the text in buf
+  int i;
+
+  snprintf(buf, buf_size, "%s %s(", pp->ret_type.name, pp->name);
+  l = strlen(buf);
+
+  for (i = 0; i < pp->argc_reg; i++) {
+    snprintf(buf + l, buf_size - l, "%s%s",
+      i == 0 ? "" : ", ", pp->arg[i].reg); // NOTE(review): assumes the first argc_reg args are the ones with a non-NULL reg - confirm
+    l = strlen(buf);
+  }
+  if (pp->argc_stack > 0) {
+    snprintf(buf + l, buf_size - l, "%s{%d stack}",
+      i == 0 ? "" : ", ", pp->argc_stack); // i equals argc_reg here, so ", " is added only after at least one register
+    l = strlen(buf);
+  }
+  snprintf(buf + l, buf_size - l, ")");
 }
 
-static void proto_release(struct parsed_proto *pp)
+static inline void proto_release(struct parsed_proto *pp)
 {
        int i;
 
@@ -526,4 +829,5 @@ static void proto_release(struct parsed_proto *pp)
        }
        if (pp->ret_type.name != NULL)
                free(pp->ret_type.name);
+       free(pp);
 }