X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=ia32rtools.git;a=blobdiff_plain;f=tools%2Fprotoparse.h;h=3d70d8658dfc1988e14a659bf851c38ffa6b7dd1;hp=0197214a2578d6272779950bd4b237ee716fdd2e;hb=61e29183dd00fa64584fa8787008b21a1c70b8ce;hpb=de50b98baf577c2ab9b9f680ea102c1dad14eb7c diff --git a/tools/protoparse.h b/tools/protoparse.h index 0197214..3d70d86 100644 --- a/tools/protoparse.h +++ b/tools/protoparse.h @@ -1,3 +1,10 @@ +/* + * ia32rtools + * (C) notaz, 2013,2014 + * + * This work is licensed under the terms of 3-clause BSD license. + * See COPYING file in the top-level directory. + */ struct parsed_proto; @@ -5,6 +12,9 @@ struct parsed_type { char *name; unsigned int is_array:1; unsigned int is_ptr:1; + unsigned int is_struct:1; // split for args + unsigned int is_retreg:1; // register to return to caller + unsigned int is_va_list:1; }; struct parsed_proto_arg { @@ -26,82 +36,118 @@ struct parsed_proto { int argc_reg; unsigned int is_func:1; unsigned int is_stdcall:1; - unsigned int is_vararg:1; + unsigned int is_fastcall:1; + unsigned int is_vararg:1; // vararg func unsigned int is_fptr:1; unsigned int is_noreturn:1; + unsigned int is_unresolved:1; + unsigned int is_userstack:1; + unsigned int is_include:1; // not from top-level header + unsigned int is_osinc:1; // OS/system library func + unsigned int is_arg:1; // declared in some func arg + unsigned int has_structarg:1; + unsigned int has_retreg:1; }; static const char *hdrfn; static int hdrfline = 0; -static int find_protostr(char *dst, size_t dlen, FILE *fhdr, - const char *fname, const char *sym_) +static void pp_copy_arg(struct parsed_proto_arg *d, + const struct parsed_proto_arg *s); + +static int b_pp_c_handler(char *proto, const char *fname, + int is_include, int is_osinc); + +static int do_protostrs(FILE *fhdr, const char *fname, int is_include) { - const char *sym = sym_; const char *finc_name; + const char *hdrfn_saved; + char protostr[256]; + char path[256]; + char fname_inc[256]; + int is_osinc; FILE *finc; - int symlen; int line = 0; int ret; char *p; - if (sym[0] == '_' && strncmp(fname, "stdc", 4) == 0) - sym++; - symlen = strlen(sym); + hdrfn_saved = hdrfn; + hdrfn = fname; - rewind(fhdr); + is_osinc = strstr(fname, "stdc.hlist") + || strstr(fname, "win32.hlist"); - while (fgets(dst, dlen, fhdr)) + while (fgets(protostr, sizeof(protostr), fhdr)) { line++; - if (strncmp(dst, "//#include ", 11) == 0) { - finc_name = dst + 11; + if (strncmp(protostr, "//#include ", 11) == 0) { + finc_name = protostr + 11; p = strpbrk(finc_name, "\r\n "); if (p != NULL) *p = 0; - finc = fopen(finc_name, "r"); + path[0] = 0; + p = strrchr(hdrfn_saved, '/'); + if (p) { + memcpy(path, hdrfn_saved, + p - hdrfn_saved + 1); + path[p - hdrfn_saved + 1] = 0; + } + snprintf(fname_inc, sizeof(fname_inc), "%s%s", + path, finc_name); + finc = fopen(fname_inc, "r"); if (finc == NULL) { printf("%s:%d: can't open '%s'\n", - fname, line, finc_name); + fname_inc, line, finc_name); continue; } - ret = find_protostr(dst, dlen, finc, - finc_name, sym_); + ret = do_protostrs(finc, finc_name, 1); fclose(finc); - if (ret == 0) + if (ret < 0) break; continue; } - if (strncmp(sskip(dst), "//", 2) == 0) + if (strncmp(sskip(protostr), "//", 2) == 0) + continue; + + p = protostr + strlen(protostr); + for (p--; p >= protostr && my_isblank(*p); --p) + *p = 0; + if (p < protostr) continue; - p = strstr(dst, sym); - if (p != NULL && p > dst - && (my_isblank(p[-1]) || my_issep(p[-1])) - && (my_isblank(p[symlen]) || my_issep(p[symlen]))) + hdrfline = line; + + ret = b_pp_c_handler(protostr, hdrfn, is_include, + is_osinc); + if (ret < 0) break; } - hdrfline = line; - if (feof(fhdr)) - return -1; + hdrfn = hdrfn_saved; - p = dst + strlen(dst); - for (p--; p > dst && my_isblank(*p); --p) - *p = 0; + if (feof(fhdr)) + return 0; - return 0; + return -1; } -static int get_regparm(char *dst, size_t dlen, char *p) +static int get_regparm(char *dst, size_t dlen, char *p, int *retreg) { - int i, o; + int i = 0, o; + + *retreg = 0; if (*p != '<') return 0; - for (o = 0, i = 1; o < dlen; i++) { + i++; + if (p[i] == '*') { + *retreg = 1; + i++; + } + + for (o = 0; o < dlen; i++) { if (p[i] == 0) return 0; if (p[i] == '>') @@ -119,28 +165,52 @@ static const char *known_type_mod[] = { "unsigned", "struct", "enum", + "CONST", + "volatile", }; static const char *known_ptr_types[] = { + "FARPROC", + "WNDPROC", + "LINECALLBACK", "HACCEL", "HANDLE", "HBITMAP", + "HCALL", "HCURSOR", "HDC", + "HFONT", "HGDIOBJ", "HGLOBAL", + "HICON", "HINSTANCE", + "HIMC", // DWORD in mingw, ptr in wine.. + "HLINE", + "HLINEAPP", + "HLOCAL", "HMODULE", + "HPALETTE", "HRGN", "HRSRC", "HKEY", "HMENU", + "HWAVEOUT", "HWND", - "PLONG", + "PBYTE", + "PCRITICAL_SECTION", "PDWORD", + "PFILETIME", + "PLARGE_INTEGER", + "PHKEY", + "PLONG", + "PMEMORY_BASIC_INFORMATION", + "PUINT", "PVOID", "PCVOID", + "PWORD", "DLGPROC", + "TIMERPROC", + "WNDENUMPROC", "va_list", "__VALIST", }; @@ -177,6 +247,8 @@ static const char *skip_type_mod(const char *n) len = strlen(known_type_mod[i]); if (strncmp(n, known_type_mod[i], len) != 0) continue; + if (!my_isblank(n[len])) + continue; n += len; while (my_isblank(*n)) @@ -203,7 +275,7 @@ static int check_type(const char *name, struct parsed_type *type) break; } - if (n[0] == 'L' && n[1] == 'P') + if (n[0] == 'L' && n[1] == 'P' && strncmp(n, "LPARAM", 6)) type->is_ptr = 1; // assume single word @@ -224,6 +296,11 @@ static int check_type(const char *name, struct parsed_type *type) ret = n1 - name; type->name = strndup(name, ret); + if (IS(type->name, "__VALIST") || IS(type->name, "va_list")) + type->is_va_list = 1; + if (IS(type->name, "VOID")) + memcpy(type->name, "void", 4); + return ret; } @@ -246,12 +323,21 @@ static const char *map_reg(const char *reg) return reg; } +static int check_struct_arg(struct parsed_proto_arg *arg) +{ + if (IS(arg->type.name, "POINT")) + return 2 - 1; + + return 0; +} + static int parse_protostr(char *protostr, struct parsed_proto *pp) { struct parsed_proto_arg *arg; char regparm[16]; char buf[256]; char cconv[32]; + int is_retreg; int xarg = 0; char *p, *p1; int i, l; @@ -289,6 +375,11 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) } p = sskip(p + ret); + if (!strncmp(p, "noreturn ", 9)) { + pp->is_noreturn = 1; + p = sskip(p + 9); + } + if (!strchr(p, ')')) { p = next_idt(buf, sizeof(buf), p); p = sskip(p); @@ -325,14 +416,20 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) pp->is_stdcall = 0; else if (IS(cconv, "__stdcall")) pp->is_stdcall = 1; - else if (IS(cconv, "__fastcall")) - pp->is_stdcall = 1; + else if (IS(cconv, "__fastcall")) { + pp->is_fastcall = 1; + pp->is_stdcall = 1; // sort of.. + } else if (IS(cconv, "__thiscall")) pp->is_stdcall = 1; else if (IS(cconv, "__userpurge")) pp->is_stdcall = 1; // IDA else if (IS(cconv, "__usercall")) pp->is_stdcall = 0; // IDA + else if (IS(cconv, "__userstack")) { + pp->is_stdcall = 0; // custom + pp->is_userstack = 1; + } else if (IS(cconv, "WINAPI")) pp->is_stdcall = 1; else { @@ -347,7 +444,11 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) hdrfn, hdrfline, (p - protostr) + 1); return -1; } - p = sskip(p + 1); + p++; + // XXX: skipping extra asterisks, for now + while (*p == '*') + p++; + p = sskip(p); } p = next_idt(buf, sizeof(buf), p); @@ -359,10 +460,10 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) } strcpy(pp->name, buf); - ret = get_regparm(regparm, sizeof(regparm), p); + ret = get_regparm(regparm, sizeof(regparm), p, &is_retreg); if (ret > 0) { if (!IS(regparm, "eax") && !IS(regparm, "ax") - && !IS(regparm, "al")) + && !IS(regparm, "al") && !IS(regparm, "edx:eax")) { printf("%s:%d:%zd: bad regparm: %s\n", hdrfn, hdrfline, (p - protostr) + 1, regparm); @@ -373,6 +474,17 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) } if (pp->is_fptr) { + if (*p == '[') { + // not really ret_type is array, but ohwell + pp->ret_type.is_array = 1; + p = strchr(p + 1, ']'); + if (p == NULL) { + printf("%s:%d:%zd: ']' expected\n", + hdrfn, hdrfline, (p - protostr) + 1); + return -1; + } + p = sskip(p + 1); + } if (*p != ')') { printf("%s:%d:%zd: ')' expected\n", hdrfn, hdrfline, (p - protostr) + 1); @@ -400,8 +512,14 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) p++; break; } - if (*p == ',') + if (xarg > 0) { + if (*p != ',') { + printf("%s:%d:%zd: ',' expected\n", + hdrfn, hdrfline, (p - protostr) + 1); + return -1; + } p = sskip(p + 1); + } if (!strncmp(p, "...", 3)) { pp->is_vararg = 1; @@ -436,8 +554,12 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) hdrfn, hdrfline, p1 - protostr); return -1; } + arg->fptr->is_arg = 1; + // we don't use actual names right now.. + snprintf(arg->fptr->name, + sizeof(arg->fptr->name), "a%d", xarg); // we'll treat it as void * for non-calls - arg->type.name = "void *"; + arg->type.name = strdup("void *"); arg->type.is_ptr = 1; p = p1 + ret; @@ -454,12 +576,36 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) #endif arg->reg = NULL; - ret = get_regparm(regparm, sizeof(regparm), p); + ret = get_regparm(regparm, sizeof(regparm), p, &is_retreg); if (ret > 0) { p += ret; p = sskip(p); arg->reg = strdup(map_reg(regparm)); + arg->type.is_retreg = is_retreg; + pp->has_retreg |= is_retreg; + } + + if (strstr(arg->type.name, "int64") + || IS(arg->type.name, "double")) + { + // hack.. + free(arg->type.name); + arg->type.name = strdup("int"); + pp_copy_arg(&pp->arg[xarg], arg); + xarg++; + } + + ret = check_struct_arg(arg); + if (ret > 0) { + pp->has_structarg = 1; + arg->type.is_struct = 1; + free(arg->type.name); + arg->type.name = strdup("int"); + for (l = 0; l < ret; l++) { + pp_copy_arg(&pp->arg[xarg], arg); + xarg++; + } } } @@ -479,11 +625,6 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) pp->arg[1].reg = strdup("edx"); } - if (pp->is_vararg && pp->is_stdcall) { - printf("%s:%d: vararg stdcall?\n", hdrfn, hdrfline); - return -1; - } - pp->argc = xarg; for (i = 0; i < pp->argc; i++) { @@ -493,26 +634,188 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) pp->argc_reg++; } + if (pp->argc == 1 && pp->arg[0].reg != NULL + && IS(pp->arg[0].reg, "ecx")) + { + pp->is_fastcall = 1; + } + else if (pp->argc_reg == 2 + && pp->arg[0].reg != NULL && IS(pp->arg[0].reg, "ecx") + && pp->arg[1].reg != NULL && IS(pp->arg[1].reg, "edx")) + { + pp->is_fastcall = 1; + } + + if (pp->is_vararg && (pp->is_stdcall || pp->is_fastcall)) { + printf("%s:%d: vararg %s?\n", hdrfn, hdrfline, cconv); + return -1; + } + return p - protostr; } -static int proto_parse(FILE *fhdr, const char *sym, struct parsed_proto *pp) +static int pp_name_cmp(const void *p1, const void *p2) +{ + const struct parsed_proto *pp1 = p1, *pp2 = p2; + return strcmp(pp1->name, pp2->name); +} + +static struct parsed_proto *pp_cache; +static int pp_cache_size; +static int pp_cache_alloc; + +static int b_pp_c_handler(char *proto, const char *fname, + int is_include, int is_osinc) { - char protostr[256]; int ret; - memset(pp, 0, sizeof(*pp)); + if (pp_cache_size >= pp_cache_alloc) { + pp_cache_alloc = pp_cache_alloc * 2 + 64; + pp_cache = realloc(pp_cache, pp_cache_alloc + * sizeof(pp_cache[0])); + my_assert_not(pp_cache, NULL); + memset(pp_cache + pp_cache_size, 0, + (pp_cache_alloc - pp_cache_size) + * sizeof(pp_cache[0])); + } + + ret = parse_protostr(proto, &pp_cache[pp_cache_size]); + if (ret < 0) + return -1; + + pp_cache[pp_cache_size].is_include = is_include; + pp_cache[pp_cache_size].is_osinc = is_osinc; + pp_cache_size++; + return 0; +} + +static void build_pp_cache(FILE *fhdr) +{ + long pos; + int ret; + + pos = ftell(fhdr); + rewind(fhdr); + + ret = do_protostrs(fhdr, hdrfn, 0); + if (ret < 0) + exit(1); + + qsort(pp_cache, pp_cache_size, sizeof(pp_cache[0]), pp_name_cmp); + fseek(fhdr, pos, SEEK_SET); +} + +static const struct parsed_proto *proto_parse(FILE *fhdr, const char *sym, + int quiet) +{ + const struct parsed_proto *pp_ret; + struct parsed_proto pp_search; + char *p; + + if (pp_cache == NULL) + build_pp_cache(fhdr); + + if (sym[0] == '_') // && strncmp(fname, "stdc", 4) == 0) + sym++; + + strcpy(pp_search.name, sym); + p = strchr(pp_search.name, '@'); + if (p != NULL) + *p = 0; - ret = find_protostr(protostr, sizeof(protostr), fhdr, hdrfn, sym); - if (ret != 0) { + pp_ret = bsearch(&pp_search, pp_cache, pp_cache_size, + sizeof(pp_cache[0]), pp_name_cmp); + if (pp_ret == NULL && !quiet) printf("%s: sym '%s' is missing\n", hdrfn, sym); - return ret; + + return pp_ret; +} + +static void pp_copy_arg(struct parsed_proto_arg *d, + const struct parsed_proto_arg *s) +{ + memcpy(d, s, sizeof(*d)); + + if (s->reg != NULL) { + d->reg = strdup(s->reg); + my_assert_not(d->reg, NULL); + } + if (s->type.name != NULL) { + d->type.name = strdup(s->type.name); + my_assert_not(d->type.name, NULL); + } + if (s->fptr != NULL) { + d->fptr = malloc(sizeof(*d->fptr)); + my_assert_not(d->fptr, NULL); + memcpy(d->fptr, s->fptr, sizeof(*d->fptr)); } +} + +struct parsed_proto *proto_clone(const struct parsed_proto *pp_c) +{ + struct parsed_proto *pp; + int i; - return parse_protostr(protostr, pp) < 0 ? -1 : 0; + pp = malloc(sizeof(*pp)); + my_assert_not(pp, NULL); + memcpy(pp, pp_c, sizeof(*pp)); // lazy.. + + // do the actual deep copy.. + for (i = 0; i < pp_c->argc; i++) + pp_copy_arg(&pp->arg[i], &pp_c->arg[i]); + if (pp_c->ret_type.name != NULL) + pp->ret_type.name = strdup(pp_c->ret_type.name); + + return pp; +} + + +static inline int pp_cmp_func(const struct parsed_proto *pp1, + const struct parsed_proto *pp2) +{ + int i; + + if (pp1->argc != pp2->argc || pp1->argc_reg != pp2->argc_reg) + return 1; + else { + for (i = 0; i < pp1->argc; i++) { + if ((pp1->arg[i].reg != NULL) != (pp2->arg[i].reg != NULL)) + return 1; + + if ((pp1->arg[i].reg != NULL) + && !IS(pp1->arg[i].reg, pp2->arg[i].reg)) + { + return 1; + } + } + } + + return 0; +} + +static inline void pp_print(char *buf, size_t buf_size, + const struct parsed_proto *pp) +{ + size_t l; + int i; + + snprintf(buf, buf_size, "%s %s(", pp->ret_type.name, pp->name); + l = strlen(buf); + + for (i = 0; i < pp->argc_reg; i++) { + snprintf(buf + l, buf_size - l, "%s%s", + i == 0 ? "" : ", ", pp->arg[i].reg); + l = strlen(buf); + } + if (pp->argc_stack > 0) { + snprintf(buf + l, buf_size - l, "%s{%d stack}", + i == 0 ? "" : ", ", pp->argc_stack); + l = strlen(buf); + } + snprintf(buf + l, buf_size - l, ")"); } -static void proto_release(struct parsed_proto *pp) +static inline void proto_release(struct parsed_proto *pp) { int i; @@ -526,4 +829,5 @@ static void proto_release(struct parsed_proto *pp) } if (pp->ret_type.name != NULL) free(pp->ret_type.name); + free(pp); }