5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
9 #define NO_OBSOLETE_FUNCS
14 #include <kernwin.hpp>
// Non-zero (1) when string `w` begins with the prefix `y`, otherwise 0.
// `y` is evaluated twice, so pass an expression without side effects.
// Arguments and the whole expansion are parenthesized so the macro stays
// correct when used inside a larger expression.
#define IS_START(w, y) (strncmp((w), (y), strlen(y)) == 0)
// Number of elements in a real array (not a pointer or array parameter).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Array of strdup()ed copies of the name-list entries. It is filled and
// qsort()ed by rebuild_name_cache() and searched with
// bsearch()/name_cache_cmp() elsewhere in this file.
26 static char **name_cache;
// Number of entries currently held in name_cache.
27 static size_t name_cache_size;
29 // non-local branch targets
// Growable array of addresses appended by nonlocal_add(); run() sorts it and
// do_def_line() looks addresses up in it with bsearch().
30 static ea_t *nonlocal_bt;
// Capacity of nonlocal_bt, in elements.
31 static int nonlocal_bt_alloc;
// Number of nonlocal_bt elements that are in use.
32 static int nonlocal_bt_cnt;
34 //--------------------------------------------------------------------------
// Plugin initialization routine (presumably the init entry of the plugin
// description block -- TODO confirm). Its body is not visible in this excerpt,
// so its return value and side effects cannot be described from here.
35 static int idaapi init(void)
40 //--------------------------------------------------------------------------
// Plugin termination routine (the "terminate" entry of the plugin description
// block). Visible effects: nonlocal_bt_alloc is reset to 0, and nonlocal_bt and
// name_cache are each handled only when non-NULL.
41 static void idaapi term(void)
// The statements inside this branch are not visible in this excerpt
// (presumably they free and clear nonlocal_bt -- TODO confirm).
45 if (nonlocal_bt != NULL) {
49 nonlocal_bt_alloc = 0;
// Walks every cached name; the loop body is not visible (presumably it frees
// each strdup()ed entry -- TODO confirm).
51 if (name_cache != NULL) {
52 for (i = 0; i < name_cache_size; i++)
60 //--------------------------------------------------------------------------
// Names that is_name_reserved() matches case-insensitively. run() renames frame
// members and flags global names for renaming when they hit this list.
// The entries themselves are not visible in this excerpt.
62 static const char *reserved_names[] = {
// Compares `name` against every entry of reserved_names, ignoring case.
// The return statements are not visible in this excerpt; callers in run() use
// the result as a boolean where non-zero means "reserved".
75 static int is_name_reserved(const char *name)
78 for (i = 0; i < ARRAY_SIZE(reserved_names); i++)
79 if (strcasecmp(name, reserved_names[i]) == 0)
85 /* these tend to cause linker conflicts */
// Matched case-insensitively by is_name_useless(). run() tests a copy of each
// global name against this list and, on a hit, clears the name with
// set_name(ea, "", SN_AUTO).
86 static const char *useless_names[] = {
87 "target", "addend", "lpMem", "Locale", "lpfn",
88 "CodePage", "uNumber", "Caption", "Default", "SubKey",
89 "ValueName", "OutputString", "LibFileName", "AppName",
90 "Buffer", "ClassName", "dwProcessId", "FileName",
91 "aExp", "aLog10", "aDelete", "aFont",
92 "lpCriticalSection", "CriticalSection", "lpAddress",
93 "lpBuffer", "lpClassName", "lpName",
94 "hHeap", "hEvent", "hHandle", "hObject",
95 "hLibModule", "hInstance",
// Compares `name` against every entry of useless_names, ignoring case.
// The return statements are not visible in this excerpt; run() uses the result
// as a boolean where non-zero means the name should be removed.
98 static int is_name_useless(const char *name)
101 for (i = 0; i < ARRAY_SIZE(useless_names); i++)
102 if (strcasecmp(name, useless_names[i]) == 0)
// Comparator passed to qsort()/bsearch() over nonlocal_bt. Both arguments
// point at ea_t elements. The comparison itself is on a line that is not
// visible in this excerpt.
108 static int nonlocal_bt_cmp(const void *p1, const void *p2)
110 const ea_t *e1 = (const ea_t *)p1, *e2 = (const ea_t *)p2;
// Appends `ea` to the nonlocal_bt array, enlarging the array first when all
// allocated slots are in use.
114 static void nonlocal_add(ea_t ea)
116 if (nonlocal_bt_cnt >= nonlocal_bt_alloc) {
// New capacity = old + (old * 2 + 64): 64 slots on first use, then roughly
// triple the previous capacity.
117 nonlocal_bt_alloc += nonlocal_bt_alloc * 2 + 64;
// NOTE(review): the realloc() result overwrites nonlocal_bt directly, so the
// previous buffer pointer is lost if realloc() fails.
118 nonlocal_bt = (ea_t *)realloc(nonlocal_bt,
119 nonlocal_bt_alloc * sizeof(nonlocal_bt[0]));
// Allocation-failure branch; its handling is not visible in this excerpt.
120 if (nonlocal_bt == NULL) {
125 nonlocal_bt[nonlocal_bt_cnt++] = ea;
128 // is instruction a (un)conditional jump (not call)?
// Returns non-zero when `itype` is NN_jmp or lies in the inclusive range
// NN_ja..NN_jz.
// NOTE(review): assumes every instruction code between NN_ja and NN_jz is a
// conditional jump -- confirm against the NN_* enumeration in the SDK.
129 static int is_insn_jmp(uint16 itype)
131 return itype == NN_jmp || (NN_ja <= itype && itype <= NN_jz);
// Produces the plain-text form of one listing line in `buf` and logs
// offset-type data xrefs that point into the item at `ea`.
//   buf, buf_size - destination buffer and its capacity
//   line          - listing line as returned by linearray_t::down() in run()
//   ea            - address of the item the line belongs to
//   func          - function owning ea; run() passes NULL for the header lines
134 static void do_def_line(char *buf, size_t buf_size, const char *line,
135 ea_t ea, func_t *func)
// Placeholder name; get_func_name() below writes into this buffer.
137 char func_name[256] = "<nf>";
144 tag_remove(line, buf, buf_size); // remove color codes
// Shift the text left by 9 characters (the address column, per the existing
// note); the +1 carries the terminating NUL along.
150 memmove(buf, buf + 9, len - 9 + 1); // rm address
// Scan up to the first blank or ':' (what is done with p afterwards is not
// visible in this excerpt).
153 while (*p && *p != ' ' && *p != ':')
// A label counts as global when its address is among the collected non-local
// branch targets. bsearch() requires nonlocal_bt to be sorted; run() qsort()s
// it before writing the file.
156 ea_ret = (ea_t *)bsearch(&ea, nonlocal_bt, nonlocal_bt_cnt,
157 sizeof(nonlocal_bt[0]), nonlocal_bt_cmp);
158 global_label = (ea_ret != NULL);
// NOTE(review): func can be NULL (see the header-line call in run()). The
// guard around the next two lines is not visible here -- confirm one exists.
161 get_func_name(ea, func_name, sizeof(func_name));
162 is_libfunc = func->flags & FUNC_LIB;
// Check every byte of the item for incoming data xrefs.
164 for (i = 0; i < get_item_size(ea); i++) {
166 if (xb.first_to(ea + i, XREF_DATA)) {
// Offset xrefs (dr_O) into non-library functions are reported in the log.
167 if (!is_libfunc && xb.type == dr_O)
168 msg("%x: offset xref in %s\n", ea, func_name);
// Diagnostic for a line that lacks an expected trailing blank (the condition
// is not visible in this excerpt).
175 msg("no trailing blank in '%s'\n", buf);
// Comparator for qsort()/bsearch() over name_cache. Each argument points at a
// `char *` element, and the pointed-to strings are compared with stricmp(),
// i.e. without regard to case.
182 static int name_cache_cmp(const void *p1, const void *p2)
184 // masm ignores case, so do we
185 return stricmp(*(char * const *)p1, *(char * const *)p2);
// Refreshes name_cache from the current name list (get_nlist_size() /
// get_nlist_name()) and sorts it so that it can be searched with bsearch().
188 static void rebuild_name_cache(void)
193 // build a sorted name cache
194 newsize = get_nlist_size();
// The pointer array is only reallocated when the name list has grown.
195 if (newsize > name_cache_size) {
196 tmp = realloc(name_cache, newsize * sizeof(name_cache[0]));
// Reported when the reallocation fails (the check itself and what follows it
// are not visible in this excerpt).
198 msg("OOM for name cache\n");
201 name_cache = (char **)tmp;
// Walks the previous entries; the loop body is not visible (presumably it
// frees the old strdup()ed strings -- TODO confirm).
203 for (i = 0; i < name_cache_size; i++)
// Store a private copy of every name.
// NOTE(review): no check of the strdup() result is visible, and run() treats
// a NULL name from get_nlist_name() as possible -- confirm NULL cannot reach
// strdup() here.
205 for (i = 0; i < newsize; i++)
206 name_cache[i] = strdup(get_nlist_name(i));
208 name_cache_size = newsize;
// Sort the cache (the comparator argument is on a line not visible here;
// lookups elsewhere use name_cache_cmp).
209 qsort(name_cache, name_cache_size, sizeof(name_cache[0]),
// Gives the item at `ea` (currently called `name`) a new name and then
// rebuilds the name cache. Candidate names are checked against name_cache with
// the case-insensitive comparator.
//   ea   - address of the item to rename
//   name - its current name, used as the base for the new one
213 static void my_rename(ea_t ea, char *name)
// First candidate: the name unchanged.
219 qsnprintf(buf, sizeof(buf), "%s", name);
// Look the candidate up in the cache (p presumably points at buf; the
// assignment and the surrounding loop are not visible -- TODO confirm).
222 pp = (char **)bsearch(&p, name_cache, name_cache_size,
223 sizeof(name_cache[0]), name_cache_cmp);
// Next candidate: "<name>_g<n>".
227 qsnprintf(buf, sizeof(buf), "%s_g%d", name, n);
// Diagnostic for the case where no usable candidate was found (the condition
// is not visible in this excerpt).
233 msg("rename failure? '%s'\n", name);
// Apply the chosen name, then refresh the cache so later lookups see it.
235 do_name_anyway(ea, buf);
236 rebuild_name_cache();
// Converts the item at `ea` into an alignment directive that spans up to the
// next head, provided that head sits on a 4-byte boundary.
239 static void make_align(ea_t ea)
244 tmp_ea = next_head(ea, inf.maxEA);
// Only proceed when the following head is 4-byte aligned.
245 if ((tmp_ea & 0x03) == 0) {
246 n = calc_max_align(tmp_ea);
// The statement controlled by this test is not visible in this excerpt
// (presumably it limits n to 4 -- TODO confirm).
247 if (n > 4) // masm doesn't like more..
// n is used as a power-of-two exponent: the logged alignment is 1 << n.
249 msg("%x: align %d\n", ea, 1 << n);
// Undefine the existing item, then define an align item of length
// tmp_ea - ea with exponent n.
250 do_unknown(ea, DOUNK_SIMPLE);
251 doAlign(ea, tmp_ea - ea, n);
// Plugin entry point (the "invoke plugin" entry of the description block).
// The argument is unused. Stages visible in this excerpt:
//   1. walk the struct list (per the existing comment, to get rid of structs)
//      and build the name cache
//   2. 1st pass over functions: fix frame member names, rename conflicting
//      global names, annotate 'jmp $+5' tailcalls
//   3. 2nd pass over code/data heads: adjust operand and data types, collect
//      non-local branch targets
//   4. walk the name list and rename or remove problematic names
//   5. ask for an output file and write the listing line by line with
//      textual fix-ups
// Many statements of this function are not visible in this excerpt; the notes
// below describe only what is shown and mark everything else as an assumption.
255 static void idaapi run(int /*arg*/)
257 // isEnabled(ea) // address belongs to disassembly
258 // ea_t ea = get_screen_ea();
260 // foo = DecodeInstruction(ScreenEA());
269 ea_t ui_ea_block = 0, ea_size;
270 ea_t tmp_ea, target_ea;
281 // get rid of structs, masm doesn't understand them
// idx is re-read from get_first_struc_idx() on every iteration, so the loop
// always works on whatever struct is currently first (the removing call
// itself is not visible in this excerpt).
282 idx = get_first_struc_idx();
283 while (idx != BADNODE) {
284 tid_t tid = get_struc_by_idx(idx);
285 struc_t *struc = get_struc(tid);
286 get_struc_name(tid, buf, sizeof(buf));
287 msg("removing struct '%s'\n", buf);
288 //del_struc_members(struc, 0, get_max_offset(struc));
291 idx = get_first_struc_idx();
294 rebuild_name_cache();
296 // 1st pass: walk through all funcs
301 func_tail_iterator_t fti(func);
303 msg("%x: func_tail_iterator_t main failed\n", ea);
306 const area_t &f_area = fti.chunk();
309 // rename global syms which conflict with frame member names
310 frame = get_frame(func);
313 for (m = 0; m < (int)frame->memqty; m++)
315 ret = get_member_name(frame->members[m].id, buf, sizeof(buf));
317 msg("%x: member has no name?\n", ea);
320 if (buf[0] == ' ') // what's this?
// Members named arg_*/var_* get separate treatment (the controlled statement
// is not visible; presumably they are skipped -- TODO confirm).
322 if (IS_START(buf, "arg_") || IS_START(buf, "var_"))
325 // check for dupe names
// Compare against all earlier members of the same frame, ignoring case.
327 for (m1 = 0; m1 < m; m1++) {
328 get_member_name(frame->members[m1].id, buf2, sizeof(buf2));
329 if (stricmp(buf, buf2) == 0)
// Reserved or duplicated member names get a "_" appended.
333 if (is_name_reserved(buf) || dupe) {
334 msg("%x: renaming '%s'\n", ea, buf);
335 qstrncat(buf, "_", sizeof(buf));
336 ret = set_member_name(frame, frame->members[m].soff, buf);
338 msg("%x: renaming failed\n", ea);
// Look the member name up among the global names; on a hit the global item is
// renamed via my_rename() (the hit test itself is not visible here).
344 pp = (char **)bsearch(&p, name_cache, name_cache_size,
345 sizeof(name_cache[0]), name_cache_cmp);
349 tmp_ea = get_name_ea(BADADDR, *pp);
350 msg("%x: renaming '%s' because of '%s' at %x\n",
351 tmp_ea, *pp, buf, ea);
352 my_rename(tmp_ea, *pp);
356 // detect tailcalls to next func with 'jmp $+5' (offset 0)
// Conditions: the chunk is at least 5 bytes long, its last 5 bytes decode as a
// near jmp whose target is the chunk end, that address has a name, and the jmp
// has no comment yet. The name is then recorded in a "sctpatch: jmp <name>"
// comment on the jmp.
357 if (f_area.endEA - f_area.startEA >= 5
358 && decode_insn(f_area.endEA - 5) && cmd.itype == NN_jmp
359 && cmd.Operands[0].type == o_near
360 && cmd.Operands[0].addr == f_area.endEA
361 && get_name(BADADDR, f_area.endEA, buf, sizeof(buf))
362 && get_cmt(f_area.endEA - 5, false, buf2, sizeof(buf2)) <= 0)
364 qsnprintf(buf2, sizeof(buf2), "sctpatch: jmp %s", buf);
365 set_cmt(f_area.endEA - 5, buf2, false);
368 func = get_next_func(ea);
371 // 2nd pass over whole .text and .(ro)data segments
372 for (ea = inf.minEA; ea != BADADDR; ea = next_head(ea, inf.maxEA))
374 segment_t *seg = getseg(ea);
// Segment type filters (the controlled statements are not visible here).
377 if (seg->type == SEG_XTRN)
379 if (seg->type != SEG_CODE && seg->type != SEG_DATA)
382 ea_flags = get_flags_novalue(ea);
384 if (isCode(ea_flags))
386 if (!decode_insn(ea)) {
387 msg("%x: decode_insn() failed\n", ea);
391 // masm doesn't understand IDA's float/xmm types
392 if (cmd.itype == NN_fld || cmd.itype == NN_fst
393 || cmd.itype == NN_movapd || cmd.itype == NN_movlpd)
// Inspect each operand until the first o_void; only o_mem operands are
// examined further.
395 for (o = 0; o < UA_MAXOP; o++) {
396 if (cmd.Operands[o].type == o_void)
399 if (cmd.Operands[o].type == o_mem) {
400 tmp_ea = cmd.Operands[o].addr;
401 flags_t tmp_flg = get_flags_novalue(tmp_ea);
// Referenced doubles are converted (the converting call is not visible here).
403 if (isDouble(tmp_flg))
405 get_name(ea, tmp_ea, buf, sizeof(buf));
406 msg("%x: converting dbl %x '%s'\n", ea, tmp_ea, buf);
// Referenced oword/yword/tbyte items are undefined.
409 if (isOwrd(tmp_flg) || isYwrd(tmp_flg) || isTbyt(tmp_flg))
411 get_name(ea, tmp_ea, buf, sizeof(buf));
412 msg("%x: undefining lrg %x '%s'\n", ea, tmp_ea, buf);
413 do_unknown(tmp_ea, DOUNK_EXPAND);
418 else if (cmd.itype == NN_lea) {
419 // detect code alignment
// lea with the same register on both sides and a zero displacement (the
// action taken is not visible in this excerpt).
420 if (cmd.Operands[0].reg == cmd.Operands[1].reg
421 && cmd.Operands[1].type == o_displ
422 && cmd.Operands[1].addr == 0)
427 else if (!isDefArg1(ea_flags)
428 && cmd.Operands[1].type == o_mem // why o_mem?
429 && cmd.Operands[1].dtyp == dt_dword)
// An operand address inside [inf.minEA, inf.maxEA) is turned into a 32-bit
// offset; the other case is only logged here (its handling is not visible).
431 if (inf.minEA <= cmd.Operands[1].addr
432 && cmd.Operands[1].addr < inf.maxEA)
434 // lea to segments, like ds:58D6A8h[edx*8]
435 msg("%x: lea offset to %x\n", ea, cmd.Operands[1].addr);
436 op_offset(ea, 1, REF_OFF32);
440 // ds:0[eax*8] -> [eax*8+0]
441 msg("%x: dropping ds: for %x\n", ea, cmd.Operands[1].addr);
446 else if (cmd.itype == NN_mov && cmd.segpref == 0x1e // 2e?
447 && cmd.Operands[0].type == o_reg
448 && cmd.Operands[1].type == o_reg
449 && cmd.Operands[0].dtyp == cmd.Operands[1].dtyp
450 && cmd.Operands[0].reg == cmd.Operands[1].reg)
452 // db 2Eh; mov eax, eax
456 // find non-local branches
457 if (is_insn_jmp(cmd.itype) && cmd.Operands[0].type == o_near)
459 target_ea = cmd.Operands[0].addr;
461 nonlocal_add(target_ea);
463 ret = get_func_chunknum(func, target_ea);
465 // a jump to another func or chunk
466 // check if it lands on func start
467 if (!isFunc(get_flags_novalue(target_ea)))
468 nonlocal_add(target_ea);
475 ea_size = get_item_size(ea);
// Offset-typed item outside any function: every 32-bit value in the item is
// recorded as a possible branch target.
477 if (func == NULL && isOff0(ea_flags)) {
478 for (tmp_ea = 0; tmp_ea < ea_size; tmp_ea += 4)
479 nonlocal_add(get_long(ea + tmp_ea));
482 // IDA vs masm float/mmx/xmm type incompatibility
483 if (isDouble(ea_flags))
485 msg("%x: converting double\n", ea);
488 else if (isTbyt(ea_flags) || isPackReal(ea_flags))
492 else if (isOwrd(ea_flags)) {
494 get_name(BADADDR, ea, buf, sizeof(buf));
495 if (IS_START(buf, "xmm"))
498 // masm doesn't understand IDA's unicode
// Heuristic: the first 4 bytes have zero in both odd-position bytes.
499 else if (isASCII(ea_flags) && ea_size >= 4
500 && (get_long(ea) & 0xff00ff00) == 0) // lame..
504 // masm doesn't understand large aligns
505 else if (isAlign(ea_flags) && ea_size >= 0x10)
507 msg("%x: undefining align %d\n", ea, ea_size);
508 do_unknown(ea, DOUNK_EXPAND);
513 get_name(BADADDR, ea, buf, sizeof(buf));
514 msg("%x: undefining '%s'\n", ea, buf);
515 do_unknown(ea, DOUNK_EXPAND);
520 // check namelist for reserved names and
521 // matching names with different case (masm ignores case)
522 n = get_nlist_size();
523 for (i = 0; i < n; i++) {
526 ea = get_nlist_ea(i);
527 ea_flags = get_flags_novalue(ea);
528 name = get_nlist_name(i);
530 msg("%x: null name?\n", ea);
534 qsnprintf(buf, sizeof(buf), "%s", name);
536 // for short names, give them a postfix to solve link dupe problem
// Applies to non-code names of at most 4 characters; the postfix is the
// address as 6 hex digits.
537 if (!isCode(ea_flags) && strlen(name) <= 4) {
538 qsnprintf(buf, sizeof(buf), "%s_%06X", name, ea);
// Test a copy of the name against the useless-name list (what is done at the
// first '_' is not visible; presumably the copy is cut there -- TODO confirm).
542 qsnprintf(buf2, sizeof(buf2), "%s", name);
543 if ((p = strchr(buf2, '_')))
545 if (is_name_useless(buf2)) {
546 msg("%x: removing name '%s'\n", ea, name);
547 ret = set_name(ea, "", SN_AUTO);
// The list size is re-read after a name has been removed.
549 n = get_nlist_size();
556 need_rename |= is_name_reserved(name);
// Find the name in the cache and check its sorted neighbours for entries that
// differ only by case.
559 pp = (char **)bsearch(&p, name_cache, name_cache_size,
560 sizeof(name_cache[0]), name_cache_cmp);
562 if (pp > name_cache && stricmp(pp[-1], pp[0]) == 0)
564 else if (pp < name_cache + name_cache_size - 1
565 && stricmp(pp[0], pp[1]) == 0)
572 // rename vars with '?@' (funcs are ok)
574 if (!isCode(ea_flags)) {
575 if (IS_START(name, "__imp_"))
576 need_rename = 0; /* some import */
577 else if (name[0] == '?' && strstr(name, "@@"))
578 need_rename = 0; /* c++ import */
579 else if (strchr(name, '?'))
581 else if ((cp = strchr(name, '@'))) {
// Parse the text after '@' as a decimal number; the test below is true when
// something other than digits follows.
583 strtol(cp + 1, &endp, 10);
584 if (endp == NULL || *endp != 0)
589 if (need_rename || change_qat) {
590 msg("%x: renaming name '%s'\n", ea, name);
// Each '?' or '@' is formatted as two hex digits in buf2 and the tail of buf
// is shifted right by one to make room (the copy into buf is not visible).
593 for (p = buf; *p != 0; p++) {
594 if (*p == '?' || *p == '@') {
595 qsnprintf(buf2, sizeof(buf2), "%02x", (unsigned char)*p);
596 memmove(p + 1, p, strlen(p) + 1);
// Sort the collected branch targets so do_def_line() can bsearch() them.
606 if (nonlocal_bt_cnt > 1) {
607 qsort(nonlocal_bt, nonlocal_bt_cnt,
608 sizeof(nonlocal_bt[0]), nonlocal_bt_cmp);
611 char *fname = askfile_c(1, NULL, "Save asm file");
614 fout = qfopen(fname, "w");
616 msg("couldn't open '%s'\n", fname);
620 show_wait_box("Saving..");
622 // deal with the beginning
624 int flags = 0; // calc_default_idaplace_flags();
625 linearray_t ln(&flags);
// Header lines: all but the last generated line are processed here, with no
// owning function (func == NULL).
630 n = ln.get_linecnt();
631 for (i = 0; i < n - 1; i++) {
632 do_def_line(buf, sizeof(buf), ln.down(), ea, NULL);
633 if (strstr(buf, "include"))
637 qfprintf(fout, "%s\n", buf);
// A line containing ".mmx" has those four characters overwritten with ".xmm"
// and is written out (the guarding test is not visible here).
638 p = strstr(buf, ".mmx");
640 memcpy(p, ".xmm", 4);
642 qfprintf(fout, "%s\n", buf);
// From ".model" onwards the text is replaced with "include imports.inc" and
// written after a blank line (the guarding test is not visible here).
645 p = strstr(buf, ".model");
647 qstrncpy(p, "include imports.inc", sizeof(buf) - (p - buf));
649 qfprintf(fout, "\n%s\n", buf);
// Per-item fix-up flags consumed by the text patches further down; most of
// the statements that set them are not visible in this excerpt.
658 int drop_large = 0, do_rva = 0, set_scale = 0, jmp_near = 0;
659 int word_imm = 0, dword_imm = 0, do_pushf = 0, do_nops = 0;
// Detects when ea enters a new 16 KiB block (ea >> 14); what is done then is
// not visible (presumably a progress update -- TODO confirm).
661 if ((ea >> 14) != ui_ea_block) {
662 ui_ea_block = ea >> 14;
670 segment_t *seg = getseg(ea);
671 if (!seg || (seg->type != SEG_CODE && seg->type != SEG_DATA))
674 ea_flags = get_flags_novalue(ea);
675 if (isCode(ea_flags))
677 if (!decode_insn(ea))
680 if (is_insn_jmp(cmd.itype) && cmd.Operands[0].type == o_near
681 && cmd.Operands[0].dtyp == dt_dword)
685 else if ((cmd.itype == NN_pushf || cmd.itype == NN_popf)
691 for (o = 0; o < UA_MAXOP; o++) {
692 const op_t &opr = cmd.Operands[o];
693 if (opr.type == o_void)
697 if (opr.type == o_mem && opr.specval_shorts.high == 0x21)
699 if (opr.hasSIB && x86_scale(opr) == 0
700 && x86_index(opr) != INDEX_NONE)
704 // annoying alignment variant..
// Small-magnitude dword/word immediates that are nevertheless stored at full
// width in the instruction bytes (checked by re-reading them from ea + offb).
705 if (opr.type == o_imm && opr.dtyp == dt_dword
706 && (opr.value < 0x80 || opr.value > 0xffffff80)
707 && cmd.size >= opr.offb + 4)
709 if (get_long(ea + opr.offb) == opr.value)
712 else if (opr.type == o_imm && opr.dtyp == dt_word
713 && (opr.value < 0x80 || opr.value > 0xff80)
714 && cmd.size >= opr.offb + 2)
716 if (get_word(ea + opr.offb) == (ushort)opr.value)
719 else if (opr.type == o_displ && opr.addr == 0
720 && opr.offb != 0 && opr.hasSIB && opr.sib == 0x24)
722 // uses [esp+0] with 0 encoded into op
728 if (isOff0(ea_flags))
// Emit every remaining listing line of this item, patching the text first.
733 n = ln.get_linecnt();
734 for (i = pl.lnnum; i < n; i++) {
735 do_def_line(buf, sizeof(buf), ln.down(), ea, func);
// fw = first non-blank character of the line (the loop's advance step is not
// visible here).
738 for (fw = buf; *fw != 0 && *fw == ' '; )
// Delete the 6-character "large " keyword.
743 p = strstr(fw, "large ");
745 memmove(p, p + 6, strlen(p + 6) + 1);
// Replace "rva" with "offset": the tail is shifted right by 3 and the 6
// characters after the leading blank are overwritten.
748 p = strstr(fw, " rva ");
751 memmove(p + 4 + 3, p + 4, strlen(p + 4) + 1);
752 memcpy(p + 1, "offset", 6);
758 if (p != NULL && p[1] == 'e') {
760 // scale is 1, must specify it explicitly so that
761 // masm chooses the right scaled reg
// Makes room for 2 extra characters at p (the write is not visible here).
762 memmove(p + 2, p, strlen(p) + 1);
// Indented line starting with 'j': skip the mnemonic and the blanks after it,
// then insert "near ptr " in front of the operand.
768 if (fw != buf && fw[0] == 'j')
770 while (p && *p != ' ')
772 while (p && *p == ' ')
775 memmove(p + 9, p, strlen(p) + 1);
776 memcpy(p, "near ptr ", 9);
// Numeric second operand: insert an explicit "word ptr " / "dword ptr "
// (a line between the test and the memmove is not visible in each case;
// presumably it advances p past ", " -- TODO confirm).
781 p = strstr(fw, ", ");
782 if (p != NULL && '0' <= p[2] && p[2] <= '9') {
784 memmove(p + 9, p, strlen(p) + 1);
785 memcpy(p, "word ptr ", 9);
788 else if (dword_imm) {
789 p = strstr(fw, ", ");
790 if (p != NULL && '0' <= p[2] && p[2] <= '9') {
792 memmove(p + 10, p, strlen(p) + 1);
793 memcpy(p, "dword ptr ", 10);
// pushf/popf: make room for one character right after the 'f' (the character
// written there is not visible in this excerpt).
797 p = strstr(fw, "pushf");
799 p = strstr(fw, "popf");
801 p = strchr(p, 'f') + 1;
802 memmove(p + 1, p, strlen(p) + 1);
807 if (fw[0] == 'a' && IS_START(fw, "assume cs")) {
808 // "assume cs" causes problems with ext syms
// Makes room for one character in front of the directive (the character
// written there is not visible in this excerpt).
809 memmove(fw + 1, fw, strlen(fw) + 1);
// The "end " directive is preceded by an include of public.inc.
812 else if (fw[0] == 'e' && IS_START(fw, "end") && fw[3] == ' ') {
814 qfprintf(fout, "include public.inc\n\n");
821 qfprintf(fout, "%s\n", buf);
// Emit do_nops padding nop instructions after the item.
824 while (do_nops-- > 0)
825 qfprintf(fout, " nop ; adj\n");
827 // note: next_head skips some undefined stuff
828 ea = next_not_tail(ea); // correct?
843 msg("%d lines saved.\n", fout_line);
846 //--------------------------------------------------------------------------
// Descriptive strings referenced from the plugin description block below.
// NOTE(review): this text says "nasm" while the comments throughout this file
// refer to masm -- confirm which assembler is meant.
848 static const char comment[] = "Generate disassembly for nasm";
849 static const char help[] = "Generate asm file\n";
// Preferred short name and hotkey of the plugin (see the description block).
850 static const char wanted_name[] = "Save asm";
851 static const char wanted_hotkey[] = "Shift-S";
853 //--------------------------------------------------------------------------
855 // PLUGIN DESCRIPTION BLOCK
857 //--------------------------------------------------------------------------
// Fields of the plugin description initializer. The declaration line and some
// entries are not visible in this excerpt.
860 IDP_INTERFACE_VERSION,
863 term, // terminate. this pointer may be NULL.
864 run, // invoke plugin
865 comment, // long comment about the plugin
866 // it could appear in the status line
868 help, // multiline help about the plugin
869 wanted_name, // the preferred short name of the plugin
870 wanted_hotkey // the preferred hotkey to run the plugin
873 // vim:ts=2:shiftwidth=2:expandtab