plugin: detect lea offsets
[ia32rtools.git] / plugin / saveasm.cpp
CommitLineData
fc1c61f5 1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
d8891fcc 9#define NO_OBSOLETE_FUNCS
10#include <ida.hpp>
11#include <idp.hpp>
12#include <bytes.hpp>
13#include <loader.hpp>
14#include <kernwin.hpp>
15
16#include <name.hpp>
17#include <frame.hpp>
18#include <struct.hpp>
fc1c61f5 19#include <offset.hpp>
d8891fcc 20#include <auto.hpp>
15c7b2a4 21#include <intel.hpp>
d8891fcc 22
// Non-zero when string w begins with prefix y.
// Note: y is evaluated twice, so pass side-effect-free expressions only.
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
// Element count of a true array (must not be used on a pointer).
// The argument is parenthesised so expressions such as *(type *)ptr work.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
25
// non-local branch targets
// Growable array of addresses: filled by nonlocal_add(), sorted by run()
// with qsort() and queried by do_def_line() with bsearch().
static ea_t *nonlocal_bt;       // heap array grown with realloc(); freed in term()
static int nonlocal_bt_alloc;   // capacity of nonlocal_bt, in elements
static int nonlocal_bt_cnt;     // number of entries in use; reset at the start of run()
30
d8891fcc 31//--------------------------------------------------------------------------
// Plugin "initialize" callback (see the PLUGIN descriptor).
// There is nothing to set up, so it unconditionally returns PLUGIN_OK.
static int idaapi init(void)
{
  return PLUGIN_OK;
}
36
37//--------------------------------------------------------------------------
38static void idaapi term(void)
39{
15c7b2a4 40 if (nonlocal_bt != NULL) {
41 free(nonlocal_bt);
42 nonlocal_bt = NULL;
43 }
44 nonlocal_bt_alloc = 0;
d8891fcc 45}
46
47//--------------------------------------------------------------------------
48
// Names that get renamed before the listing is written out; they are
// matched case-insensitively by is_name_reserved().
// (presumably words the target assembler treats specially - confirm)
static const char * const reserved_names[] = {
  "name",
  "type",
  "offset",
  "aam",
  "text",
  "size",
  "c",
};
58
59static int is_name_reserved(const char *name)
60{
61 int i;
62 for (i = 0; i < ARRAY_SIZE(reserved_names); i++)
63 if (strcasecmp(name, reserved_names[i]) == 0)
64 return 1;
65
66 return 0;
67}
68
15c7b2a4 69static int nonlocal_bt_cmp(const void *p1, const void *p2)
70{
71 const ea_t *e1 = (const ea_t *)p1, *e2 = (const ea_t *)p2;
72 return *e1 - *e2;
73}
74
75static void nonlocal_add(ea_t ea)
76{
77 if (nonlocal_bt_cnt >= nonlocal_bt_alloc) {
78 nonlocal_bt_alloc += nonlocal_bt_alloc * 2 + 64;
79 nonlocal_bt = (ea_t *)realloc(nonlocal_bt,
80 nonlocal_bt_alloc * sizeof(nonlocal_bt[0]));
81 if (nonlocal_bt == NULL) {
82 msg("OOM\n");
83 return;
84 }
85 }
86 nonlocal_bt[nonlocal_bt_cnt++] = ea;
87}
88
b587e6ae 89// is instruction a (un)conditional jump (not call)?
90static int is_insn_jmp(uint16 itype)
91{
92 return itype == NN_jmp || (NN_ja <= itype && itype <= NN_jz);
93}
94
d53d4cc7 95static void do_def_line(char *buf, size_t buf_size, const char *line,
96 ea_t ea)
d8891fcc 97{
d53d4cc7 98 ea_t *ea_ret;
99 char *p;
d8891fcc 100 int len;
101
102 tag_remove(line, buf, buf_size); // remove color codes
103 len = strlen(buf);
104 if (len < 9) {
105 buf[0] = 0;
106 return;
107 }
108 memmove(buf, buf + 9, len - 9 + 1); // rm address
15c7b2a4 109
d53d4cc7 110 p = buf;
111 while (*p && *p != ' ' && *p != ':')
112 p++;
113 if (*p == ':') {
114 ea_ret = (ea_t *)bsearch(&ea, nonlocal_bt, nonlocal_bt_cnt,
115 sizeof(nonlocal_bt[0]), nonlocal_bt_cmp);
116 if (ea_ret != 0) {
117 if (p[1] != ' ')
118 msg("no trailing blank in '%s'\n", buf);
119 else
120 p[1] = ':';
15c7b2a4 121 }
122 }
d8891fcc 123}
124
125static void idaapi run(int /*arg*/)
126{
15c7b2a4 127 // isEnabled(ea) // address belongs to disassembly
d8891fcc 128 // ea_t ea = get_screen_ea();
15c7b2a4 129 // foo = DecodeInstruction(ScreenEA());
d8891fcc 130 FILE *fout = NULL;
131 int fout_line = 0;
132 char buf[MAXSTR];
1caf86bb 133 char buf2[MAXSTR];
b587e6ae 134 const char *name;
d8891fcc 135 struc_t *frame;
136 func_t *func;
15c7b2a4 137 ea_t ui_ea_block = 0, ea_size;
138 ea_t tmp_ea, target_ea;
d8891fcc 139 ea_t ea;
b587e6ae 140 flags_t ea_flags;
1caf86bb 141 uval_t idx;
d8891fcc 142 int i, o, m, n;
143 int ret;
144 char *p;
145
15c7b2a4 146 nonlocal_bt_cnt = 0;
147
1caf86bb 148 // get rid of structs, masm doesn't understand them
149 idx = get_first_struc_idx();
150 while (idx != BADNODE) {
151 tid_t tid = get_struc_by_idx(idx);
152 struc_t *struc = get_struc(tid);
153 get_struc_name(tid, buf, sizeof(buf));
154 msg("removing struct '%s'\n", buf);
155 //del_struc_members(struc, 0, get_max_offset(struc));
156 del_struc(struc);
157
158 idx = get_first_struc_idx();
159 }
160
15c7b2a4 161 // 1st pass: walk through all funcs
162 func = get_func(inf.minEA);
d8891fcc 163 while (func != NULL)
164 {
15c7b2a4 165 func_tail_iterator_t fti(func);
166 if (!fti.main()) {
167 msg("%x: func_tail_iterator_t main failed\n", ea);
168 return;
169 }
170 const area_t &f_area = fti.chunk();
171 ea = f_area.startEA;
172
173 // rename global syms which conflict with frame member names
d8891fcc 174 frame = get_frame(func);
175 if (frame != NULL)
176 {
177 for (m = 0; m < (int)frame->memqty; m++)
178 {
179 ret = get_member_name(frame->members[m].id, buf, sizeof(buf));
180 if (ret <= 0) {
181 msg("%x: member has no name?\n", ea);
182 return;
183 }
184 if (buf[0] == ' ') // what's this?
185 continue;
186 if (IS_START(buf, "arg_") || IS_START(buf, "var_"))
187 continue;
188
1caf86bb 189 // check for dupe names
190 int m1, dupe = 0;
191 for (m1 = 0; m1 < m; m1++) {
192 get_member_name(frame->members[m1].id, buf2, sizeof(buf2));
193 if (stricmp(buf, buf2) == 0)
194 dupe = 1;
195 }
196
197 if (is_name_reserved(buf) || dupe) {
d8891fcc 198 msg("%x: renaming '%s'\n", ea, buf);
199 qstrncat(buf, "_", sizeof(buf));
200 ret = set_member_name(frame, frame->members[m].soff, buf);
201 if (!ret) {
202 msg("%x: renaming failed\n", ea);
203 return;
204 }
205 }
206
207 tmp_ea = get_name_ea(ea, buf);
208 if (tmp_ea == 0 || tmp_ea == ~0)
209 continue;
210
211 msg("%x: from %x: renaming '%s'\n", tmp_ea, ea, buf);
212 qstrncat(buf, "_g", sizeof(buf));
213 set_name(tmp_ea, buf);
214 }
215 }
216
217 func = get_next_func(ea);
15c7b2a4 218 }
219
b587e6ae 220 // 2nd pass over whole .text and .(ro)data segments
15c7b2a4 221 for (ea = inf.minEA; ea != BADADDR; ea = next_head(ea, inf.maxEA))
222 {
223 segment_t *seg = getseg(ea);
b587e6ae 224 if (!seg)
225 break;
226 if (seg->type == SEG_XTRN)
227 continue;
228 if (seg->type != SEG_CODE && seg->type != SEG_DATA)
15c7b2a4 229 break;
230
b587e6ae 231 ea_flags = get_flags_novalue(ea);
15c7b2a4 232 func = get_func(ea);
233 if (isCode(ea_flags))
234 {
235 if (!decode_insn(ea)) {
236 msg("%x: decode_insn() failed\n", ea);
237 continue;
238 }
239
b587e6ae 240 // masm doesn't understand IDA's float/xmm types
241 if (cmd.itype == NN_fld || cmd.itype == NN_fst
242 || cmd.itype == NN_movapd || cmd.itype == NN_movlpd)
243 {
244 for (o = 0; o < UA_MAXOP; o++) {
245 if (cmd.Operands[o].type == o_void)
246 break;
247
248 if (cmd.Operands[o].type == o_mem) {
249 tmp_ea = cmd.Operands[o].addr;
250 flags_t tmp_ea_flags = get_flags_novalue(tmp_ea);
251 if (!isUnknown(tmp_ea_flags)) {
252 buf[0] = 0;
253 get_name(ea, tmp_ea, buf, sizeof(buf));
254 msg("%x: undefining %x '%s'\n", ea, tmp_ea, buf);
255 do_unknown(tmp_ea, DOUNK_EXPAND);
256 }
257 }
258 }
259 }
1caf86bb 260 else if (cmd.itype == NN_lea) {
fc1c61f5 261 // detect code alignment
1caf86bb 262 if (cmd.Operands[0].reg == cmd.Operands[1].reg
263 && cmd.Operands[1].type == o_displ
264 && cmd.Operands[1].addr == 0)
265 {
266 tmp_ea = next_head(ea, inf.maxEA);
267 if ((tmp_ea & 0x03) == 0) {
268 n = calc_max_align(tmp_ea);
269 if (n > 4) // masm doesn't like more..
270 n = 4;
271 msg("%x: align %d\n", ea, 1 << n);
272 do_unknown(ea, DOUNK_SIMPLE);
273 doAlign(ea, tmp_ea - ea, n);
274 }
275 }
fc1c61f5 276 else if (!isDefArg1(ea_flags)
277 && cmd.Operands[1].type == o_mem // why o_mem?
278 && cmd.Operands[1].dtyp == dt_dword)
279 {
280 if (inf.minEA <= cmd.Operands[1].addr
281 && cmd.Operands[1].addr < inf.maxEA)
282 {
283 // lea to segments, like ds:58D6A8h[edx*8]
284 msg("%x: lea offset to %x\n", ea, cmd.Operands[1].addr);
285 op_offset(ea, 1, REF_OFF32);
286 }
287 else
288 {
289 // ds:0[eax*8] -> [eax*8+0]
290 msg("%x: dropping ds: for %x\n", ea, cmd.Operands[1].addr);
291 op_hex(ea, 1);
292 }
293 }
1caf86bb 294 }
b587e6ae 295
15c7b2a4 296 // find non-local branches
b587e6ae 297 if (is_insn_jmp(cmd.itype) && cmd.Operands[0].type == o_near)
15c7b2a4 298 {
299 target_ea = cmd.Operands[0].addr;
300 if (func == NULL)
301 nonlocal_add(target_ea);
302 else {
303 ret = get_func_chunknum(func, target_ea);
304 if (ret != 0) {
305 // a jump to another func or chunk
306 // check if it lands on func start
307 if (!isFunc(get_flags_novalue(target_ea)))
308 nonlocal_add(target_ea);
309 }
310 }
311 }
312 }
313 else { // not code
1caf86bb 314 int do_undef = 0;
315 ea_size = get_item_size(ea);
316
15c7b2a4 317 if (func == NULL && isOff0(ea_flags)) {
15c7b2a4 318 for (tmp_ea = 0; tmp_ea < ea_size; tmp_ea += 4)
319 nonlocal_add(get_long(ea + tmp_ea));
320 }
b587e6ae 321
322 // IDA vs masm float/mmx/xmm type incompatibility
323 if (isDouble(ea_flags) || isTbyt(ea_flags)
324 || isPackReal(ea_flags))
325 {
1caf86bb 326 do_undef = 1;
327 }
328 else if (isOwrd(ea_flags)) {
b587e6ae 329 buf[0] = 0;
330 get_name(BADADDR, ea, buf, sizeof(buf));
1caf86bb 331 if (IS_START(buf, "xmm"))
332 do_undef = 1;
333 }
334 // masm doesn't understand IDA's unicode
335 else if (isASCII(ea_flags) && ea_size >= 4
336 && (get_long(ea) & 0xff00ff00) == 0) // lame..
337 {
338 do_undef = 1;
339 }
340 // masm doesn't understand large aligns
341 else if (isAlign(ea_flags) && ea_size > 0x10) {
342 msg("%x: undefining align %d\n", ea, ea_size);
b587e6ae 343 do_unknown(ea, DOUNK_EXPAND);
344 }
1caf86bb 345
346 if (do_undef) {
b587e6ae 347 buf[0] = 0;
348 get_name(BADADDR, ea, buf, sizeof(buf));
1caf86bb 349 msg("%x: undefining '%s'\n", ea, buf);
350 do_unknown(ea, DOUNK_EXPAND);
b587e6ae 351 }
352 }
353 }
354
355 // check namelist for reserved names
356 n = get_nlist_size();
357 for (i = 0; i < n; i++) {
358 ea = get_nlist_ea(i);
359 name = get_nlist_name(i);
360 if (name == NULL) {
361 msg("%x: null name?\n", ea);
362 continue;
363 }
364
b25f320a 365 // rename vars with '?@' (funcs are ok)
366 int change_qat = 0;
367 ea_flags = get_flags_novalue(ea);
368 if (!isCode(ea_flags) && strpbrk(name, "?@"))
369 change_qat = 1;
370
371 if (change_qat || is_name_reserved(name)) {
b587e6ae 372 msg("%x: renaming name '%s'\n", ea, name);
373 qsnprintf(buf, sizeof(buf), "%s_g", name);
b25f320a 374
375 if (change_qat) {
376 for (p = buf; *p != 0; p++) {
377 if (*p == '?' || *p == '@') {
378 qsnprintf(buf2, sizeof(buf2), "%02x", (unsigned char)*p);
379 memmove(p + 1, p, strlen(p) + 1);
380 memcpy(p, buf2, 2);
381 }
382 }
383 }
384
b587e6ae 385 set_name(ea, buf);
15c7b2a4 386 }
387 }
388
389 if (nonlocal_bt_cnt > 1) {
390 qsort(nonlocal_bt, nonlocal_bt_cnt,
391 sizeof(nonlocal_bt[0]), nonlocal_bt_cmp);
d8891fcc 392 }
393
394 char *fname = askfile_c(1, NULL, "Save asm file");
395 if (fname == NULL)
396 return;
397 fout = qfopen(fname, "w");
398 if (fout == NULL) {
399 msg("couldn't open '%s'\n", fname);
400 return;
401 }
402
403 show_wait_box("Saving..");
404
405 // deal with the beginning
406 ea = inf.minEA;
407 int flags = 0; // calc_default_idaplace_flags();
408 linearray_t ln(&flags);
409 idaplace_t pl;
410 pl.ea = ea;
411 pl.lnnum = 0;
412 ln.set_place(&pl);
413 n = ln.get_linecnt();
414 for (i = 0; i < n - 1; i++) {
d53d4cc7 415 do_def_line(buf, sizeof(buf), ln.down(), ea);
d8891fcc 416 if (strstr(buf, "include"))
417 continue;
418
419 fout_line++;
420 qfprintf(fout, "%s\n", buf);
421 p = strstr(buf, ".mmx");
422 if (p != NULL) {
423 memcpy(p, ".xmm", 4);
424 fout_line++;
425 qfprintf(fout, "%s\n", buf);
1402b79d 426 continue;
427 }
428 p = strstr(buf, ".model");
429 if (p != NULL) {
430 qstrncpy(p, "include imports.inc", sizeof(buf) - (p - buf));
431 fout_line++;
432 qfprintf(fout, "\n%s\n", buf);
de8a204c 433 i++;
434 break;
d8891fcc 435 }
436 }
b587e6ae 437 pl.lnnum = i;
d8891fcc 438
439 for (;;)
440 {
94cd6e34 441 int drop_large = 0, do_rva = 0, set_scale = 0, jmp_near = 0;
1caf86bb 442 int word_imm = 0, dword_imm = 0, do_pushf = 0;
15c7b2a4 443
d8891fcc 444 if ((ea >> 14) != ui_ea_block) {
445 ui_ea_block = ea >> 14;
446 showAddr(ea);
447 if (wasBreak())
448 break;
449 }
450
451 segment_t *seg = getseg(ea);
b587e6ae 452 if (!seg || (seg->type != SEG_CODE && seg->type != SEG_DATA))
d8891fcc 453 goto pass;
454
b587e6ae 455 ea_flags = get_flags_novalue(ea);
456 if (isCode(ea_flags))
457 {
458 if (!decode_insn(ea))
459 goto pass;
d8891fcc 460
1caf86bb 461 if (is_insn_jmp(cmd.itype) && cmd.Operands[0].type == o_near
462 && cmd.Operands[0].dtyp == dt_dword)
463 {
464 jmp_near = 1;
465 }
466 else if ((cmd.itype == NN_pushf || cmd.itype == NN_popf)
467 && natop())
468 {
469 do_pushf = 1;
470 }
471
b587e6ae 472 for (o = 0; o < UA_MAXOP; o++) {
1caf86bb 473 const op_t &opr = cmd.Operands[o];
474 if (opr.type == o_void)
b587e6ae 475 break;
d8891fcc 476
1caf86bb 477 // correct?
478 if (opr.type == o_mem && opr.specval_shorts.high == 0x21)
b587e6ae 479 drop_large = 1;
1caf86bb 480 if (opr.hasSIB && x86_scale(opr) == 0
481 && x86_index(opr) != INDEX_NONE)
482 {
483 set_scale = 1;
484 }
485 // annoying alignment variant..
486 if (opr.type == o_imm && opr.dtyp == dt_dword
487 && (opr.value < 0x80 || opr.value > 0xffffff80)
488 && cmd.size >= opr.offb + 4)
489 {
490 if (get_long(ea + opr.offb) == opr.value)
491 dword_imm = 1;
492 }
493 else if (opr.type == o_imm && opr.dtyp == dt_word
494 && (opr.value < 0x80 || opr.value > 0xff80)
495 && cmd.size >= opr.offb + 2)
496 {
497 if (get_word(ea + opr.offb) == (ushort)opr.value)
498 word_imm = 1;
15c7b2a4 499 }
15c7b2a4 500 }
b587e6ae 501 }
502 else { // not code
503 if (isOff0(ea_flags))
94cd6e34 504 do_rva = 1;
d8891fcc 505 }
506
507pass:
b587e6ae 508 n = ln.get_linecnt();
509 for (i = pl.lnnum; i < n; i++) {
d53d4cc7 510 do_def_line(buf, sizeof(buf), ln.down(), ea);
15c7b2a4 511
1402b79d 512 char *fw;
513 for (fw = buf; *fw != 0 && *fw == ' '; )
514 fw++;
515
1caf86bb 516 // patches..
b587e6ae 517 if (drop_large) {
1402b79d 518 p = strstr(fw, "large ");
b587e6ae 519 if (p != NULL)
520 memmove(p, p + 6, strlen(p + 6) + 1);
521 }
94cd6e34 522 while (do_rva) {
1402b79d 523 p = strstr(fw, " rva ");
b587e6ae 524 if (p == NULL)
525 break;
94cd6e34 526 memmove(p + 4 + 3, p + 4, strlen(p + 4) + 1);
527 memcpy(p + 1, "offset", 6);
b587e6ae 528 }
1caf86bb 529 if (set_scale) {
1402b79d 530 p = strchr(fw, '[');
1caf86bb 531 if (p != NULL)
532 p = strchr(p, '+');
533 if (p != NULL && p[1] == 'e') {
534 p += 4;
535 // scale is 1, must specify it explicitly so that
536 // masm chooses the right scaled reg
537 memmove(p + 2, p, strlen(p) + 1);
538 memcpy(p, "*1", 2);
539 }
540 }
541 else if (jmp_near) {
1402b79d 542 p = strchr(fw, 'j');
1caf86bb 543 while (p && *p != ' ')
544 p++;
545 while (p && *p == ' ')
546 p++;
547 if (p != NULL) {
548 memmove(p + 9, p, strlen(p) + 1);
549 memcpy(p, "near ptr ", 9);
550 }
551 }
552 if (word_imm) {
1402b79d 553 p = strstr(fw, ", ");
1caf86bb 554 if (p != NULL && '0' <= p[2] && p[2] <= '9') {
555 p += 2;
556 memmove(p + 9, p, strlen(p) + 1);
557 memcpy(p, "word ptr ", 9);
558 }
559 }
560 else if (dword_imm) {
1402b79d 561 p = strstr(fw, ", ");
1caf86bb 562 if (p != NULL && '0' <= p[2] && p[2] <= '9') {
563 p += 2;
564 memmove(p + 10, p, strlen(p) + 1);
565 memcpy(p, "dword ptr ", 10);
566 }
567 }
568 else if (do_pushf) {
1402b79d 569 p = strstr(fw, "pushf");
1caf86bb 570 if (p == NULL)
1402b79d 571 p = strstr(fw, "popf");
1caf86bb 572 if (p != NULL) {
573 p = strchr(p, 'f') + 1;
574 memmove(p + 1, p, strlen(p) + 1);
575 *p = 'd';
576 }
577 }
b587e6ae 578
de8a204c 579 if (fw[0] == 'a' && IS_START(fw, "assume cs")) {
580 // "assume cs" causes problems with ext syms
581 memmove(fw + 1, fw, strlen(fw) + 1);
582 *fw = ';';
583 }
584 else if (fw[0] == 'e' && IS_START(fw, "end") && fw[3] == ' ') {
1402b79d 585 fout_line++;
586 qfprintf(fout, "include public.inc\n\n");
587
588 // kill entry point
589 fw[3] = 0;
590 }
591
b587e6ae 592 fout_line++;
593 qfprintf(fout, "%s\n", buf);
594 }
d8891fcc 595
15c7b2a4 596 // note: next_head skips some undefined stuff
d8891fcc 597 ea = next_not_tail(ea); // correct?
15c7b2a4 598 if (ea == BADADDR)
d8891fcc 599 break;
600
601 pl.ea = ea;
602 pl.lnnum = 0;
603 ln.set_place(&pl);
d8891fcc 604 }
605
606 if (fout != NULL)
607 qfclose(fout);
15c7b2a4 608 if (fname != NULL)
609 qfree(fname);
d8891fcc 610
611 hide_wait_box();
612 msg("%d lines saved.\n", fout_line);
613}
614
615//--------------------------------------------------------------------------
616
// Strings referenced by the PLUGIN descriptor below.
// NOTE(review): the `comment` text talks about "one address", whereas
// run() writes out the whole listing - looks like a leftover; confirm.
static const char comment[] = "Generate disassembly lines for one address";
static const char help[] = "Generate asm file\n";
static const char wanted_name[] = "Save asm";
static const char wanted_hotkey[] = "Ctrl-F6";
621
622//--------------------------------------------------------------------------
623//
624// PLUGIN DESCRIPTION BLOCK
625//
626//--------------------------------------------------------------------------
// Descriptor exported to IDA: wires up the init/term/run callbacks and
// the descriptive strings defined in this file.
plugin_t PLUGIN =
{
  IDP_INTERFACE_VERSION,
  0,                    // plugin flags
  init,                 // initialize
  term,                 // terminate. this pointer may be NULL.
  run,                  // invoke plugin
  comment,              // long comment about the plugin
                        // it could appear in the status line
                        // or as a hint
  help,                 // multiline help about the plugin
  wanted_name,          // the preferred short name of the plugin
  wanted_hotkey         // the preferred hotkey to run the plugin
};
641
642// vim:ts=2:shiftwidth=2:expandtab