drc: another hack to try to get Vita to work
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
28#include "arm_features.h"
29
30#define unused __attribute__((unused))
31
32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points referenced from generated code; they are not defined in
// this part of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

// One jump_vaddr stub per host register that can hold the target address.
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// Stub lookup indexed by host register number.  Slots 11 and 13-15 have
// no stub and are left as null pointers.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
72
73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
104/* Linker */
105
106static void set_jump_target(void *addr, void *target_)
107{
108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
113 assert(((uintptr_t)addr&3)==0);
114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
121 assert(((uintptr_t)addr&3)==0);
122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
126 assert(((uintptr_t)addr&3)==0);
127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant.  (Kept disabled.)
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes=(u_char *)addr;
  u_int *insn=(u_int *)bytes;
  assert(!copy||insn[-1]==0xe28dd000);
  if(bytes[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((bytes[3]&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // Do not copy the target instruction in any of these cases
  if((target_insn&0x0e100000)==0             // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000  // Load
     ||(target_insn&0x08000000)) {
    copy=0;
  }
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
172
173/* Literal pool */
174static void add_literal(int addr,int val)
175{
176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
179 literalcount++;
180}
181
182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
185{
186 int *ptr=(int *)(stub+4);
187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
188 u_int offset=*ptr&0xfff;
189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
191}
192
193// find where external branch is liked to using addr of it's stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
197static void *get_pointer(void *stub)
198{
199 //printf("get_pointer(%x)\n",(int)stub);
200 int *i_ptr=find_extjump_insn(stub);
201 assert((*i_ptr&0x0f000000)==0x0a000000); // b
202 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
203}
204
205// Find the "clean" entry point from a "dirty" entry point
206// by skipping past the call to verify_code
207static void *get_clean_addr(void *addr)
208{
209 signed int *ptr = addr;
210 #ifndef HAVE_ARMV7
211 ptr+=4;
212 #else
213 ptr+=6;
214 #endif
215 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
216 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
217 ptr++;
218 if((*ptr&0xFF000000)==0xea000000) {
219 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
220 }
221 return ptr;
222}
223
224static int verify_dirty(const u_int *ptr)
225{
226 #ifndef HAVE_ARMV7
227 u_int offset;
228 // get from literal pool
229 assert((*ptr&0xFFFF0000)==0xe59f0000);
230 offset=*ptr&0xfff;
231 u_int source=*(u_int*)((void *)ptr+offset+8);
232 ptr++;
233 assert((*ptr&0xFFFF0000)==0xe59f0000);
234 offset=*ptr&0xfff;
235 u_int copy=*(u_int*)((void *)ptr+offset+8);
236 ptr++;
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int len=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 ptr++;
242 #else
243 // ARMv7 movw/movt
244 assert((*ptr&0xFFF00000)==0xe3000000);
245 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
246 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
247 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
248 ptr+=6;
249 #endif
250 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
251 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258static int isclean(void *addr)
259{
260 #ifndef HAVE_ARMV7
261 u_int *ptr=((u_int *)addr)+4;
262 #else
263 u_int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
269 return 1;
270}
271
272// get source that block at addr was compiled from (host pointers)
273static void get_bounds(void *addr, u_char **start, u_char **end)
274{
275 u_int *ptr = addr;
276 #ifndef HAVE_ARMV7
277 u_int offset;
278 // get from literal pool
279 assert((*ptr&0xFFFF0000)==0xe59f0000);
280 offset=*ptr&0xfff;
281 u_int source=*(u_int*)((void *)ptr+offset+8);
282 ptr++;
283 //assert((*ptr&0xFFFF0000)==0xe59f0000);
284 //offset=*ptr&0xfff;
285 //u_int copy=*(u_int*)((void *)ptr+offset+8);
286 ptr++;
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int len=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 ptr++;
292 #else
293 // ARMv7 movw/movt
294 assert((*ptr&0xFFF00000)==0xe3000000);
295 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
296 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
297 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
298 ptr+=6;
299 #endif
300 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
301 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
302 *start=(u_char *)source;
303 *end=(u_char *)source+len;
304}
305
306// Allocate a specific ARM register.
307static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
308{
309 int n;
310 int dirty=0;
311
312 // see if it's already allocated (and dealloc it)
313 for(n=0;n<HOST_REGS;n++)
314 {
315 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
316 dirty=(cur->dirty>>n)&1;
317 cur->regmap[n]=-1;
318 }
319 }
320
321 cur->regmap[hr]=reg;
322 cur->dirty&=~(1<<hr);
323 cur->dirty|=dirty<<hr;
324 cur->isconst&=~(1<<hr);
325}
326
327// Alloc cycle count into dedicated register
328static void alloc_cc(struct regstat *cur,int i)
329{
330 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
331}
332
333/* Assembler */
334
335static unused char regname[16][4] = {
336 "r0",
337 "r1",
338 "r2",
339 "r3",
340 "r4",
341 "r5",
342 "r6",
343 "r7",
344 "r8",
345 "r9",
346 "r10",
347 "fp",
348 "r12",
349 "sp",
350 "lr",
351 "pc"};
352
353static void output_w32(u_int word)
354{
355 *((u_int *)out)=word;
356 out+=4;
357}
358
359static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
360{
361 assert(rd<16);
362 assert(rn<16);
363 assert(rm<16);
364 return((rn<<16)|(rd<<12)|rm);
365}
366
367static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
368{
369 assert(rd<16);
370 assert(rn<16);
371 assert(imm<256);
372 assert((shift&1)==0);
373 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
374}
375
// Try to express imm as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit operand field in *encoded;
// otherwise returns 0 with *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30); // rotate right by two bits
  }
  return 0;
}
391
392static void genimm_checked(u_int imm,u_int *encoded)
393{
394 u_int ret=genimm(imm,encoded);
395 assert(ret);
396 (void)ret;
397}
398
399static u_int genjmp(u_int addr)
400{
401 if (addr < 3) return 0; // a branch that will be patched later
402 int offset = addr-(int)out-8;
403 if (offset < -33554432 || offset >= 33554432) {
404 SysPrintf("genjmp: out of range: %08x\n", offset);
405 abort();
406 return 0;
407 }
408 return ((u_int)offset>>2)&0xffffff;
409}
410
411static unused void emit_breakpoint(void)
412{
413 assem_debug("bkpt #0\n");
414 //output_w32(0xe1200070);
415 output_w32(0xe7f001f0);
416}
417
418static void emit_mov(int rs,int rt)
419{
420 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
421 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
422}
423
424static void emit_movs(int rs,int rt)
425{
426 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
427 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
428}
429
430static void emit_add(int rs1,int rs2,int rt)
431{
432 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
433 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
434}
435
436static void emit_adds(int rs1,int rs2,int rt)
437{
438 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
439 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
440}
441#define emit_adds_ptr emit_adds
442
443static void emit_adcs(int rs1,int rs2,int rt)
444{
445 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
446 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
447}
448
449static void emit_neg(int rs, int rt)
450{
451 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
452 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
453}
454
455static void emit_sub(int rs1,int rs2,int rt)
456{
457 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
458 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
459}
460
461static void emit_zeroreg(int rt)
462{
463 assem_debug("mov %s,#0\n",regname[rt]);
464 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
465}
466
467static void emit_loadlp(u_int imm,u_int rt)
468{
469 add_literal((int)out,imm);
470 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
471 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
472}
473
474#ifdef HAVE_ARMV7
475static void emit_movw(u_int imm,u_int rt)
476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
481
482static void emit_movt(u_int imm,u_int rt)
483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
487#endif
488
489static void emit_movimm(u_int imm,u_int rt)
490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
499 #ifndef HAVE_ARMV7
500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
508 #ifndef HAVE_ARMV7
509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
516
517static void emit_pcreladdr(u_int rt)
518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
523static void emit_loadreg(int r, int hr)
524{
525 if(r&64) {
526 SysPrintf("64bit load in 32bit mode!\n");
527 assert(0);
528 return;
529 }
530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
533 void *addr;
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = &cycle_count; break;
538 case CSREG: addr = &Status; break;
539 case INVCP: addr = &invc_ptr; break;
540 case ROREG: addr = &ram_offset; break;
541 default:
542 assert(r < 34);
543 addr = &psxRegs.GPR.r[r];
544 break;
545 }
546 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
547 assert(offset<4096);
548 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
549 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
550 }
551}
552
553static void emit_storereg(int r, int hr)
554{
555 if(r&64) {
556 SysPrintf("64bit store in 32bit mode!\n");
557 assert(0);
558 return;
559 }
560 int addr = (int)&psxRegs.GPR.r[r];
561 switch (r) {
562 //case HIREG: addr = &hi; break;
563 //case LOREG: addr = &lo; break;
564 case CCREG: addr = (int)&cycle_count; break;
565 default: assert(r < 34); break;
566 }
567 u_int offset = addr-(u_int)&dynarec_local;
568 assert(offset<4096);
569 assem_debug("str %s,fp+%d\n",regname[hr],offset);
570 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
571}
572
573static void emit_test(int rs, int rt)
574{
575 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
576 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
577}
578
579static void emit_testimm(int rs,int imm)
580{
581 u_int armval;
582 assem_debug("tst %s,#%d\n",regname[rs],imm);
583 genimm_checked(imm,&armval);
584 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
585}
586
587static void emit_testeqimm(int rs,int imm)
588{
589 u_int armval;
590 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
591 genimm_checked(imm,&armval);
592 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
593}
594
595static void emit_not(int rs,int rt)
596{
597 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
598 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
599}
600
601static void emit_and(u_int rs1,u_int rs2,u_int rt)
602{
603 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
604 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
605}
606
607static void emit_or(u_int rs1,u_int rs2,u_int rt)
608{
609 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
610 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
611}
612
613static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
614{
615 assert(rs<16);
616 assert(rt<16);
617 assert(imm<32);
618 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
619 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
620}
621
622static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
623{
624 assert(rs<16);
625 assert(rt<16);
626 assert(imm<32);
627 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
628 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
629}
630
631static void emit_xor(u_int rs1,u_int rs2,u_int rt)
632{
633 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
634 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
635}
636
637static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
638{
639 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
640 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
641}
642
643static void emit_addimm(u_int rs,int imm,u_int rt)
644{
645 assert(rs<16);
646 assert(rt<16);
647 if(imm!=0) {
648 u_int armval;
649 if(genimm(imm,&armval)) {
650 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
651 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
652 }else if(genimm(-imm,&armval)) {
653 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
654 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
655 #ifdef HAVE_ARMV7
656 }else if(rt!=rs&&(u_int)imm<65536) {
657 emit_movw(imm&0x0000ffff,rt);
658 emit_add(rs,rt,rt);
659 }else if(rt!=rs&&(u_int)-imm<65536) {
660 emit_movw(-imm&0x0000ffff,rt);
661 emit_sub(rs,rt,rt);
662 #endif
663 }else if((u_int)-imm<65536) {
664 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
665 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
666 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
667 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
668 }else {
669 do {
670 int shift = (ffs(imm) - 1) & ~1;
671 int imm8 = imm & (0xff << shift);
672 genimm_checked(imm8,&armval);
673 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
674 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
675 rs = rt;
676 imm &= ~imm8;
677 }
678 while (imm != 0);
679 }
680 }
681 else if(rs!=rt) emit_mov(rs,rt);
682}
683
684static void emit_addimm_and_set_flags(int imm,int rt)
685{
686 assert(imm>-65536&&imm<65536);
687 u_int armval;
688 if(genimm(imm,&armval)) {
689 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
690 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
691 }else if(genimm(-imm,&armval)) {
692 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(imm<0) {
695 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
696 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
697 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
698 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
699 }else{
700 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
701 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
702 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
703 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
704 }
705}
706
707static void emit_addnop(u_int r)
708{
709 assert(r<16);
710 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
711 output_w32(0xe2800000|rd_rn_rm(r,r,0));
712}
713
714static void emit_andimm(int rs,int imm,int rt)
715{
716 u_int armval;
717 if(imm==0) {
718 emit_zeroreg(rt);
719 }else if(genimm(imm,&armval)) {
720 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(genimm(~imm,&armval)) {
723 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(imm==65535) {
726 #ifndef HAVE_ARMV6
727 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
728 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
729 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
731 #else
732 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
733 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
734 #endif
735 }else{
736 assert(imm>0&&imm<65535);
737 #ifndef HAVE_ARMV7
738 assem_debug("mov r14,#%d\n",imm&0xFF00);
739 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
740 assem_debug("add r14,r14,#%d\n",imm&0xFF);
741 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
742 #else
743 emit_movw(imm,HOST_TEMPREG);
744 #endif
745 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
746 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
747 }
748}
749
750static void emit_orimm(int rs,int imm,int rt)
751{
752 u_int armval;
753 if(imm==0) {
754 if(rs!=rt) emit_mov(rs,rt);
755 }else if(genimm(imm,&armval)) {
756 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
758 }else{
759 assert(imm>0&&imm<65536);
760 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
762 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
764 }
765}
766
767static void emit_xorimm(int rs,int imm,int rt)
768{
769 u_int armval;
770 if(imm==0) {
771 if(rs!=rt) emit_mov(rs,rt);
772 }else if(genimm(imm,&armval)) {
773 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
775 }else{
776 assert(imm>0&&imm<65536);
777 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
779 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
781 }
782}
783
784static void emit_shlimm(int rs,u_int imm,int rt)
785{
786 assert(imm>0);
787 assert(imm<32);
788 //if(imm==1) ...
789 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
793static void emit_lsls_imm(int rs,int imm,int rt)
794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
801static unused void emit_lslpls_imm(int rs,int imm,int rt)
802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
809static void emit_shrimm(int rs,u_int imm,int rt)
810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
815}
816
817static void emit_sarimm(int rs,u_int imm,int rt)
818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
823}
824
825static void emit_rorimm(int rs,u_int imm,int rt)
826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
831}
832
// Sign-extend the low 16 bits of rs into rt: sxth on ARMv6+, otherwise
// a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
843
// Sign-extend the low 8 bits of rs into rt: sxtb on ARMv6+, otherwise
// a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
854
855static void emit_shl(u_int rs,u_int shift,u_int rt)
856{
857 assert(rs<16);
858 assert(rt<16);
859 assert(shift<16);
860 //if(imm==1) ...
861 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
862 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
863}
864
865static void emit_shr(u_int rs,u_int shift,u_int rt)
866{
867 assert(rs<16);
868 assert(rt<16);
869 assert(shift<16);
870 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
872}
873
874static void emit_sar(u_int rs,u_int shift,u_int rt)
875{
876 assert(rs<16);
877 assert(rt<16);
878 assert(shift<16);
879 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
881}
882
883static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
889 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
890}
891
892static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
893{
894 assert(rs<16);
895 assert(rt<16);
896 assert(shift<16);
897 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
898 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
899}
900
901static void emit_cmpimm(int rs,int imm)
902{
903 u_int armval;
904 if(genimm(imm,&armval)) {
905 assem_debug("cmp %s,#%d\n",regname[rs],imm);
906 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
907 }else if(genimm(-imm,&armval)) {
908 assem_debug("cmn %s,#%d\n",regname[rs],imm);
909 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
910 }else if(imm>0) {
911 assert(imm<65536);
912 emit_movimm(imm,HOST_TEMPREG);
913 assem_debug("cmp %s,r14\n",regname[rs]);
914 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
915 }else{
916 assert(imm>-65536);
917 emit_movimm(-imm,HOST_TEMPREG);
918 assem_debug("cmn %s,r14\n",regname[rs]);
919 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
920 }
921}
922
923static void emit_cmovne_imm(int imm,int rt)
924{
925 assem_debug("movne %s,#%d\n",regname[rt],imm);
926 u_int armval;
927 genimm_checked(imm,&armval);
928 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
929}
930
931static void emit_cmovl_imm(int imm,int rt)
932{
933 assem_debug("movlt %s,#%d\n",regname[rt],imm);
934 u_int armval;
935 genimm_checked(imm,&armval);
936 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
937}
938
939static void emit_cmovb_imm(int imm,int rt)
940{
941 assem_debug("movcc %s,#%d\n",regname[rt],imm);
942 u_int armval;
943 genimm_checked(imm,&armval);
944 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
945}
946
947static void emit_cmovae_imm(int imm,int rt)
948{
949 assem_debug("movcs %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
955static void emit_cmovs_imm(int imm,int rt)
956{
957 assem_debug("movmi %s,#%d\n",regname[rt],imm);
958 u_int armval;
959 genimm_checked(imm,&armval);
960 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
961}
962
963static void emit_cmovne_reg(int rs,int rt)
964{
965 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
966 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
967}
968
969static void emit_cmovl_reg(int rs,int rt)
970{
971 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
972 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
973}
974
975static void emit_cmovb_reg(int rs,int rt)
976{
977 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
978 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
979}
980
981static void emit_cmovs_reg(int rs,int rt)
982{
983 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
984 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
985}
986
// Set rt to 1 if rs < imm (movlt after the compare), else 0.
// When rt is the same register as rs it can only be zeroed after the
// compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  const int same_reg = (rs == rt);
  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
994
// Set rt to 1 if the compare of rs with imm leaves carry clear (movcc),
// else 0.  When rt is the same register as rs it can only be zeroed
// after the compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int same_reg = (rs == rt);
  if (!same_reg)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (same_reg)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1002
1003static void emit_cmp(int rs,int rt)
1004{
1005 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1006 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1007}
1008
// Set rt to 1 if rs > 0, else 0: compare rs with 1, load 1, then
// overwrite with 0 when the compare was "less than".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold = 1;
  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1016
// Set rt to 1 if rs is non-zero, else 0.  The flags come from "movs"
// (which also copies rs into rt) or, when rs and rt coincide, from "tst".
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1024
// Set rt to 1 if rs1 < rs2 (movlt after the compare), else 0.
// If rt is one of the operands it is only zeroed after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);
  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1033
// Set rt to 1 if the compare of rs1 with rs2 leaves carry clear (movcc),
// else 0.  If rt is one of the operands it is only zeroed after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);
  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1042
1043static int can_jump_or_call(const void *a)
1044{
1045 intptr_t offset = (u_char *)a - out - 8;
1046 return (-33554432 <= offset && offset < 33554432);
1047}
1048
1049static void emit_call(const void *a_)
1050{
1051 int a = (int)a_;
1052 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1053 u_int offset=genjmp(a);
1054 output_w32(0xeb000000|offset);
1055}
1056
1057static void emit_jmp(const void *a_)
1058{
1059 int a = (int)a_;
1060 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
1061 u_int offset=genjmp(a);
1062 output_w32(0xea000000|offset);
1063}
1064
// Emit "bne a_".
static void emit_jne(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bne %x\n",dest);
  output_w32(0x1a000000 | genjmp(dest));
}
1072
// Emit "beq a_".
static void emit_jeq(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("beq %x\n",dest);
  output_w32(0x0a000000 | genjmp(dest));
}
1080
// Emit "bmi a_".
static void emit_js(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bmi %x\n",dest);
  output_w32(0x4a000000 | genjmp(dest));
}
1088
// Emit "bpl a_".
static void emit_jns(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bpl %x\n",dest);
  output_w32(0x5a000000 | genjmp(dest));
}
1096
// Emit "blt a_".
static void emit_jl(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("blt %x\n",dest);
  output_w32(0xba000000 | genjmp(dest));
}
1104
// Emit "bge a_".
static void emit_jge(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bge %x\n",dest);
  output_w32(0xaa000000 | genjmp(dest));
}
1112
// Emit "bvc a_".
static void emit_jno(const void *a_)
{
  const int dest = (int)a_;
  assem_debug("bvc %x\n",dest);
  output_w32(0x7a000000 | genjmp(dest));
}
1120
1121static void emit_jc(const void *a_)
1122{
1123 int a = (int)a_;
1124 assem_debug("bcs %x\n",a);
1125 u_int offset=genjmp(a);
1126 output_w32(0x2a000000|offset);
1127}
1128
1129static void emit_jcc(const void *a_)
1130{
1131 int a = (int)a_;
1132 assem_debug("bcc %x\n",a);
1133 u_int offset=genjmp(a);
1134 output_w32(0x3a000000|offset);
1135}
1136
1137static unused void emit_callreg(u_int r)
1138{
1139 assert(r<15);
1140 assem_debug("blx %s\n",regname[r]);
1141 output_w32(0xe12fff30|r);
1142}
1143
1144static void emit_jmpreg(u_int r)
1145{
1146 assem_debug("mov pc,%s\n",regname[r]);
1147 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1148}
1149
1150static void emit_ret(void)
1151{
1152 emit_jmpreg(14);
1153}
1154
1155static void emit_readword_indexed(int offset, int rs, int rt)
1156{
1157 assert(offset>-4096&&offset<4096);
1158 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1159 if(offset>=0) {
1160 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1161 }else{
1162 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1163 }
1164}
1165
1166static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1167{
1168 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1169 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1170}
1171#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1172
1173static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1174{
1175 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1176 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1177}
1178
1179static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1180{
1181 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1182 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1183}
1184
1185static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1186{
1187 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1188 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1189}
1190
1191static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1192{
1193 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1194 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1195}
1196
1197static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1198{
1199 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1200 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1201}
1202
1203static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1204{
1205 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1206 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1207}
1208
1209static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1210{
1211 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1212 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1213}
1214
1215static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1216{
1217 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1218 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1219}
1220
1221static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1222{
1223 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1224 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1225}
1226
1227static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1228{
1229 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1230 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1231}
1232
1233static void emit_str_dualindexed(int rs1, int rs2, int rt)
1234{
1235 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1236 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1237}
1238
1239static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1240{
1241 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1242 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1243}
1244
1245static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1246{
1247 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1248 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
1249}
1250
1251static void emit_movsbl_indexed(int offset, int rs, int rt)
1252{
1253 assert(offset>-256&&offset<256);
1254 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1255 if(offset>=0) {
1256 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1257 }else{
1258 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1259 }
1260}
1261
1262static void emit_movswl_indexed(int offset, int rs, int rt)
1263{
1264 assert(offset>-256&&offset<256);
1265 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1266 if(offset>=0) {
1267 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1268 }else{
1269 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1270 }
1271}
1272
1273static void emit_movzbl_indexed(int offset, int rs, int rt)
1274{
1275 assert(offset>-4096&&offset<4096);
1276 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1277 if(offset>=0) {
1278 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1279 }else{
1280 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1281 }
1282}
1283
1284static void emit_movzwl_indexed(int offset, int rs, int rt)
1285{
1286 assert(offset>-256&&offset<256);
1287 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1288 if(offset>=0) {
1289 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1290 }else{
1291 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1292 }
1293}
1294
1295static void emit_ldrd(int offset, int rs, int rt)
1296{
1297 assert(offset>-256&&offset<256);
1298 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1299 if(offset>=0) {
1300 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1301 }else{
1302 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1303 }
1304}
1305
1306static void emit_readword(void *addr, int rt)
1307{
1308 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1309 assert(offset<4096);
1310 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1311 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1312}
1313#define emit_readptr emit_readword
1314
1315static void emit_writeword_indexed(int rt, int offset, int rs)
1316{
1317 assert(offset>-4096&&offset<4096);
1318 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1319 if(offset>=0) {
1320 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1321 }else{
1322 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1323 }
1324}
1325
1326static void emit_writehword_indexed(int rt, int offset, int rs)
1327{
1328 assert(offset>-256&&offset<256);
1329 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1330 if(offset>=0) {
1331 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1332 }else{
1333 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1334 }
1335}
1336
1337static void emit_writebyte_indexed(int rt, int offset, int rs)
1338{
1339 assert(offset>-4096&&offset<4096);
1340 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1341 if(offset>=0) {
1342 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1343 }else{
1344 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1345 }
1346}
1347
1348static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1349{
1350 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1351 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1352}
1353
1354static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1355{
1356 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1357 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1358}
1359
1360static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1361{
1362 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1363 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1364}
1365
1366static void emit_writeword(int rt, void *addr)
1367{
1368 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1369 assert(offset<4096);
1370 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1371 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1372}
1373
1374static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1375{
1376 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1377 assert(rs1<16);
1378 assert(rs2<16);
1379 assert(hi<16);
1380 assert(lo<16);
1381 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1382}
1383
1384static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1385{
1386 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1387 assert(rs1<16);
1388 assert(rs2<16);
1389 assert(hi<16);
1390 assert(lo<16);
1391 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1392}
1393
1394static void emit_clz(int rs,int rt)
1395{
1396 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1397 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1398}
1399
1400static void emit_subcs(int rs1,int rs2,int rt)
1401{
1402 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1403 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1404}
1405
1406static void emit_shrcc_imm(int rs,u_int imm,int rt)
1407{
1408 assert(imm>0);
1409 assert(imm<32);
1410 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1411 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1412}
1413
1414static void emit_shrne_imm(int rs,u_int imm,int rt)
1415{
1416 assert(imm>0);
1417 assert(imm<32);
1418 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1419 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1420}
1421
1422static void emit_negmi(int rs, int rt)
1423{
1424 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1425 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1426}
1427
1428static void emit_negsmi(int rs, int rt)
1429{
1430 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1431 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1432}
1433
1434static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1435{
1436 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1437 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1438}
1439
1440static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1441{
1442 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1443 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1444}
1445
1446static void emit_teq(int rs, int rt)
1447{
1448 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1449 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1450}
1451
1452static unused void emit_rsbimm(int rs, int imm, int rt)
1453{
1454 u_int armval;
1455 genimm_checked(imm,&armval);
1456 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1457 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1458}
1459
1460// Conditionally select one of two immediates, optimizing for small code size
1461// This will only be called if HAVE_CMOV_IMM is defined
1462static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1463{
1464 u_int armval;
1465 if(genimm(imm2-imm1,&armval)) {
1466 emit_movimm(imm1,rt);
1467 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1468 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1469 }else if(genimm(imm1-imm2,&armval)) {
1470 emit_movimm(imm1,rt);
1471 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1472 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1473 }
1474 else {
1475 #ifndef HAVE_ARMV7
1476 emit_movimm(imm1,rt);
1477 add_literal((int)out,imm2);
1478 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1479 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1480 #else
1481 emit_movw(imm1&0x0000FFFF,rt);
1482 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1483 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1484 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1485 }
1486 emit_movt(imm1&0xFFFF0000,rt);
1487 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1488 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1489 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1490 }
1491 #endif
1492 }
1493}
1494
1495// special case for checking invalid_code
1496static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1497{
1498 assert(imm<128&&imm>=0);
1499 assert(r>=0&&r<16);
1500 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1501 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1502 emit_cmpimm(HOST_TEMPREG,imm);
1503}
1504
1505static void emit_callne(int a)
1506{
1507 assem_debug("blne %x\n",a);
1508 u_int offset=genjmp(a);
1509 output_w32(0x1b000000|offset);
1510}
1511
1512// Used to preload hash table entries
1513static unused void emit_prefetchreg(int r)
1514{
1515 assem_debug("pld %s\n",regname[r]);
1516 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1517}
1518
1519// Special case for mini_ht
1520static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1521{
1522 assert(offset<4096);
1523 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1524 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1525}
1526
1527static void emit_orrne_imm(int rs,int imm,int rt)
1528{
1529 u_int armval;
1530 genimm_checked(imm,&armval);
1531 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1532 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1533}
1534
1535static unused void emit_addpl_imm(int rs,int imm,int rt)
1536{
1537 u_int armval;
1538 genimm_checked(imm,&armval);
1539 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1540 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1541}
1542
1543static void emit_jno_unlikely(int a)
1544{
1545 //emit_jno(a);
1546 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1547 output_w32(0x72800000|rd_rn_rm(15,15,0));
1548}
1549
1550static void save_regs_all(u_int reglist)
1551{
1552 int i;
1553 if(!reglist) return;
1554 assem_debug("stmia fp,{");
1555 for(i=0;i<16;i++)
1556 if(reglist&(1<<i))
1557 assem_debug("r%d,",i);
1558 assem_debug("}\n");
1559 output_w32(0xe88b0000|reglist);
1560}
1561
1562static void restore_regs_all(u_int reglist)
1563{
1564 int i;
1565 if(!reglist) return;
1566 assem_debug("ldmia fp,{");
1567 for(i=0;i<16;i++)
1568 if(reglist&(1<<i))
1569 assem_debug("r%d,",i);
1570 assem_debug("}\n");
1571 output_w32(0xe89b0000|reglist);
1572}
1573
1574// Save registers before function call
1575static void save_regs(u_int reglist)
1576{
1577 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1578 save_regs_all(reglist);
1579}
1580
1581// Restore registers after function call
1582static void restore_regs(u_int reglist)
1583{
1584 reglist&=CALLER_SAVE_REGS;
1585 restore_regs_all(reglist);
1586}
1587
1588/* Stubs/epilogue */
1589
1590static void literal_pool(int n)
1591{
1592 if(!literalcount) return;
1593 if(n) {
1594 if((int)out-literals[0][0]<4096-n) return;
1595 }
1596 u_int *ptr;
1597 int i;
1598 for(i=0;i<literalcount;i++)
1599 {
1600 u_int l_addr=(u_int)out;
1601 int j;
1602 for(j=0;j<i;j++) {
1603 if(literals[j][1]==literals[i][1]) {
1604 //printf("dup %08x\n",literals[i][1]);
1605 l_addr=literals[j][0];
1606 break;
1607 }
1608 }
1609 ptr=(u_int *)literals[i][0];
1610 u_int offset=l_addr-(u_int)ptr-8;
1611 assert(offset<4096);
1612 assert(!(offset&3));
1613 *ptr|=offset;
1614 if(l_addr==(u_int)out) {
1615 literals[i][0]=l_addr; // remember for dupes
1616 output_w32(literals[i][1]);
1617 }
1618 }
1619 literalcount=0;
1620}
1621
1622static void literal_pool_jumpover(int n)
1623{
1624 if(!literalcount) return;
1625 if(n) {
1626 if((int)out-literals[0][0]<4096-n) return;
1627 }
1628 void *jaddr = out;
1629 emit_jmp(0);
1630 literal_pool(0);
1631 set_jump_target(jaddr, out);
1632}
1633
1634// parsed by get_pointer, find_extjump_insn
1635static void emit_extjump2(u_char *addr, u_int target, void *linker)
1636{
1637 u_char *ptr=(u_char *)addr;
1638 assert((ptr[3]&0x0e)==0xa);
1639 (void)ptr;
1640
1641 emit_loadlp(target,0);
1642 emit_loadlp((u_int)addr,1);
1643 assert(ndrc->translation_cache <= addr &&
1644 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
1645 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1646//DEBUG >
1647#ifdef DEBUG_CYCLE_COUNT
1648 emit_readword(&last_count,ECX);
1649 emit_add(HOST_CCREG,ECX,HOST_CCREG);
1650 emit_readword(&next_interupt,ECX);
1651 emit_writeword(HOST_CCREG,&Count);
1652 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
1653 emit_writeword(ECX,&last_count);
1654#endif
1655//DEBUG <
1656 emit_far_jump(linker);
1657}
1658
1659static void check_extjump2(void *src)
1660{
1661 u_int *ptr = src;
1662 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1663 (void)ptr;
1664}
1665
1666// put rt_val into rt, potentially making use of rs with value rs_val
1667static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1668{
1669 u_int armval;
1670 int diff;
1671 if(genimm(rt_val,&armval)) {
1672 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1673 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1674 return;
1675 }
1676 if(genimm(~rt_val,&armval)) {
1677 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1678 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1679 return;
1680 }
1681 diff=rt_val-rs_val;
1682 if(genimm(diff,&armval)) {
1683 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1684 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1685 return;
1686 }else if(genimm(-diff,&armval)) {
1687 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1688 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1689 return;
1690 }
1691 emit_movimm(rt_val,rt);
1692}
1693
// return 1 if above function can do its job cheaply
// That is: v1 == v2, or v2-v1 (or v1-v2), after stripping trailing pairs
// of zero bits, fits in 8 bits. The arithmetic is done in unsigned so the
// difference and its negation wrap in a well-defined way.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int diff;
  u_int xs;

  if (v1 == v2)
    return 1;

  diff = v2 - v1;
  for (xs = diff; xs != 0 && (xs & 3) == 0; xs >>= 2)
    ;
  if (xs < 0x100)
    return 1;

  for (xs = 0u - diff; xs != 0 && (xs & 3) == 0; xs >>= 2)
    ;
  if (xs < 0x100)
    return 1;

  return 0;
}
1709
1710static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
1711{
1712 switch(type) {
1713 case LOADB_STUB: emit_signextend8(rs,rt); break;
1714 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1715 case LOADH_STUB: emit_signextend16(rs,rt); break;
1716 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1717 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1718 default: assert(0);
1719 }
1720}
1721
1722#include "pcsxmem.h"
1723#include "pcsxmem_inline.c"
1724
1725static void do_readstub(int n)
1726{
1727 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1728 literal_pool(256);
1729 set_jump_target(stubs[n].addr, out);
1730 enum stub_type type=stubs[n].type;
1731 int i=stubs[n].a;
1732 int rs=stubs[n].b;
1733 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1734 u_int reglist=stubs[n].e;
1735 const signed char *i_regmap=i_regs->regmap;
1736 int rt;
1737 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1738 rt=get_reg(i_regmap,FTEMP);
1739 }else{
1740 rt=get_reg(i_regmap,dops[i].rt1);
1741 }
1742 assert(rs>=0);
1743 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1744 void *restore_jump = NULL;
1745 reglist|=(1<<rs);
1746 for(r=0;r<=12;r++) {
1747 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1748 temp=r; break;
1749 }
1750 }
1751 if(rt>=0&&dops[i].rt1!=0)
1752 reglist&=~(1<<rt);
1753 if(temp==-1) {
1754 save_regs(reglist);
1755 regs_saved=1;
1756 temp=(rs==0)?2:0;
1757 }
1758 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1759 temp2=1;
1760 emit_readword(&mem_rtab,temp);
1761 emit_shrimm(rs,12,temp2);
1762 emit_readword_dualindexedx4(temp,temp2,temp2);
1763 emit_lsls_imm(temp2,1,temp2);
1764 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1765 switch(type) {
1766 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1767 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1768 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1769 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1770 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
1771 default: assert(0);
1772 }
1773 }
1774 if(regs_saved) {
1775 restore_jump=out;
1776 emit_jcc(0); // jump to reg restore
1777 }
1778 else
1779 emit_jcc(stubs[n].retaddr); // return address
1780
1781 if(!regs_saved)
1782 save_regs(reglist);
1783 void *handler=NULL;
1784 if(type==LOADB_STUB||type==LOADBU_STUB)
1785 handler=jump_handler_read8;
1786 if(type==LOADH_STUB||type==LOADHU_STUB)
1787 handler=jump_handler_read16;
1788 if(type==LOADW_STUB)
1789 handler=jump_handler_read32;
1790 assert(handler);
1791 pass_args(rs,temp2);
1792 int cc=get_reg(i_regmap,CCREG);
1793 if(cc<0)
1794 emit_loadreg(CCREG,2);
1795 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1796 emit_far_call(handler);
1797 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1798 mov_loadtype_adj(type,0,rt);
1799 }
1800 if(restore_jump)
1801 set_jump_target(restore_jump, out);
1802 restore_regs(reglist);
1803 emit_jmp(stubs[n].retaddr); // return address
1804}
1805
1806static void inline_readstub(enum stub_type type, int i, u_int addr,
1807 const signed char regmap[], int target, int adj, u_int reglist)
1808{
1809 int rs=get_reg(regmap,target);
1810 int rt=get_reg(regmap,target);
1811 if(rs<0) rs=get_reg(regmap,-1);
1812 assert(rs>=0);
1813 u_int is_dynamic;
1814 uintptr_t host_addr = 0;
1815 void *handler;
1816 int cc=get_reg(regmap,CCREG);
1817 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1818 return;
1819 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1820 if (handler == NULL) {
1821 if(rt<0||dops[i].rt1==0)
1822 return;
1823 if(addr!=host_addr)
1824 emit_movimm_from(addr,rs,host_addr,rs);
1825 switch(type) {
1826 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1827 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1828 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1829 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1830 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1831 default: assert(0);
1832 }
1833 return;
1834 }
1835 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1836 if(is_dynamic) {
1837 if(type==LOADB_STUB||type==LOADBU_STUB)
1838 handler=jump_handler_read8;
1839 if(type==LOADH_STUB||type==LOADHU_STUB)
1840 handler=jump_handler_read16;
1841 if(type==LOADW_STUB)
1842 handler=jump_handler_read32;
1843 }
1844
1845 // call a memhandler
1846 if(rt>=0&&dops[i].rt1!=0)
1847 reglist&=~(1<<rt);
1848 save_regs(reglist);
1849 if(target==0)
1850 emit_movimm(addr,0);
1851 else if(rs!=0)
1852 emit_mov(rs,0);
1853 if(cc<0)
1854 emit_loadreg(CCREG,2);
1855 if(is_dynamic) {
1856 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1857 emit_addimm(cc<0?2:cc,adj,2);
1858 }
1859 else {
1860 emit_readword(&last_count,3);
1861 emit_addimm(cc<0?2:cc,adj,2);
1862 emit_add(2,3,2);
1863 emit_writeword(2,&Count);
1864 }
1865
1866 emit_far_call(handler);
1867
1868 if(rt>=0&&dops[i].rt1!=0) {
1869 switch(type) {
1870 case LOADB_STUB: emit_signextend8(0,rt); break;
1871 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1872 case LOADH_STUB: emit_signextend16(0,rt); break;
1873 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1874 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1875 default: assert(0);
1876 }
1877 }
1878 restore_regs(reglist);
1879}
1880
1881static void do_writestub(int n)
1882{
1883 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1884 literal_pool(256);
1885 set_jump_target(stubs[n].addr, out);
1886 enum stub_type type=stubs[n].type;
1887 int i=stubs[n].a;
1888 int rs=stubs[n].b;
1889 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
1890 u_int reglist=stubs[n].e;
1891 const signed char *i_regmap=i_regs->regmap;
1892 int rt,r;
1893 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1894 rt=get_reg(i_regmap,r=FTEMP);
1895 }else{
1896 rt=get_reg(i_regmap,r=dops[i].rs2);
1897 }
1898 assert(rs>=0);
1899 assert(rt>=0);
1900 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1901 void *restore_jump = NULL;
1902 int reglist2=reglist|(1<<rs)|(1<<rt);
1903 for(rtmp=0;rtmp<=12;rtmp++) {
1904 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1905 temp=rtmp; break;
1906 }
1907 }
1908 if(temp==-1) {
1909 save_regs(reglist);
1910 regs_saved=1;
1911 for(rtmp=0;rtmp<=3;rtmp++)
1912 if(rtmp!=rs&&rtmp!=rt)
1913 {temp=rtmp;break;}
1914 }
1915 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1916 temp2=3;
1917 emit_readword(&mem_wtab,temp);
1918 emit_shrimm(rs,12,temp2);
1919 emit_readword_dualindexedx4(temp,temp2,temp2);
1920 emit_lsls_imm(temp2,1,temp2);
1921 switch(type) {
1922 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1923 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1924 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1925 default: assert(0);
1926 }
1927 if(regs_saved) {
1928 restore_jump=out;
1929 emit_jcc(0); // jump to reg restore
1930 }
1931 else
1932 emit_jcc(stubs[n].retaddr); // return address (invcode check)
1933
1934 if(!regs_saved)
1935 save_regs(reglist);
1936 void *handler=NULL;
1937 switch(type) {
1938 case STOREB_STUB: handler=jump_handler_write8; break;
1939 case STOREH_STUB: handler=jump_handler_write16; break;
1940 case STOREW_STUB: handler=jump_handler_write32; break;
1941 default: assert(0);
1942 }
1943 assert(handler);
1944 pass_args(rs,rt);
1945 if(temp2!=3)
1946 emit_mov(temp2,3);
1947 int cc=get_reg(i_regmap,CCREG);
1948 if(cc<0)
1949 emit_loadreg(CCREG,2);
1950 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1951 // returns new cycle_count
1952 emit_far_call(handler);
1953 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1954 if(cc<0)
1955 emit_storereg(CCREG,2);
1956 if(restore_jump)
1957 set_jump_target(restore_jump, out);
1958 restore_regs(reglist);
1959 emit_jmp(stubs[n].retaddr);
1960}
1961
1962static void inline_writestub(enum stub_type type, int i, u_int addr,
1963 const signed char regmap[], int target, int adj, u_int reglist)
1964{
1965 int rs=get_reg(regmap,-1);
1966 int rt=get_reg(regmap,target);
1967 assert(rs>=0);
1968 assert(rt>=0);
1969 uintptr_t host_addr = 0;
1970 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1971 if (handler == NULL) {
1972 if(addr!=host_addr)
1973 emit_movimm_from(addr,rs,host_addr,rs);
1974 switch(type) {
1975 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1976 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1977 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1978 default: assert(0);
1979 }
1980 return;
1981 }
1982
1983 // call a memhandler
1984 save_regs(reglist);
1985 pass_args(rs,rt);
1986 int cc=get_reg(regmap,CCREG);
1987 if(cc<0)
1988 emit_loadreg(CCREG,2);
1989 emit_addimm(cc<0?2:cc,adj,2);
1990 emit_movimm((u_int)handler,3);
1991 // returns new cycle_count
1992 emit_far_call(jump_handler_write_h);
1993 emit_addimm(0,-adj,cc<0?2:cc);
1994 if(cc<0)
1995 emit_storereg(CCREG,2);
1996 restore_regs(reglist);
1997}
1998
1999// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
2000static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
2001{
2002 #ifndef HAVE_ARMV7
2003 emit_loadlp((int)source, 1);
2004 emit_loadlp((int)copy, 2);
2005 emit_loadlp(source_len, 3);
2006 #else
2007 emit_movw(((u_int)source)&0x0000FFFF, 1);
2008 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2009 emit_movt(((u_int)source)&0xFFFF0000, 1);
2010 emit_movt(((u_int)copy)&0xFFFF0000, 2);
2011 emit_movw(source_len, 3);
2012 #endif
2013 emit_movimm(arg0, 0);
2014}
2015
2016static void *do_dirty_stub(int i, u_int source_len)
2017{
2018 assem_debug("do_dirty_stub %x\n",start+i*4);
2019 do_dirty_stub_emit_args(start + i*4, source_len);
2020 emit_far_call(verify_code);
2021 void *entry = out;
2022 load_regs_entry(i);
2023 if (entry == out)
2024 entry = instr_addr[i];
2025 emit_jmp(instr_addr[i]);
2026 return entry;
2027}
2028
2029static void do_dirty_stub_ds(u_int source_len)
2030{
2031 do_dirty_stub_emit_args(start + 1, source_len);
2032 emit_far_call(verify_code_ds);
2033}
2034
2035/* Special assem */
2036
2037static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
2038{
2039 save_regs_all(reglist);
2040 cop2_do_stall_check(op, i, i_regs, 0);
2041#ifdef PCNT
2042 emit_movimm(op, 0);
2043 emit_far_call(pcnt_gte_start);
2044#endif
2045 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
2046}
2047
2048static void c2op_epilogue(u_int op,u_int reglist)
2049{
2050#ifdef PCNT
2051 emit_movimm(op,0);
2052 emit_far_call(pcnt_gte_end);
2053#endif
2054 restore_regs_all(reglist);
2055}
2056
2057static void c2op_call_MACtoIR(int lm,int need_flags)
2058{
2059 if(need_flags)
2060 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
2061 else
2062 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
2063}
2064
2065static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2066{
2067 emit_far_call(func);
2068 // func is C code and trashes r0
2069 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2070 if(need_flags||need_ir)
2071 c2op_call_MACtoIR(lm,need_flags);
2072 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
2073}
2074
2075static void c2op_assemble(int i, const struct regstat *i_regs)
2076{
2077 u_int c2op = source[i] & 0x3f;
2078 u_int reglist_full = get_host_reglist(i_regs->regmap);
2079 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2080 int need_flags, need_ir;
2081
2082 if (gte_handlers[c2op]!=NULL) {
2083 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
2084 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
2085 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2086 source[i],gte_unneeded[i+1],need_flags,need_ir);
2087 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
2088 need_flags=0;
2089 int shift = (source[i] >> 19) & 1;
2090 int lm = (source[i] >> 10) & 1;
2091 switch(c2op) {
2092#ifndef DRC_DBG
2093 case GTE_MVMVA: {
2094#ifdef HAVE_ARMV5
2095 int v = (source[i] >> 15) & 3;
2096 int cv = (source[i] >> 13) & 3;
2097 int mx = (source[i] >> 17) & 3;
2098 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
2099 c2op_prologue(c2op,i,i_regs,reglist);
2100 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2101 if(v<3)
2102 emit_ldrd(v*8,0,4);
2103 else {
2104 emit_movzwl_indexed(9*4,0,4); // gteIR
2105 emit_movzwl_indexed(10*4,0,6);
2106 emit_movzwl_indexed(11*4,0,5);
2107 emit_orrshl_imm(6,16,4);
2108 }
2109 if(mx<3)
2110 emit_addimm(0,32*4+mx*8*4,6);
2111 else
2112 emit_readword(&zeromem_ptr,6);
2113 if(cv<3)
2114 emit_addimm(0,32*4+(cv*8+5)*4,7);
2115 else
2116 emit_readword(&zeromem_ptr,7);
2117#ifdef __ARM_NEON__
2118 emit_movimm(source[i],1); // opcode
2119 emit_far_call(gteMVMVA_part_neon);
2120 if(need_flags) {
2121 emit_movimm(lm,1);
2122 emit_far_call(gteMACtoIR_flags_neon);
2123 }
2124#else
2125 if(cv==3&&shift)
2126 emit_far_call(gteMVMVA_part_cv3sh12_arm);
2127 else {
2128 emit_movimm(shift,1);
2129 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
2130 }
2131 if(need_flags||need_ir)
2132 c2op_call_MACtoIR(lm,need_flags);
2133#endif
2134#else /* if not HAVE_ARMV5 */
2135 c2op_prologue(c2op,i,i_regs,reglist);
2136 emit_movimm(source[i],1); // opcode
2137 emit_writeword(1,&psxRegs.code);
2138 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2139#endif
2140 break;
2141 }
2142 case GTE_OP:
2143 c2op_prologue(c2op,i,i_regs,reglist);
2144 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
2145 if(need_flags||need_ir) {
2146 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2147 c2op_call_MACtoIR(lm,need_flags);
2148 }
2149 break;
2150 case GTE_DPCS:
2151 c2op_prologue(c2op,i,i_regs,reglist);
2152 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2153 break;
2154 case GTE_INTPL:
2155 c2op_prologue(c2op,i,i_regs,reglist);
2156 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2157 break;
2158 case GTE_SQR:
2159 c2op_prologue(c2op,i,i_regs,reglist);
2160 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
2161 if(need_flags||need_ir) {
2162 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2163 c2op_call_MACtoIR(lm,need_flags);
2164 }
2165 break;
2166 case GTE_DCPL:
2167 c2op_prologue(c2op,i,i_regs,reglist);
2168 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2169 break;
2170 case GTE_GPF:
2171 c2op_prologue(c2op,i,i_regs,reglist);
2172 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2173 break;
2174 case GTE_GPL:
2175 c2op_prologue(c2op,i,i_regs,reglist);
2176 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2177 break;
2178#endif
2179 default:
2180 c2op_prologue(c2op,i,i_regs,reglist);
2181#ifdef DRC_DBG
2182 emit_movimm(source[i],1); // opcode
2183 emit_writeword(1,&psxRegs.code);
2184#endif
2185 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
2186 break;
2187 }
2188 c2op_epilogue(c2op,reglist);
2189 }
2190}
2191
/*
 * Emit the value fix-up used for a CTC2 write to register 31 (per the
 * function name).
 *
 *   sl   - host register holding the incoming value
 *   temp - host register that receives the adjusted value
 *
 * Reference behaviour quoted by the original author:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * The 0x7f87e000 mask is checked in three pieces (0x7f000000, 0x00870000,
 * 0x0000e000); each later test is the "eq" variant, so it presumably only
 * runs while all earlier tests found no bits set.
 *
 * NOTE(review): the shift-right/shift-left pair clears the low 12 bits only;
 * bit 31 of the input does not appear to be masked off the way the reference
 * expression suggests -- confirm whether that matters.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  static const int flag_masks[] = { 0x7f000000, 0x00870000, 0x0000e000 };
  unsigned int k;

  // drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // any bit of 0x7f87e000 set?
  emit_testimm(temp, flag_masks[0]);
  for (k = 1; k < ARRAY_SIZE(flag_masks); k++)
    emit_testeqimm(temp, flag_masks[k]);

  // if so, raise the top bit
  emit_orrne_imm(temp, 0x80000000, temp);
}
2203
2204static void do_mfc2_31_one(u_int copr,signed char temp)
2205{
2206 emit_readword(&reg_cop2d[copr],temp);
2207 emit_lsls_imm(temp,16,temp);
2208 emit_cmovs_imm(0,temp);
2209 emit_cmpimm(temp,0xf80<<16);
2210 emit_andimm(temp,0xf80<<16,temp);
2211 emit_cmovae_imm(0xf80<<16,temp);
2212}
2213
2214static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2215{
2216 if (temp < 0) {
2217 host_tempreg_acquire();
2218 temp = HOST_TEMPREG;
2219 }
2220 do_mfc2_31_one(9,temp);
2221 emit_shrimm(temp,7+16,tl);
2222 do_mfc2_31_one(10,temp);
2223 emit_orrshr_imm(temp,2+16,tl);
2224 do_mfc2_31_one(11,temp);
2225 emit_orrshr_imm(temp,-3+16,tl);
2226 emit_writeword(tl,&reg_cop2d[29]);
2227 if (temp == HOST_TEMPREG)
2228 host_tempreg_release();
2229}
2230
2231static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
2232{
2233 // case 0x18: MULT
2234 // case 0x19: MULTU
2235 // case 0x1A: DIV
2236 // case 0x1B: DIVU
2237 // case 0x1C: DMULT
2238 // case 0x1D: DMULTU
2239 // case 0x1E: DDIV
2240 // case 0x1F: DDIVU
2241 if(dops[i].rs1&&dops[i].rs2)
2242 {
2243 if((dops[i].opcode2&4)==0) // 32-bit
2244 {
2245 if(dops[i].opcode2==0x18) // MULT
2246 {
2247 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2248 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2249 signed char hi=get_reg(i_regs->regmap,HIREG);
2250 signed char lo=get_reg(i_regs->regmap,LOREG);
2251 assert(m1>=0);
2252 assert(m2>=0);
2253 assert(hi>=0);
2254 assert(lo>=0);
2255 emit_smull(m1,m2,hi,lo);
2256 }
2257 if(dops[i].opcode2==0x19) // MULTU
2258 {
2259 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2260 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
2261 signed char hi=get_reg(i_regs->regmap,HIREG);
2262 signed char lo=get_reg(i_regs->regmap,LOREG);
2263 assert(m1>=0);
2264 assert(m2>=0);
2265 assert(hi>=0);
2266 assert(lo>=0);
2267 emit_umull(m1,m2,hi,lo);
2268 }
2269 if(dops[i].opcode2==0x1A) // DIV
2270 {
2271 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2272 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
2273 assert(d1>=0);
2274 assert(d2>=0);
2275 signed char quotient=get_reg(i_regs->regmap,LOREG);
2276 signed char remainder=get_reg(i_regs->regmap,HIREG);
2277 assert(quotient>=0);
2278 assert(remainder>=0);
2279 emit_movs(d1,remainder);
2280 emit_movimm(0xffffffff,quotient);
2281 emit_negmi(quotient,quotient); // .. quotient and ..
2282 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
2283 emit_movs(d2,HOST_TEMPREG);
2284 emit_jeq(out+52); // Division by zero
2285 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
2286#ifdef HAVE_ARMV5
2287 emit_clz(HOST_TEMPREG,quotient);
2288 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
2289#else
2290 emit_movimm(0,quotient);
2291 emit_addpl_imm(quotient,1,quotient);
2292 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2293 emit_jns(out-2*4);
2294#endif
2295 emit_orimm(quotient,1<<31,quotient);
2296 emit_shr(quotient,quotient,quotient);
2297 emit_cmp(remainder,HOST_TEMPREG);
2298 emit_subcs(remainder,HOST_TEMPREG,remainder);
2299 emit_adcs(quotient,quotient,quotient);
2300 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
2301 emit_jcc(out-16); // -4
2302 emit_teq(d1,d2);
2303 emit_negmi(quotient,quotient);
2304 emit_test(d1,d1);
2305 emit_negmi(remainder,remainder);
2306 }
2307 if(dops[i].opcode2==0x1B) // DIVU
2308 {
2309 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2310 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
2311 assert(d1>=0);
2312 assert(d2>=0);
2313 signed char quotient=get_reg(i_regs->regmap,LOREG);
2314 signed char remainder=get_reg(i_regs->regmap,HIREG);
2315 assert(quotient>=0);
2316 assert(remainder>=0);
2317 emit_mov(d1,remainder);
2318 emit_movimm(0xffffffff,quotient); // div0 case
2319 emit_test(d2,d2);
2320 emit_jeq(out+40); // Division by zero
2321#ifdef HAVE_ARMV5
2322 emit_clz(d2,HOST_TEMPREG);
2323 emit_movimm(1<<31,quotient);
2324 emit_shl(d2,HOST_TEMPREG,d2);
2325#else
2326 emit_movimm(0,HOST_TEMPREG);
2327 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2328 emit_lslpls_imm(d2,1,d2);
2329 emit_jns(out-2*4);
2330 emit_movimm(1<<31,quotient);
2331#endif
2332 emit_shr(quotient,HOST_TEMPREG,quotient);
2333 emit_cmp(remainder,d2);
2334 emit_subcs(remainder,d2,remainder);
2335 emit_adcs(quotient,quotient,quotient);
2336 emit_shrcc_imm(d2,1,d2);
2337 emit_jcc(out-16); // -4
2338 }
2339 }
2340 else // 64-bit
2341 assert(0);
2342 }
2343 else
2344 {
2345 // Multiply by zero is zero.
2346 // MIPS does not have a divide by zero exception.
2347 // The result is undefined, we return zero.
2348 signed char hr=get_reg(i_regs->regmap,HIREG);
2349 signed char lr=get_reg(i_regs->regmap,LOREG);
2350 if(hr>=0) emit_zeroreg(hr);
2351 if(lr>=0) emit_zeroreg(lr);
2352 }
2353}
2354#define multdiv_assemble multdiv_assemble_arm
2355
2356static void do_jump_vaddr(int rs)
2357{
2358 emit_far_jump(jump_vaddr_reg[rs]);
2359}
2360
// Intentionally emits nothing on ARM.
// On x86 this step loads the constant 0xf8 into the register; on ARM the
// hash is computed with a single instruction instead (see do_rhash).
static void do_preload_rhash(int r) {
  (void)r;
}
2365
2366static void do_preload_rhtbl(int ht) {
2367 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2368}
2369
// Emit the mini hash computation: rh = rs & 0xf8.
static void do_rhash(int rs,int rh) {
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2373
2374static void do_miniht_load(int ht,int rh) {
2375 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2376 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2377}
2378
// Emit the mini hash table dispatch.
//   rs - host register holding the address being looked up
//   rh - host register holding the value loaded from the table slot
//   ht - host register pointing at the table slot
// Emitted code compares rh with rs and, on equality, loads pc (r15) from
// [ht+4]; otherwise it continues into the jump_vaddr stub for rs.
static void do_miniht_jump(int rs,int rh,int ht) {
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // always leave through the r7 stub
  const int fixed_reg = 7;
  if (rs != fixed_reg) {
    emit_mov(rs, fixed_reg);
    rs = fixed_reg;
  }
#endif
  do_jump_vaddr(rs);
}
2389
2390static void do_miniht_insert(u_int return_address,int rt,int temp) {
2391 #ifndef HAVE_ARMV7
2392 emit_movimm(return_address,rt); // PC into link register
2393 add_to_linker(out,return_address,1);
2394 emit_pcreladdr(temp);
2395 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2396 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2397 #else
2398 emit_movw(return_address&0x0000FFFF,rt);
2399 add_to_linker(out,return_address,1);
2400 emit_pcreladdr(temp);
2401 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2402 emit_movt(return_address&0xFFFF0000,rt);
2403 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2404 #endif
2405}
2406
2407// CPU-architecture-specific initialization
2408static void arch_init(void)
2409{
2410 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2411 struct tramp_insns *ops = ndrc->tramp.ops;
2412 size_t i;
2413 assert(!(diff & 3));
2414 assert(diff < 0x1000);
2415 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2416 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2417 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2418 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2419}
2420
2421// vim:shiftwidth=2:expandtab