drc: try to support w^x platforms like iOS
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
// Statically allocated buffer of 1<<TARGET_SIZE_2 bytes, aligned to 4096.
// Only compiled in when BASE_ADDR_FIXED evaluates to 0.
// NOTE(review): presumably the buffer that generated code is written to - confirm.
a327ad27 31#if !BASE_ADDR_FIXED
bdeade46 32char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
33#endif
34
// Bitmask constant; 0x100f sets bits 0-3 and 12, the __MACH__ variant also sets bit 9.
// NOTE(review): presumably bit n stands for ARM register rn - confirm at the use sites.
4d646738 35#ifndef __MACH__
36#define CALLER_SAVE_REGS 0x100f
37#else
38#define CALLER_SAVE_REGS 0x120f
39#endif
40
// Marks a definition as intentionally unused so the compiler does not warn about it.
e2b5e7aa 41#define unused __attribute__((unused))
42
// State defined outside this file. emit_loadreg()/emit_storereg() address
// cycle_count as an offset from &dynarec_local.
57871462 43extern int cycle_count;
44extern int last_count;
45extern int pcaddr;
46extern int pending_exception;
47extern int branch_target;
48extern uint64_t readmem_dword;
57871462 49extern void *dynarec_local;
57871462 50extern u_int mini_ht[32][2];
57871462 51
52void indirect_jump_indexed();
53void indirect_jump();
54void do_interrupt();
55void jump_vaddr_r0();
56void jump_vaddr_r1();
57void jump_vaddr_r2();
58void jump_vaddr_r3();
59void jump_vaddr_r4();
60void jump_vaddr_r5();
61void jump_vaddr_r6();
62void jump_vaddr_r7();
63void jump_vaddr_r8();
64void jump_vaddr_r9();
65void jump_vaddr_r10();
66void jump_vaddr_r12();
67
68const u_int jump_vaddr_reg[16] = {
69 (int)jump_vaddr_r0,
70 (int)jump_vaddr_r1,
71 (int)jump_vaddr_r2,
72 (int)jump_vaddr_r3,
73 (int)jump_vaddr_r4,
74 (int)jump_vaddr_r5,
75 (int)jump_vaddr_r6,
76 (int)jump_vaddr_r7,
77 (int)jump_vaddr_r8,
78 (int)jump_vaddr_r9,
79 (int)jump_vaddr_r10,
80 0,
81 (int)jump_vaddr_r12,
82 0,
83 0,
84 0};
85
0bbd1454 86void invalidate_addr_r0();
87void invalidate_addr_r1();
88void invalidate_addr_r2();
89void invalidate_addr_r3();
90void invalidate_addr_r4();
91void invalidate_addr_r5();
92void invalidate_addr_r6();
93void invalidate_addr_r7();
94void invalidate_addr_r8();
95void invalidate_addr_r9();
96void invalidate_addr_r10();
97void invalidate_addr_r12();
98
99const u_int invalidate_addr_reg[16] = {
100 (int)invalidate_addr_r0,
101 (int)invalidate_addr_r1,
102 (int)invalidate_addr_r2,
103 (int)invalidate_addr_r3,
104 (int)invalidate_addr_r4,
105 (int)invalidate_addr_r5,
106 (int)invalidate_addr_r6,
107 (int)invalidate_addr_r7,
108 (int)invalidate_addr_r8,
109 (int)invalidate_addr_r9,
110 (int)invalidate_addr_r10,
111 0,
112 (int)invalidate_addr_r12,
113 0,
114 0,
115 0};
116
// Array of 1<<(TARGET_SIZE_2-17) words, i.e. 1<<(TARGET_SIZE_2-12) bits in total.
// NOTE(review): presumably a bitmap with one bit per 4 KiB of the 1<<TARGET_SIZE_2
// byte cache, marking regions that still need a cache flush - confirm at use sites.
d148d265 117static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 118
57871462 119/* Linker */
120
e2b5e7aa 121static void set_jump_target(int addr,u_int target)
57871462 122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
// Disabled (compiled out) alternative to set_jump_target().
// When `copy` is non-zero and the first instruction at `target` passes the
// checks below, that instruction is copied into the word preceding the branch
// and the branch is pointed one instruction past `target`.
e2b5e7aa 155#if 0
156static void set_jump_target_fillslot(int addr,u_int target,int copy)
57871462 157{
158 u_char *ptr=(u_char *)addr;
159 u_int *ptr2=(u_int *)ptr;
// a copy is only allowed when the slot before the branch holds 0xe28dd000
160 assert(!copy||ptr2[-1]==0xe28dd000);
161 if(ptr[3]==0xe2) {
162 assert(!copy);
163 assert((target-(u_int)ptr2-8)<4096);
164 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
165 }
166 else {
167 assert((ptr[3]&0x0e)==0xa);
168 u_int target_insn=*(u_int *)target;
// the three checks below veto the copy for certain instruction classes
169 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
170 copy=0;
171 }
172 if((target_insn&0x0c100000)==0x04100000) { // Load
173 copy=0;
174 }
175 if(target_insn&0x08000000) {
176 copy=0;
177 }
178 if(copy) {
179 ptr2[-1]=target_insn;
// skip the instruction that was just copied
180 target+=4;
181 }
182 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
183 }
184}
e2b5e7aa 185#endif
57871462 186
187/* Literal pool */
e2b5e7aa 188static void add_literal(int addr,int val)
57871462 189{
15776b68 190 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 191 literals[literalcount][0]=addr;
192 literals[literalcount][1]=val;
9f51b4b9 193 literalcount++;
194}
57871462 195
d148d265 196// from a pointer to external jump stub (which was produced by emit_extjump2)
197// find where the jumping insn is
198static void *find_extjump_insn(void *stub)
57871462 199{
200 int *ptr=(int *)(stub+4);
d148d265 201 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 202 u_int offset=*ptr&0xfff;
d148d265 203 void **l_ptr=(void *)ptr+offset+8;
204 return *l_ptr;
57871462 205}
206
f968d35d 207// find where external branch is liked to using addr of it's stub:
208// get address that insn one after stub loads (dyna_linker arg1),
209// treat it as a pointer to branch insn,
210// return addr where that branch jumps to
e2b5e7aa 211static int get_pointer(void *stub)
57871462 212{
213 //printf("get_pointer(%x)\n",(int)stub);
d148d265 214 int *i_ptr=find_extjump_insn(stub);
57871462 215 assert((*i_ptr&0x0f000000)==0x0a000000);
216 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
217}
218
219// Find the "clean" entry point from a "dirty" entry point
220// by skipping past the call to verify_code
e2b5e7aa 221static u_int get_clean_addr(int addr)
57871462 222{
223 int *ptr=(int *)addr;
665f33e1 224 #ifndef HAVE_ARMV7
57871462 225 ptr+=4;
226 #else
227 ptr+=6;
228 #endif
229 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
230 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
231 ptr++;
232 if((*ptr&0xFF000000)==0xea000000) {
233 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
234 }
235 return (u_int)ptr;
236}
237
e2b5e7aa 238static int verify_dirty(u_int *ptr)
57871462 239{
665f33e1 240 #ifndef HAVE_ARMV7
57871462 241 // get from literal pool
15776b68 242 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 243 u_int offset=*ptr&0xfff;
244 u_int *l_ptr=(void *)ptr+offset+8;
245 u_int source=l_ptr[0];
246 u_int copy=l_ptr[1];
247 u_int len=l_ptr[2];
248 ptr+=4;
249 #else
250 // ARMv7 movw/movt
251 assert((*ptr&0xFFF00000)==0xe3000000);
252 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
253 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
254 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
255 ptr+=6;
256 #endif
257 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
258 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 259 //printf("verify_dirty: %x %x %x\n",source,copy,len);
260 return !memcmp((void *)source,(void *)copy,len);
261}
262
263// This doesn't necessarily find all clean entry points, just
264// guarantees that it's not dirty
e2b5e7aa 265static int isclean(int addr)
57871462 266{
665f33e1 267 #ifndef HAVE_ARMV7
581335b0 268 u_int *ptr=((u_int *)addr)+4;
57871462 269 #else
581335b0 270 u_int *ptr=((u_int *)addr)+6;
57871462 271 #endif
272 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
273 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
4a35de07 280// get source that block at addr was compiled from (host pointers)
e2b5e7aa 281static void get_bounds(int addr,u_int *start,u_int *end)
57871462 282{
283 u_int *ptr=(u_int *)addr;
665f33e1 284 #ifndef HAVE_ARMV7
57871462 285 // get from literal pool
15776b68 286 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 287 u_int offset=*ptr&0xfff;
288 u_int *l_ptr=(void *)ptr+offset+8;
289 u_int source=l_ptr[0];
290 //u_int copy=l_ptr[1];
291 u_int len=l_ptr[2];
292 ptr+=4;
293 #else
294 // ARMv7 movw/movt
295 assert((*ptr&0xFFF00000)==0xe3000000);
296 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
297 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
298 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
299 ptr+=6;
300 #endif
301 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
302 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 303 *start=source;
304 *end=source+len;
305}
306
307/* Register allocation */
308
309// Note: registers are allocated clean (unmodified state)
310// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register in `cur` for instruction index `i`.
// Attempts are made in this order:
//   1. return at once if `reg` is flagged in cur->u or is already mapped;
//   2. use the preferred host register if it is free or holds a value flagged
//      as unneeded in cur->u / cur->uu;
//   3. drop one unneeded mapping, then use any free host register other than
//      EXCLUDE_REG, preferring one that does not hold an operand of insn i-1;
//   4. otherwise evict a mapping, trying the highest hsn[] values first
//      (hsn[] starts at 10 everywhere and is then passed to lsn()).
// Whichever host register is chosen gets its dirty and isconst bits cleared.
// Prints a message and exits the process if nothing can be allocated.
e2b5e7aa 311static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 312{
313 int r,hr;
314 int preferred_reg = (reg&7);
315 if(reg==CCREG) preferred_reg=HOST_CCREG;
316 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 317
57871462 318 // Don't allocate unused registers
319 if((cur->u>>reg)&1) return;
9f51b4b9 320
57871462 321 // see if it's already allocated
322 for(hr=0;hr<HOST_REGS;hr++)
323 {
324 if(cur->regmap[hr]==reg) return;
325 }
9f51b4b9 326
57871462 327 // Keep the same mapping if the register was already allocated in a loop
328 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 329
57871462 330 // Try to allocate the preferred register
331 if(cur->regmap[preferred_reg]==-1) {
332 cur->regmap[preferred_reg]=reg;
333 cur->dirty&=~(1<<preferred_reg);
334 cur->isconst&=~(1<<preferred_reg);
335 return;
336 }
337 r=cur->regmap[preferred_reg];
// regmap values >= 64 are tested against cur->uu instead of cur->u
338 if(r<64&&((cur->u>>r)&1)) {
339 cur->regmap[preferred_reg]=reg;
340 cur->dirty&=~(1<<preferred_reg);
341 cur->isconst&=~(1<<preferred_reg);
342 return;
343 }
344 if(r>=64&&((cur->uu>>(r&63))&1)) {
345 cur->regmap[preferred_reg]=reg;
346 cur->dirty&=~(1<<preferred_reg);
347 cur->isconst&=~(1<<preferred_reg);
348 return;
349 }
9f51b4b9 350
57871462 351 // Clear any unneeded registers
352 // We try to keep the mapping consistent, if possible, because it
353 // makes branches easier (especially loops). So we try to allocate
354 // first (see above) before removing old mappings. If this is not
355 // possible then go ahead and clear out the registers that are no
356 // longer needed.
// (only the first unneeded mapping found is removed: note the break)
357 for(hr=0;hr<HOST_REGS;hr++)
358 {
359 r=cur->regmap[hr];
360 if(r>=0) {
361 if(r<64) {
362 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
363 }
364 else
365 {
366 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
367 }
368 }
369 }
370 // Try to allocate any available register, but prefer
371 // registers that have not been used recently.
372 if(i>0) {
373 for(hr=0;hr<HOST_REGS;hr++) {
374 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
375 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
376 cur->regmap[hr]=reg;
377 cur->dirty&=~(1<<hr);
378 cur->isconst&=~(1<<hr);
379 return;
380 }
381 }
382 }
383 }
384 // Try to allocate any available register
385 for(hr=0;hr<HOST_REGS;hr++) {
386 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
387 cur->regmap[hr]=reg;
388 cur->dirty&=~(1<<hr);
389 cur->isconst&=~(1<<hr);
390 return;
391 }
392 }
9f51b4b9 393
57871462 394 // Ok, now we have to evict someone
395 // Pick a register we hopefully won't need soon
396 u_char hsn[MAXREG+1];
397 memset(hsn,10,sizeof(hsn));
398 int j;
399 lsn(hsn,i,&preferred_reg);
400 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
401 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
402 if(i>0) {
403 // Don't evict the cycle count at entry points, otherwise the entry
404 // stub will have to write it.
405 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
406 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// first pass: hsn values 10 down to 3, skipping operands of insn i-1
407 for(j=10;j>=3;j--)
408 {
409 // Alloc preferred register if available
410 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
411 for(hr=0;hr<HOST_REGS;hr++) {
412 // Evict both parts of a 64-bit register
413 if((cur->regmap[hr]&63)==r) {
414 cur->regmap[hr]=-1;
415 cur->dirty&=~(1<<hr);
416 cur->isconst&=~(1<<hr);
417 }
418 }
419 cur->regmap[preferred_reg]=reg;
420 return;
421 }
422 for(r=1;r<=MAXREG;r++)
423 {
424 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// a mapping of r+64 is taken before a mapping of r itself
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
427 if(cur->regmap[hr]==r+64) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 for(hr=0;hr<HOST_REGS;hr++) {
436 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
437 if(cur->regmap[hr]==r) {
438 cur->regmap[hr]=reg;
439 cur->dirty&=~(1<<hr);
440 cur->isconst&=~(1<<hr);
441 return;
442 }
443 }
444 }
445 }
446 }
447 }
448 }
// last resort: any mapped guest register, still highest hsn value first
449 for(j=10;j>=0;j--)
450 {
451 for(r=1;r<=MAXREG;r++)
452 {
453 if(hsn[r]==j) {
454 for(hr=0;hr<HOST_REGS;hr++) {
455 if(cur->regmap[hr]==r+64) {
456 cur->regmap[hr]=reg;
457 cur->dirty&=~(1<<hr);
458 cur->isconst&=~(1<<hr);
459 return;
460 }
461 }
462 for(hr=0;hr<HOST_REGS;hr++) {
463 if(cur->regmap[hr]==r) {
464 cur->regmap[hr]=reg;
465 cur->dirty&=~(1<<hr);
466 cur->isconst&=~(1<<hr);
467 return;
468 }
469 }
470 }
471 }
472 }
c43b5311 473 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 474}
475
// Allocate host registers for both halves of guest register `reg`.
// The lower half is allocated through alloc_reg(); the upper half is stored in
// regmap as reg|64 and is only allocated when `reg` is not flagged in cur->uu.
// The search for the upper half follows the same steps as alloc_reg(), with a
// default preferred host register of 8+(reg&1) and with the "clear one
// unneeded mapping" scan running from the highest host register downwards.
// Prints a message and exits the process if nothing can be allocated.
e2b5e7aa 476static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 477{
478 int preferred_reg = 8+(reg&1);
479 int r,hr;
9f51b4b9 480
57871462 481 // allocate the lower 32 bits
482 alloc_reg(cur,i,reg);
9f51b4b9 483
57871462 484 // Don't allocate unused registers
485 if((cur->uu>>reg)&1) return;
9f51b4b9 486
57871462 487 // see if the upper half is already allocated
488 for(hr=0;hr<HOST_REGS;hr++)
489 {
490 if(cur->regmap[hr]==reg+64) return;
491 }
9f51b4b9 492
57871462 493 // Keep the same mapping if the register was already allocated in a loop
494 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 495
57871462 496 // Try to allocate the preferred register
497 if(cur->regmap[preferred_reg]==-1) {
498 cur->regmap[preferred_reg]=reg|64;
499 cur->dirty&=~(1<<preferred_reg);
500 cur->isconst&=~(1<<preferred_reg);
501 return;
502 }
503 r=cur->regmap[preferred_reg];
504 if(r<64&&((cur->u>>r)&1)) {
505 cur->regmap[preferred_reg]=reg|64;
506 cur->dirty&=~(1<<preferred_reg);
507 cur->isconst&=~(1<<preferred_reg);
508 return;
509 }
510 if(r>=64&&((cur->uu>>(r&63))&1)) {
511 cur->regmap[preferred_reg]=reg|64;
512 cur->dirty&=~(1<<preferred_reg);
513 cur->isconst&=~(1<<preferred_reg);
514 return;
515 }
9f51b4b9 516
57871462 517 // Clear any unneeded registers
518 // We try to keep the mapping consistent, if possible, because it
519 // makes branches easier (especially loops). So we try to allocate
520 // first (see above) before removing old mappings. If this is not
521 // possible then go ahead and clear out the registers that are no
522 // longer needed.
// (scans downwards and removes only the first unneeded mapping found)
523 for(hr=HOST_REGS-1;hr>=0;hr--)
524 {
525 r=cur->regmap[hr];
526 if(r>=0) {
527 if(r<64) {
528 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
529 }
530 else
531 {
532 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
533 }
534 }
535 }
536 // Try to allocate any available register, but prefer
537 // registers that have not been used recently.
538 if(i>0) {
539 for(hr=0;hr<HOST_REGS;hr++) {
540 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
541 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
542 cur->regmap[hr]=reg|64;
543 cur->dirty&=~(1<<hr);
544 cur->isconst&=~(1<<hr);
545 return;
546 }
547 }
548 }
549 }
550 // Try to allocate any available register
551 for(hr=0;hr<HOST_REGS;hr++) {
552 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
553 cur->regmap[hr]=reg|64;
554 cur->dirty&=~(1<<hr);
555 cur->isconst&=~(1<<hr);
556 return;
557 }
558 }
9f51b4b9 559
57871462 560 // Ok, now we have to evict someone
561 // Pick a register we hopefully won't need soon
562 u_char hsn[MAXREG+1];
563 memset(hsn,10,sizeof(hsn));
564 int j;
565 lsn(hsn,i,&preferred_reg);
566 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
567 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
568 if(i>0) {
569 // Don't evict the cycle count at entry points, otherwise the entry
570 // stub will have to write it.
571 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
572 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// first pass: hsn values 10 down to 3, skipping operands of insn i-1
573 for(j=10;j>=3;j--)
574 {
575 // Alloc preferred register if available
576 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
577 for(hr=0;hr<HOST_REGS;hr++) {
578 // Evict both parts of a 64-bit register
579 if((cur->regmap[hr]&63)==r) {
580 cur->regmap[hr]=-1;
581 cur->dirty&=~(1<<hr);
582 cur->isconst&=~(1<<hr);
583 }
584 }
585 cur->regmap[preferred_reg]=reg|64;
586 return;
587 }
588 for(r=1;r<=MAXREG;r++)
589 {
590 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
593 if(cur->regmap[hr]==r+64) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 for(hr=0;hr<HOST_REGS;hr++) {
602 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
603 if(cur->regmap[hr]==r) {
604 cur->regmap[hr]=reg|64;
605 cur->dirty&=~(1<<hr);
606 cur->isconst&=~(1<<hr);
607 return;
608 }
609 }
610 }
611 }
612 }
613 }
614 }
// last resort: any mapped guest register, still highest hsn value first
615 for(j=10;j>=0;j--)
616 {
617 for(r=1;r<=MAXREG;r++)
618 {
619 if(hsn[r]==j) {
620 for(hr=0;hr<HOST_REGS;hr++) {
621 if(cur->regmap[hr]==r+64) {
622 cur->regmap[hr]=reg|64;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628 for(hr=0;hr<HOST_REGS;hr++) {
629 if(cur->regmap[hr]==r) {
630 cur->regmap[hr]=reg|64;
631 cur->dirty&=~(1<<hr);
632 cur->isconst&=~(1<<hr);
633 return;
634 }
635 }
636 }
637 }
638 }
c43b5311 639 SysPrintf("This shouldn't happen");exit(1);
57871462 640}
641
642// Allocate a temporary register. This is done without regard to
643// dirty status or whether the register we request is on the unneeded list
644// Note: This will only allocate one register, even if called multiple times
// Map temporary `reg` to a host register in `cur` for instruction index `i`.
// Attempts are made in this order:
//   1. return if `reg` is already mapped (EXCLUDE_REG is ignored);
//   2. take a free host register, scanning from the highest number down;
//   3. take a register whose value is flagged unneeded both in cur->u/uu and,
//      for i>0, in unneeded_reg[i-1] / unneeded_reg_upper[i-1];
//   4. otherwise evict a mapping, trying the highest hsn[] values first.
// No preferred register is used (lsn() is handed a preferred_reg of -1).
// Prints a message and exits the process if nothing can be allocated.
e2b5e7aa 645static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 646{
647 int r,hr;
648 int preferred_reg = -1;
9f51b4b9 649
57871462 650 // see if it's already allocated
651 for(hr=0;hr<HOST_REGS;hr++)
652 {
653 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
654 }
9f51b4b9 655
57871462 656 // Try to allocate any available register
657 for(hr=HOST_REGS-1;hr>=0;hr--) {
658 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
659 cur->regmap[hr]=reg;
660 cur->dirty&=~(1<<hr);
661 cur->isconst&=~(1<<hr);
662 return;
663 }
664 }
9f51b4b9 665
57871462 666 // Find an unneeded register
667 for(hr=HOST_REGS-1;hr>=0;hr--)
668 {
669 r=cur->regmap[hr];
670 if(r>=0) {
671 if(r<64) {
672 if((cur->u>>r)&1) {
673 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
674 cur->regmap[hr]=reg;
675 cur->dirty&=~(1<<hr);
676 cur->isconst&=~(1<<hr);
677 return;
678 }
679 }
680 }
681 else
682 {
683 if((cur->uu>>(r&63))&1) {
684 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
685 cur->regmap[hr]=reg;
686 cur->dirty&=~(1<<hr);
687 cur->isconst&=~(1<<hr);
688 return;
689 }
690 }
691 }
692 }
693 }
9f51b4b9 694
57871462 695 // Ok, now we have to evict someone
696 // Pick a register we hopefully won't need soon
697 // TODO: we might want to follow unconditional jumps here
698 // TODO: get rid of dupe code and make this into a function
699 u_char hsn[MAXREG+1];
700 memset(hsn,10,sizeof(hsn));
701 int j;
702 lsn(hsn,i,&preferred_reg);
703 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
704 if(i>0) {
705 // Don't evict the cycle count at entry points, otherwise the entry
706 // stub will have to write it.
707 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
708 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// first pass: hsn values 10 down to 3, skipping operands of insn i-1;
// HOST_CCREG is only considered while hsn[CCREG] is above 2
709 for(j=10;j>=3;j--)
710 {
711 for(r=1;r<=MAXREG;r++)
712 {
713 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
716 if(cur->regmap[hr]==r+64) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 for(hr=0;hr<HOST_REGS;hr++) {
725 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
726 if(cur->regmap[hr]==r) {
727 cur->regmap[hr]=reg;
728 cur->dirty&=~(1<<hr);
729 cur->isconst&=~(1<<hr);
730 return;
731 }
732 }
733 }
734 }
735 }
736 }
737 }
// last resort: any mapped guest register, still highest hsn value first
738 for(j=10;j>=0;j--)
739 {
740 for(r=1;r<=MAXREG;r++)
741 {
742 if(hsn[r]==j) {
743 for(hr=0;hr<HOST_REGS;hr++) {
744 if(cur->regmap[hr]==r+64) {
745 cur->regmap[hr]=reg;
746 cur->dirty&=~(1<<hr);
747 cur->isconst&=~(1<<hr);
748 return;
749 }
750 }
751 for(hr=0;hr<HOST_REGS;hr++) {
752 if(cur->regmap[hr]==r) {
753 cur->regmap[hr]=reg;
754 cur->dirty&=~(1<<hr);
755 cur->isconst&=~(1<<hr);
756 return;
757 }
758 }
759 }
760 }
761 }
c43b5311 762 SysPrintf("This shouldn't happen");exit(1);
57871462 763}
e2b5e7aa 764
57871462 765// Allocate a specific ARM register.
e2b5e7aa 766static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 767{
768 int n;
f776eb14 769 int dirty=0;
9f51b4b9 770
57871462 771 // see if it's already allocated (and dealloc it)
772 for(n=0;n<HOST_REGS;n++)
773 {
f776eb14 774 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
775 dirty=(cur->dirty>>n)&1;
776 cur->regmap[n]=-1;
777 }
57871462 778 }
9f51b4b9 779
57871462 780 cur->regmap[hr]=reg;
781 cur->dirty&=~(1<<hr);
f776eb14 782 cur->dirty|=dirty<<hr;
57871462 783 cur->isconst&=~(1<<hr);
784}
785
786// Alloc cycle count into dedicated register
e2b5e7aa 787static void alloc_cc(struct regstat *cur,int i)
57871462 788{
789 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
790}
791
792/* Special alloc */
793
794
795/* Assembler */
796
e2b5e7aa 797static unused char regname[16][4] = {
57871462 798 "r0",
799 "r1",
800 "r2",
801 "r3",
802 "r4",
803 "r5",
804 "r6",
805 "r7",
806 "r8",
807 "r9",
808 "r10",
809 "fp",
810 "r12",
811 "sp",
812 "lr",
813 "pc"};
814
e2b5e7aa 815static void output_w32(u_int word)
57871462 816{
817 *((u_int *)out)=word;
818 out+=4;
819}
e2b5e7aa 820
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 828
// Build the rd/rn fields together with an 8-bit immediate that is to be
// shifted left by `shift` bits. `shift` must be even; it is encoded in the
// rotate field (bits 8-11) as a right rotation of (32-shift).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=(32-shift)&30;
  u_int regs_part=(rn<<16)|(rd<<12);
  return regs_part|(rotate<<7)|imm;
}
e2b5e7aa 837
// Try to express `imm` as an ARM immediate operand: an 8-bit value rotated
// right by an even number of bits.
// On success, returns 1 and stores the 12-bit operand field (rotate in bits
// 8-11, value in bits 0-7) in *encoded. Returns 0, leaving *encoded at 0,
// when no such encoding exists.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
e2b5e7aa 853
854static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 855{
856 u_int ret=genimm(imm,encoded);
857 assert(ret);
581335b0 858 (void)ret;
cfbd3c6e 859}
e2b5e7aa 860
861static u_int genjmp(u_int addr)
57871462 862{
863 int offset=addr-(int)out-8;
e80343e2 864 if(offset<-33554432||offset>=33554432) {
865 if (addr>2) {
c43b5311 866 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 867 exit(1);
868 }
869 return 0;
870 }
57871462 871 return ((u_int)offset>>2)&0xffffff;
872}
873
e2b5e7aa 874static void emit_mov(int rs,int rt)
57871462 875{
876 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
878}
879
e2b5e7aa 880static void emit_movs(int rs,int rt)
57871462 881{
882 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
883 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
884}
885
e2b5e7aa 886static void emit_add(int rs1,int rs2,int rt)
57871462 887{
888 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
889 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
890}
891
e2b5e7aa 892static void emit_adds(int rs1,int rs2,int rt)
57871462 893{
894 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
895 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
896}
897
e2b5e7aa 898static void emit_adcs(int rs1,int rs2,int rt)
57871462 899{
900 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
901 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
902}
903
e2b5e7aa 904static void emit_sbc(int rs1,int rs2,int rt)
57871462 905{
906 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
907 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
908}
909
e2b5e7aa 910static void emit_sbcs(int rs1,int rs2,int rt)
57871462 911{
912 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
913 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
914}
915
e2b5e7aa 916static void emit_neg(int rs, int rt)
57871462 917{
918 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
919 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
920}
921
e2b5e7aa 922static void emit_negs(int rs, int rt)
57871462 923{
924 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
925 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
926}
927
e2b5e7aa 928static void emit_sub(int rs1,int rs2,int rt)
57871462 929{
930 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
932}
933
e2b5e7aa 934static void emit_subs(int rs1,int rs2,int rt)
57871462 935{
936 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
938}
939
e2b5e7aa 940static void emit_zeroreg(int rt)
57871462 941{
942 assem_debug("mov %s,#0\n",regname[rt]);
943 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
944}
945
e2b5e7aa 946static void emit_loadlp(u_int imm,u_int rt)
790ee18e 947{
948 add_literal((int)out,imm);
949 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
950 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
951}
e2b5e7aa 952
953static void emit_movw(u_int imm,u_int rt)
790ee18e 954{
955 assert(imm<65536);
956 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
957 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
958}
e2b5e7aa 959
960static void emit_movt(u_int imm,u_int rt)
790ee18e 961{
962 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
963 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
964}
e2b5e7aa 965
966static void emit_movimm(u_int imm,u_int rt)
790ee18e 967{
968 u_int armval;
969 if(genimm(imm,&armval)) {
970 assem_debug("mov %s,#%d\n",regname[rt],imm);
971 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
972 }else if(genimm(~imm,&armval)) {
973 assem_debug("mvn %s,#%d\n",regname[rt],imm);
974 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
975 }else if(imm<65536) {
665f33e1 976 #ifndef HAVE_ARMV7
790ee18e 977 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
978 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
979 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
980 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
981 #else
982 emit_movw(imm,rt);
983 #endif
984 }else{
665f33e1 985 #ifndef HAVE_ARMV7
790ee18e 986 emit_loadlp(imm,rt);
987 #else
988 emit_movw(imm&0x0000FFFF,rt);
989 emit_movt(imm&0xFFFF0000,rt);
990 #endif
991 }
992}
e2b5e7aa 993
994static void emit_pcreladdr(u_int rt)
790ee18e 995{
996 assem_debug("add %s,pc,#?\n",regname[rt]);
997 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
998}
999
e2b5e7aa 1000static void emit_loadreg(int r, int hr)
57871462 1001{
3d624f89 1002 if(r&64) {
c43b5311 1003 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1004 assert(0);
1005 return;
3d624f89 1006 }
57871462 1007 if((r&63)==0)
1008 emit_zeroreg(hr);
1009 else {
3d624f89 1010 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1011 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1012 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1013 if(r==CCREG) addr=(int)&cycle_count;
1014 if(r==CSREG) addr=(int)&Status;
1015 if(r==FSREG) addr=(int)&FCR31;
1016 if(r==INVCP) addr=(int)&invc_ptr;
1017 u_int offset = addr-(u_int)&dynarec_local;
1018 assert(offset<4096);
1019 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1020 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1021 }
1022}
e2b5e7aa 1023
1024static void emit_storereg(int r, int hr)
57871462 1025{
3d624f89 1026 if(r&64) {
c43b5311 1027 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1028 assert(0);
1029 return;
3d624f89 1030 }
3d624f89 1031 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1032 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1033 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1034 if(r==CCREG) addr=(int)&cycle_count;
1035 if(r==FSREG) addr=(int)&FCR31;
1036 u_int offset = addr-(u_int)&dynarec_local;
1037 assert(offset<4096);
1038 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1039 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1040}
1041
e2b5e7aa 1042static void emit_test(int rs, int rt)
57871462 1043{
1044 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1045 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1046}
1047
e2b5e7aa 1048static void emit_testimm(int rs,int imm)
57871462 1049{
1050 u_int armval;
5a05d80c 1051 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1052 genimm_checked(imm,&armval);
57871462 1053 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1054}
1055
e2b5e7aa 1056static void emit_testeqimm(int rs,int imm)
b9b61529 1057{
1058 u_int armval;
1059 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1060 genimm_checked(imm,&armval);
b9b61529 1061 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1062}
1063
e2b5e7aa 1064static void emit_not(int rs,int rt)
57871462 1065{
1066 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1067 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1068}
1069
e2b5e7aa 1070static void emit_mvnmi(int rs,int rt)
b9b61529 1071{
1072 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1073 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1074}
1075
e2b5e7aa 1076static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1077{
1078 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1079 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1080}
1081
e2b5e7aa 1082static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1083{
1084 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1085 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1086}
e2b5e7aa 1087
1088static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1089{
1090 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1091 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1092}
1093
e2b5e7aa 1094static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1095{
1096 assert(rs<16);
1097 assert(rt<16);
1098 assert(imm<32);
1099 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1100 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1101}
1102
e2b5e7aa 1103static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1104{
1105 assert(rs<16);
1106 assert(rt<16);
1107 assert(imm<32);
1108 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1109 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1110}
1111
e2b5e7aa 1112static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1113{
1114 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
e2b5e7aa 1118static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 if(imm!=0) {
57871462 1123 u_int armval;
1124 if(genimm(imm,&armval)) {
1125 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1126 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1127 }else if(genimm(-imm,&armval)) {
8a0a8423 1128 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1129 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1130 #ifdef HAVE_ARMV7
1131 }else if(rt!=rs&&(u_int)imm<65536) {
1132 emit_movw(imm&0x0000ffff,rt);
1133 emit_add(rs,rt,rt);
1134 }else if(rt!=rs&&(u_int)-imm<65536) {
1135 emit_movw(-imm&0x0000ffff,rt);
1136 emit_sub(rs,rt,rt);
1137 #endif
1138 }else if((u_int)-imm<65536) {
57871462 1139 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1140 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1141 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1142 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1143 }else {
1144 do {
1145 int shift = (ffs(imm) - 1) & ~1;
1146 int imm8 = imm & (0xff << shift);
1147 genimm_checked(imm8,&armval);
1148 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1149 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1150 rs = rt;
1151 imm &= ~imm8;
1152 }
1153 while (imm != 0);
57871462 1154 }
1155 }
1156 else if(rs!=rt) emit_mov(rs,rt);
1157}
1158
e2b5e7aa 1159static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1160{
1161 assert(imm>-65536&&imm<65536);
1162 u_int armval;
1163 if(genimm(imm,&armval)) {
1164 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1165 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1166 }else if(genimm(-imm,&armval)) {
1167 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1168 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1169 }else if(imm<0) {
1170 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1171 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1172 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1173 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1174 }else{
1175 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1177 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1178 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1179 }
1180}
e2b5e7aa 1181
1182static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1183{
1184 emit_addimm(rt,imm,rt);
1185}
1186
e2b5e7aa 1187static void emit_addnop(u_int r)
57871462 1188{
1189 assert(r<16);
1190 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1191 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1192}
1193
e2b5e7aa 1194static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1195{
1196 u_int armval;
cfbd3c6e 1197 genimm_checked(imm,&armval);
57871462 1198 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1199 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1200}
1edfcc68 1201
e2b5e7aa 1202static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1203{
1204 assert(0);
1205 u_int armval;
cfbd3c6e 1206 genimm_checked(imm,&armval);
57871462 1207 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1208 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1209}
1210
e2b5e7aa 1211static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1212{
1213 // TODO: if(genimm(imm,&armval)) ...
1214 // else
1215 emit_movimm(imm,HOST_TEMPREG);
1216 emit_adds(HOST_TEMPREG,rsl,rtl);
1217 emit_adcimm(rsh,0,rth);
1218}
1219
e2b5e7aa 1220static void emit_andimm(int rs,int imm,int rt)
57871462 1221{
1222 u_int armval;
790ee18e 1223 if(imm==0) {
1224 emit_zeroreg(rt);
1225 }else if(genimm(imm,&armval)) {
57871462 1226 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1227 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1228 }else if(genimm(~imm,&armval)) {
1229 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1230 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1231 }else if(imm==65535) {
332a4533 1232 #ifndef HAVE_ARMV6
57871462 1233 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1234 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1235 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1236 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1237 #else
1238 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1239 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1240 #endif
1241 }else{
1242 assert(imm>0&&imm<65535);
665f33e1 1243 #ifndef HAVE_ARMV7
57871462 1244 assem_debug("mov r14,#%d\n",imm&0xFF00);
1245 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1246 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1247 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1248 #else
1249 emit_movw(imm,HOST_TEMPREG);
1250 #endif
1251 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1252 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1253 }
1254}
1255
e2b5e7aa 1256static void emit_orimm(int rs,int imm,int rt)
57871462 1257{
1258 u_int armval;
790ee18e 1259 if(imm==0) {
1260 if(rs!=rt) emit_mov(rs,rt);
1261 }else if(genimm(imm,&armval)) {
57871462 1262 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1263 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1264 }else{
1265 assert(imm>0&&imm<65536);
1266 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1267 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1268 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1269 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1270 }
1271}
1272
e2b5e7aa 1273static void emit_xorimm(int rs,int imm,int rt)
57871462 1274{
57871462 1275 u_int armval;
790ee18e 1276 if(imm==0) {
1277 if(rs!=rt) emit_mov(rs,rt);
1278 }else if(genimm(imm,&armval)) {
57871462 1279 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1281 }else{
514ed0d9 1282 assert(imm>0&&imm<65536);
57871462 1283 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1284 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1285 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1286 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1287 }
1288}
1289
e2b5e7aa 1290static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1291{
1292 assert(imm>0);
1293 assert(imm<32);
1294 //if(imm==1) ...
1295 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1296 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1297}
1298
e2b5e7aa 1299static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1300{
1301 assert(imm>0);
1302 assert(imm<32);
1303 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1305}
1306
e2b5e7aa 1307static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1313}
1314
e2b5e7aa 1315static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1321}
1322
e2b5e7aa 1323static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1324{
1325 assert(imm>0);
1326 assert(imm<32);
1327 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1328 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1329}
1330
e2b5e7aa 1331static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1332{
1333 assert(imm>0);
1334 assert(imm<32);
1335 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1337}
1338
e2b5e7aa 1339static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1340{
1341 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1342 assert(imm>0);
1343 assert(imm<32);
1344 //if(imm==1) ...
1345 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1346 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1347 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1348 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1349}
1350
e2b5e7aa 1351static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1352{
1353 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1354 assert(imm>0);
1355 assert(imm<32);
1356 //if(imm==1) ...
1357 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1359 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1360 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1361}
1362
/* Sign-extend the low 16 bits of rs into rt: a single "sxth" when
 * HAVE_ARMV6 is defined, otherwise a shift-left/arithmetic-shift-right pair. */
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1373
/* Sign-extend the low 8 bits of rs into rt: a single "sxtb" when
 * HAVE_ARMV6 is defined, otherwise a shift-left/arithmetic-shift-right pair. */
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1384
e2b5e7aa 1385static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 //if(imm==1) ...
1391 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1392 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1393}
e2b5e7aa 1394
1395static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1396{
1397 assert(rs<16);
1398 assert(rt<16);
1399 assert(shift<16);
1400 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1401 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1402}
e2b5e7aa 1403
1404static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1405{
1406 assert(rs<16);
1407 assert(rt<16);
1408 assert(shift<16);
1409 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1410 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1411}
57871462 1412
e2b5e7aa 1413static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1414{
1415 assert(rs<16);
1416 assert(rt<16);
1417 assert(shift<16);
1418 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1419 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1420}
e2b5e7aa 1421
1422static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1423{
1424 assert(rs<16);
1425 assert(rt<16);
1426 assert(shift<16);
1427 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1428 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1429}
1430
e2b5e7aa 1431static void emit_cmpimm(int rs,int imm)
57871462 1432{
1433 u_int armval;
1434 if(genimm(imm,&armval)) {
5a05d80c 1435 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1436 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1437 }else if(genimm(-imm,&armval)) {
5a05d80c 1438 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1439 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1440 }else if(imm>0) {
1441 assert(imm<65536);
57871462 1442 emit_movimm(imm,HOST_TEMPREG);
57871462 1443 assem_debug("cmp %s,r14\n",regname[rs]);
1444 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1445 }else{
1446 assert(imm>-65536);
57871462 1447 emit_movimm(-imm,HOST_TEMPREG);
57871462 1448 assem_debug("cmn %s,r14\n",regname[rs]);
1449 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1450 }
1451}
1452
e2b5e7aa 1453static void emit_cmovne_imm(int imm,int rt)
57871462 1454{
1455 assem_debug("movne %s,#%d\n",regname[rt],imm);
1456 u_int armval;
cfbd3c6e 1457 genimm_checked(imm,&armval);
57871462 1458 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1459}
e2b5e7aa 1460
1461static void emit_cmovl_imm(int imm,int rt)
57871462 1462{
1463 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1464 u_int armval;
cfbd3c6e 1465 genimm_checked(imm,&armval);
57871462 1466 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1467}
e2b5e7aa 1468
1469static void emit_cmovb_imm(int imm,int rt)
57871462 1470{
1471 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1472 u_int armval;
cfbd3c6e 1473 genimm_checked(imm,&armval);
57871462 1474 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1475}
e2b5e7aa 1476
1477static void emit_cmovs_imm(int imm,int rt)
57871462 1478{
1479 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1480 u_int armval;
cfbd3c6e 1481 genimm_checked(imm,&armval);
57871462 1482 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1483}
e2b5e7aa 1484
1485static void emit_cmove_reg(int rs,int rt)
57871462 1486{
1487 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1488 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1489}
e2b5e7aa 1490
1491static void emit_cmovne_reg(int rs,int rt)
57871462 1492{
1493 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1494 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1495}
e2b5e7aa 1496
1497static void emit_cmovl_reg(int rs,int rt)
57871462 1498{
1499 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1500 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1501}
e2b5e7aa 1502
1503static void emit_cmovs_reg(int rs,int rt)
57871462 1504{
1505 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1506 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1507}
1508
/* rt = (rs < imm) as a signed 32-bit compare: rt is cleared, then set to 1
 * by a "movlt".  When rt aliases rs the clear is deferred until after the
 * compare has read rs. */
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1516
/* rt = (rs < imm) using the carry-clear condition: rt is cleared, then set
 * to 1 by a "movcc".  When rt aliases rs the clear is deferred until after
 * the compare has read rs. */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1524
/* rt = (rsh:rsl < imm) for a 64-bit register pair and a 32-bit immediate.
 * The low-word result from emit_slti32() is corrected by examining the high
 * word: it is tested against zero for non-negative imm, and compared with -1
 * for negative imm.  rsh must not alias rt. */
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
e2b5e7aa 1542
/* Unsigned-compare counterpart of emit_slti64_32(): the low-word result from
 * emit_sltiu32() is overridden from the high word, forced to 0 when rsh is
 * non-zero (imm >= 0) or forced to 1 when rsh differs from -1 (imm < 0).
 * rsh must not alias rt. */
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1558
e2b5e7aa 1559static void emit_cmp(int rs,int rt)
57871462 1560{
1561 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1562 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1563}
e2b5e7aa 1564
/* rt = (rs > 0): compares rs with 1, loads 1 into rt, then replaces it with
 * 0 through a "movlt". */
static void emit_set_gz32(int rs, int rt)
{
  const int threshold = 1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
e2b5e7aa 1572
/* rt = (rs != 0): sets flags from rs (copying it into rt first when the
 * registers differ) and then writes 1 through a "movne". */
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1580
/* rt = (rsh:rsl > 0) for a 64-bit register pair: starts from the low-word
 * result of emit_set_gz32(), then tests the high word and overrides rt with
 * 1 when it is non-zero and 0 when it is negative. */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  /* High word decides whenever it is non-zero. */
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
e2b5e7aa 1589
/* rt = (rsh:rsl != 0): ORs both halves into rt with flags set, then writes 1
 * through a "movne". */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  const int truth = 1;
  emit_cmovne_imm(truth,rt);
}
e2b5e7aa 1596
/* rt = (rs1 < rs2) as a signed compare: rt is cleared, then set to 1 by a
 * "movlt".  When rt aliases either source the clear is deferred until after
 * the compare. */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1605
/* rt = (rs1 < rs2) using the carry-clear condition: rt is cleared, then set
 * to 1 by a "movcc".  When rt aliases either source the clear is deferred
 * until after the compare. */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1614
1615static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1616{
1617 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1618 assert(u1!=rt);
1619 assert(u2!=rt);
1620 emit_cmp(l1,l2);
1621 emit_movimm(0,rt);
1622 emit_sbcs(u1,u2,HOST_TEMPREG);
1623 emit_cmovl_imm(1,rt);
1624}
e2b5e7aa 1625
1626static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1627{
1628 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1629 assert(u1!=rt);
1630 assert(u2!=rt);
1631 emit_cmp(l1,l2);
1632 emit_movimm(0,rt);
1633 emit_sbcs(u1,u2,HOST_TEMPREG);
1634 emit_cmovb_imm(1,rt);
1635}
1636
e2b5e7aa 1637static void emit_call(int a)
57871462 1638{
1639 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1640 u_int offset=genjmp(a);
1641 output_w32(0xeb000000|offset);
1642}
e2b5e7aa 1643
1644static void emit_jmp(int a)
57871462 1645{
1646 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1647 u_int offset=genjmp(a);
1648 output_w32(0xea000000|offset);
1649}
e2b5e7aa 1650
/* Emit "bne a"; the branch displacement comes from genjmp(). */
static void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
e2b5e7aa 1657
/* Emit "beq a"; the branch displacement comes from genjmp(). */
static void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
e2b5e7aa 1664
/* Emit "bmi a"; the branch displacement comes from genjmp(). */
static void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
e2b5e7aa 1671
/* Emit "bpl a"; the branch displacement comes from genjmp(). */
static void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
e2b5e7aa 1678
/* Emit "blt a"; the branch displacement comes from genjmp(). */
static void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
e2b5e7aa 1685
/* Emit "bge a"; the branch displacement comes from genjmp(). */
static void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
e2b5e7aa 1692
/* Emit "bvc a"; the branch displacement comes from genjmp(). */
static void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
e2b5e7aa 1699
/* Emit "bcs a"; the branch displacement comes from genjmp(). */
static void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
e2b5e7aa 1706
/* Emit "bcc a"; the branch displacement comes from genjmp(). */
static void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1713
e2b5e7aa 1714static void emit_callreg(u_int r)
57871462 1715{
c6c3b1b3 1716 assert(r<15);
1717 assem_debug("blx %s\n",regname[r]);
1718 output_w32(0xe12fff30|r);
57871462 1719}
e2b5e7aa 1720
1721static void emit_jmpreg(u_int r)
57871462 1722{
1723 assem_debug("mov pc,%s\n",regname[r]);
1724 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1725}
1726
e2b5e7aa 1727static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1728{
1729 assert(offset>-4096&&offset<4096);
1730 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1731 if(offset>=0) {
1732 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1733 }else{
1734 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1735 }
1736}
e2b5e7aa 1737
1738static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1739{
1740 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1741 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1742}
e2b5e7aa 1743
1744static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1745{
1746 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1747 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1748}
e2b5e7aa 1749
1750static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1751{
1752 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1753 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1754}
e2b5e7aa 1755
1756static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1757{
1758 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1759 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1760}
e2b5e7aa 1761
1762static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1763{
1764 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1765 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1766}
e2b5e7aa 1767
1768static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1769{
1770 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1771 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1772}
e2b5e7aa 1773
/* Word load that optionally goes through a map register: with map < 0 it is
 * a plain offset load from rs; otherwise addr must be 0 and the load uses
 * rs + (map << 2). */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1782
/* 64-bit load into rh:rl, optionally through a map register.  The high word
 * is skipped when rh < 0.  Without a map the two words are read at addr and
 * addr+4; with a map the map register is incremented by 1 between the two
 * mapped reads, and rh must not alias rs. */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1795
1796static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1797{
1798 assert(offset>-256&&offset<256);
1799 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1800 if(offset>=0) {
1801 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1802 }else{
1803 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1804 }
1805}
e2b5e7aa 1806
1807static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1808{
1809 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1810 else {
1811 if(addr==0) {
1812 emit_shlimm(map,2,map);
1813 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1814 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1815 }else{
1816 assert(addr>-256&&addr<256);
1817 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1818 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1819 emit_movsbl_indexed(addr, rt, rt);
1820 }
1821 }
1822}
e2b5e7aa 1823
1824static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1825{
1826 assert(offset>-256&&offset<256);
1827 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1828 if(offset>=0) {
1829 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1830 }else{
1831 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1832 }
1833}
e2b5e7aa 1834
1835static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1836{
1837 assert(offset>-4096&&offset<4096);
1838 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1839 if(offset>=0) {
1840 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1841 }else{
1842 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1843 }
1844}
e2b5e7aa 1845
1846static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1847{
1848 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1849 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1850}
e2b5e7aa 1851
/* Unsigned byte load, optionally through a map register.  With map < 0 it is
 * a plain offset load; otherwise the load uses base + (map << 2), where the
 * base is rs itself for addr == 0 or rs + addr computed into rt. */
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base = rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base = rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
e2b5e7aa 1864
1865static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1866{
1867 assert(offset>-256&&offset<256);
1868 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1869 if(offset>=0) {
1870 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1871 }else{
1872 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1873 }
1874}
e2b5e7aa 1875
054175e9 1876static void emit_ldrd(int offset, int rs, int rt)
1877{
1878 assert(offset>-256&&offset<256);
1879 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1882 }else{
1883 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1884 }
1885}
e2b5e7aa 1886
1887static void emit_readword(int addr, int rt)
57871462 1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<4096);
1891 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1892 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1893}
e2b5e7aa 1894
1895static unused void emit_movsbl(int addr, int rt)
57871462 1896{
1897 u_int offset = addr-(u_int)&dynarec_local;
1898 assert(offset<256);
1899 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1900 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1901}
e2b5e7aa 1902
1903static unused void emit_movswl(int addr, int rt)
57871462 1904{
1905 u_int offset = addr-(u_int)&dynarec_local;
1906 assert(offset<256);
1907 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1908 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1909}
e2b5e7aa 1910
1911static unused void emit_movzbl(int addr, int rt)
57871462 1912{
1913 u_int offset = addr-(u_int)&dynarec_local;
1914 assert(offset<4096);
1915 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1916 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1917}
e2b5e7aa 1918
1919static unused void emit_movzwl(int addr, int rt)
57871462 1920{
1921 u_int offset = addr-(u_int)&dynarec_local;
1922 assert(offset<256);
1923 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1924 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1925}
57871462 1926
e2b5e7aa 1927static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1928{
1929 assert(offset>-4096&&offset<4096);
1930 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1933 }else{
1934 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1935 }
1936}
e2b5e7aa 1937
1938static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 1939{
1940 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1941 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1942}
e2b5e7aa 1943
/* Word store that optionally goes through a map register: with map < 0 it is
 * a plain offset store to rs; otherwise addr must be 0 and the store uses
 * rs + (map << 2).  The temp argument is not used here. */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 1952
/* 64-bit store of rh:rl, optionally through a map register.
 *
 * Without a map the words go to addr and addr+4 (the high word is skipped
 * when rh < 0).  With a map rh must be valid; the second word is reached
 * either through temp = map + 1 (when temp is distinct from rs) or by
 * advancing rs itself by 4. */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 1969
1970static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1971{
1972 assert(offset>-256&&offset<256);
1973 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1974 if(offset>=0) {
1975 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1976 }else{
1977 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1978 }
1979}
e2b5e7aa 1980
1981static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1982{
1983 assert(offset>-4096&&offset<4096);
1984 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1985 if(offset>=0) {
1986 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1987 }else{
1988 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1989 }
1990}
e2b5e7aa 1991
1992static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 1993{
1994 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1995 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1996}
e2b5e7aa 1997
/* Byte store, optionally through a map register.  With map < 0 it is a plain
 * offset store; otherwise the store uses base + (map << 2), where the base
 * is rs itself for addr == 0 or rs + addr computed into temp. */
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base = rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base = temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
e2b5e7aa 2010
2011static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2012{
2013 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2014 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2015}
e2b5e7aa 2016
2017static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2018{
2019 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2020 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2021}
e2b5e7aa 2022
2023static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2024{
2025 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2026 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2027}
e2b5e7aa 2028
2029static void emit_writeword(int rt, int addr)
57871462 2030{
2031 u_int offset = addr-(u_int)&dynarec_local;
2032 assert(offset<4096);
2033 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2034 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2035}
e2b5e7aa 2036
2037static unused void emit_writehword(int rt, int addr)
57871462 2038{
2039 u_int offset = addr-(u_int)&dynarec_local;
2040 assert(offset<256);
2041 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2042 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2043}
e2b5e7aa 2044
2045static unused void emit_writebyte(int rt, int addr)
57871462 2046{
2047 u_int offset = addr-(u_int)&dynarec_local;
2048 assert(offset<4096);
74426039 2049 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2050 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2051}
57871462 2052
e2b5e7aa 2053static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2054{
2055 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2056 assert(rs1<16);
2057 assert(rs2<16);
2058 assert(hi<16);
2059 assert(lo<16);
2060 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2061}
e2b5e7aa 2062
2063static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2064{
2065 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2066 assert(rs1<16);
2067 assert(rs2<16);
2068 assert(hi<16);
2069 assert(lo<16);
2070 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2071}
2072
e2b5e7aa 2073static void emit_clz(int rs,int rt)
57871462 2074{
2075 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2076 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2077}
2078
e2b5e7aa 2079static void emit_subcs(int rs1,int rs2,int rt)
57871462 2080{
2081 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2083}
2084
e2b5e7aa 2085static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2086{
2087 assert(imm>0);
2088 assert(imm<32);
2089 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2090 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2091}
2092
e2b5e7aa 2093static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2094{
2095 assert(imm>0);
2096 assert(imm<32);
2097 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2098 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2099}
2100
e2b5e7aa 2101static void emit_negmi(int rs, int rt)
57871462 2102{
2103 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2104 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2105}
2106
e2b5e7aa 2107static void emit_negsmi(int rs, int rt)
57871462 2108{
2109 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2110 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2111}
2112
e2b5e7aa 2113static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2114{
2115 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2116 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2117}
2118
e2b5e7aa 2119static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2120{
2121 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2122 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2123}
2124
e2b5e7aa 2125static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2126{
2127 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2128 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2129}
2130
e2b5e7aa 2131static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2132{
2133 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2134 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2135}
2136
e2b5e7aa 2137static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2138{
2139 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2140 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2141}
2142
e2b5e7aa 2143static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2144{
2145 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2146 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2147}
2148
e2b5e7aa 2149static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2150{
2151 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2152 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2153}
2154
e2b5e7aa 2155static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2156{
2157 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2158 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2159}
2160
e2b5e7aa 2161static void emit_teq(int rs, int rt)
57871462 2162{
2163 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2164 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2165}
2166
e2b5e7aa 2167static void emit_rsbimm(int rs, int imm, int rt)
57871462 2168{
2169 u_int armval;
cfbd3c6e 2170 genimm_checked(imm,&armval);
57871462 2171 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2172 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2173}
2174
2175// Load 2 immediates optimizing for small code size
e2b5e7aa 2176static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2177{
2178 emit_movimm(imm1,rt1);
2179 u_int armval;
2180 if(genimm(imm2-imm1,&armval)) {
2181 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2182 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2183 }else if(genimm(imm1-imm2,&armval)) {
2184 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2185 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2186 }
2187 else emit_movimm(imm2,rt2);
2188}
2189
2190// Conditionally select one of two immediates, optimizing for small code size
2191// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2192static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2193{
2194 u_int armval;
2195 if(genimm(imm2-imm1,&armval)) {
2196 emit_movimm(imm1,rt);
2197 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2198 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2199 }else if(genimm(imm1-imm2,&armval)) {
2200 emit_movimm(imm1,rt);
2201 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2202 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2203 }
2204 else {
665f33e1 2205 #ifndef HAVE_ARMV7
57871462 2206 emit_movimm(imm1,rt);
2207 add_literal((int)out,imm2);
2208 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2209 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2210 #else
2211 emit_movw(imm1&0x0000FFFF,rt);
2212 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2213 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2214 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2215 }
2216 emit_movt(imm1&0xFFFF0000,rt);
2217 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2218 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2219 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2220 }
2221 #endif
2222 }
2223}
2224
57871462 2225// special case for checking invalid_code
e2b5e7aa 2226static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2227{
2228 assert(imm<128&&imm>=0);
2229 assert(r>=0&&r<16);
2230 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2231 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2232 emit_cmpimm(HOST_TEMPREG,imm);
2233}
2234
e2b5e7aa 2235static void emit_callne(int a)
0bbd1454 2236{
2237 assem_debug("blne %x\n",a);
2238 u_int offset=genjmp(a);
2239 output_w32(0x1b000000|offset);
2240}
2241
57871462 2242// Used to preload hash table entries
e2b5e7aa 2243static unused void emit_prefetchreg(int r)
57871462 2244{
2245 assem_debug("pld %s\n",regname[r]);
2246 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2247}
2248
2249// Special case for mini_ht
e2b5e7aa 2250static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2251{
2252 assert(offset<4096);
2253 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2254 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2255}
2256
e2b5e7aa 2257static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2258{
2259 u_int armval;
cfbd3c6e 2260 genimm_checked(imm,&armval);
57871462 2261 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2262 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2263}
2264
e2b5e7aa 2265static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2266{
2267 u_int armval;
cfbd3c6e 2268 genimm_checked(imm,&armval);
57871462 2269 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2270 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2271}
2272
e2b5e7aa 2273static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2274{
2275 u_int armval;
cfbd3c6e 2276 genimm_checked(imm,&armval);
57871462 2277 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2278 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2279}
2280
e2b5e7aa 2281static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2282{
2283 u_int armval;
cfbd3c6e 2284 genimm_checked(imm,&armval);
57871462 2285 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2286 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2287}
2288
e2b5e7aa 2289static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2290{
2291 u_int armval;
cfbd3c6e 2292 genimm_checked(imm,&armval);
57871462 2293 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2294 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2295}
2296
e2b5e7aa 2297static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2298{
2299 u_int armval;
cfbd3c6e 2300 genimm_checked(imm,&armval);
b9b61529 2301 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2302 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2303}
2304
e2b5e7aa 2305static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2306{
2307 u_int armval;
cfbd3c6e 2308 genimm_checked(imm,&armval);
b9b61529 2309 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2310 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2311}
2312
e2b5e7aa 2313static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2314{
2315 u_int armval;
2316 genimm_checked(imm,&armval);
2317 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2318 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2319}
2320
e2b5e7aa 2321static void emit_jno_unlikely(int a)
57871462 2322{
2323 //emit_jno(a);
2324 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2325 output_w32(0x72800000|rd_rn_rm(15,15,0));
2326}
2327
054175e9 2328static void save_regs_all(u_int reglist)
57871462 2329{
054175e9 2330 int i;
57871462 2331 if(!reglist) return;
2332 assem_debug("stmia fp,{");
054175e9 2333 for(i=0;i<16;i++)
2334 if(reglist&(1<<i))
2335 assem_debug("r%d,",i);
57871462 2336 assem_debug("}\n");
2337 output_w32(0xe88b0000|reglist);
2338}
e2b5e7aa 2339
054175e9 2340static void restore_regs_all(u_int reglist)
57871462 2341{
054175e9 2342 int i;
57871462 2343 if(!reglist) return;
2344 assem_debug("ldmia fp,{");
054175e9 2345 for(i=0;i<16;i++)
2346 if(reglist&(1<<i))
2347 assem_debug("r%d,",i);
57871462 2348 assem_debug("}\n");
2349 output_w32(0xe89b0000|reglist);
2350}
e2b5e7aa 2351
054175e9 2352// Save registers before function call
2353static void save_regs(u_int reglist)
2354{
4d646738 2355 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2356 save_regs_all(reglist);
2357}
e2b5e7aa 2358
054175e9 2359// Restore registers after function call
2360static void restore_regs(u_int reglist)
2361{
4d646738 2362 reglist&=CALLER_SAVE_REGS;
054175e9 2363 restore_regs_all(reglist);
2364}
57871462 2365
57871462 2366/* Stubs/epilogue */
2367
e2b5e7aa 2368static void literal_pool(int n)
57871462 2369{
2370 if(!literalcount) return;
2371 if(n) {
2372 if((int)out-literals[0][0]<4096-n) return;
2373 }
2374 u_int *ptr;
2375 int i;
2376 for(i=0;i<literalcount;i++)
2377 {
77750690 2378 u_int l_addr=(u_int)out;
2379 int j;
2380 for(j=0;j<i;j++) {
2381 if(literals[j][1]==literals[i][1]) {
2382 //printf("dup %08x\n",literals[i][1]);
2383 l_addr=literals[j][0];
2384 break;
2385 }
2386 }
57871462 2387 ptr=(u_int *)literals[i][0];
77750690 2388 u_int offset=l_addr-(u_int)ptr-8;
57871462 2389 assert(offset<4096);
2390 assert(!(offset&3));
2391 *ptr|=offset;
77750690 2392 if(l_addr==(u_int)out) {
2393 literals[i][0]=l_addr; // remember for dupes
2394 output_w32(literals[i][1]);
2395 }
57871462 2396 }
2397 literalcount=0;
2398}
2399
e2b5e7aa 2400static void literal_pool_jumpover(int n)
57871462 2401{
2402 if(!literalcount) return;
2403 if(n) {
2404 if((int)out-literals[0][0]<4096-n) return;
2405 }
2406 int jaddr=(int)out;
2407 emit_jmp(0);
2408 literal_pool(0);
2409 set_jump_target(jaddr,(int)out);
2410}
2411
e2b5e7aa 2412static void emit_extjump2(u_int addr, int target, int linker)
57871462 2413{
2414 u_char *ptr=(u_char *)addr;
2415 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2416 (void)ptr;
2417
57871462 2418 emit_loadlp(target,0);
2419 emit_loadlp(addr,1);
24385cae 2420 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2421 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2422//DEBUG >
2423#ifdef DEBUG_CYCLE_COUNT
2424 emit_readword((int)&last_count,ECX);
2425 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2426 emit_readword((int)&next_interupt,ECX);
2427 emit_writeword(HOST_CCREG,(int)&Count);
2428 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2429 emit_writeword(ECX,(int)&last_count);
2430#endif
2431//DEBUG <
2432 emit_jmp(linker);
2433}
2434
e2b5e7aa 2435static void emit_extjump(int addr, int target)
57871462 2436{
2437 emit_extjump2(addr, target, (int)dyna_linker);
2438}
e2b5e7aa 2439
2440static void emit_extjump_ds(int addr, int target)
57871462 2441{
2442 emit_extjump2(addr, target, (int)dyna_linker_ds);
2443}
2444
13e35c04 2445// put rt_val into rt, potentially making use of rs with value rs_val
2446static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2447{
8575a877 2448 u_int armval;
2449 int diff;
2450 if(genimm(rt_val,&armval)) {
2451 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2452 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2453 return;
2454 }
2455 if(genimm(~rt_val,&armval)) {
2456 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2457 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2458 return;
2459 }
2460 diff=rt_val-rs_val;
2461 if(genimm(diff,&armval)) {
2462 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2463 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2464 return;
2465 }else if(genimm(-diff,&armval)) {
2466 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2467 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2468 return;
2469 }
2470 emit_movimm(rt_val,rt);
2471}
2472
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. v1 == v2, or
// the difference in either direction fits in 8 bits once trailing pairs of
// zero bits have been stripped.
static int is_similar_value(u_int v1, u_int v2)
{
  if (v1 == v2)
    return 1;

  const int diff = v2 - v1;
  u_int bits;

  // forward difference (v2 - v1)
  bits = diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // backward difference (v1 - v2)
  bits = -diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;

  return (bits < 0x100) ? 1 : 0;
}
cbbab9cd 2488
// Move a0 into r0 and a1 into r1, ordering the moves so that neither source
// is overwritten before it is read. A negative register number means
// "nothing to pass" for that slot.
// Trashes r2 (used as scratch when the two registers must be swapped).
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // must swap
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // second argument sits in r0: move it out of the way first
      emit_mov(a1, 1);
      if (a0 >= 0)
        emit_mov(a0, 0);
      return;
    }
  }

  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2505
b1be1eee 2506static void mov_loadtype_adj(int type,int rs,int rt)
2507{
2508 switch(type) {
2509 case LOADB_STUB: emit_signextend8(rs,rt); break;
2510 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2511 case LOADH_STUB: emit_signextend16(rs,rt); break;
2512 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2513 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2514 default: assert(0);
2515 }
2516}
2517
b1be1eee 2518#include "pcsxmem.h"
2519#include "pcsxmem_inline.c"
b1be1eee 2520
// Emit the out-of-line code for read stub n.
// Fields of stubs[n] as used here:
//   [0] stub type (LOAD*_STUB)      [1] location retargeted to this stub
//   [2] return address              [3] instruction index
//   [4] host reg holding the address
//   [5] struct regstat pointer      [6] cycle adjustment
//   [7] list of live host registers
// The emitted code first tries a table-driven access through mem_rtab and
// returns straight away when that path applies; otherwise it calls one of
// the jump_handler_read* routines with caller-saved registers preserved.
e2b5e7aa 2521static void do_readstub(int n)
57871462 2522{
2523 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2524 literal_pool(256);
2525 set_jump_target(stubs[n][1],(int)out);
2526 int type=stubs[n][0];
2527 int i=stubs[n][3];
2528 int rs=stubs[n][4];
2529 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2530 u_int reglist=stubs[n][7];
2531 signed char *i_regmap=i_regs->regmap;
581335b0 2532 int rt;
// Destination host register: FTEMP for C1LS/C2LS/LOADLR, else the register
// mapped to the instruction's rt1.
b9b61529 2533 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2534 rt=get_reg(i_regmap,FTEMP);
2535 }else{
57871462 2536 rt=get_reg(i_regmap,rt1[i]);
2537 }
2538 assert(rs>=0);
c6c3b1b3 2539 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2540 reglist|=(1<<rs);
// Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is not
// in reglist.
2541 for(r=0;r<=12;r++) {
2542 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2543 temp=r; break;
2544 }
2545 }
// The destination is about to be overwritten, so it need not be preserved.
db829eeb 2546 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2547 reglist&=~(1<<rt);
// No free scratch register: save registers up front and use r0 (or r2 when
// the address itself is in r0).
2548 if(temp==-1) {
2549 save_regs(reglist);
2550 regs_saved=1;
2551 temp=(rs==0)?2:0;
2552 }
// Prefer r1 for the table entry when it is free (or already saved), since
// pass_args() below wants it in r1.
2553 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2554 temp2=1;
// temp2 = mem_rtab[rs>>12], then shifted left by one with flags updated; the
// following load and jump are conditional (cc) on that shift's result.
2555 emit_readword((int)&mem_rtab,temp);
2556 emit_shrimm(rs,12,temp2);
2557 emit_readword_dualindexedx4(temp,temp2,temp2);
2558 emit_lsls_imm(temp2,1,temp2);
2559 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2560 switch(type) {
2561 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2562 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2563 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2564 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2565 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2566 }
2567 }
2568 if(regs_saved) {
2569 restore_jump=(int)out;
2570 emit_jcc(0); // jump to reg restore
2571 }
2572 else
2573 emit_jcc(stubs[n][2]); // return address
2574
// Slow path: call the generic read handler for this access width.
2575 if(!regs_saved)
2576 save_regs(reglist);
2577 int handler=0;
2578 if(type==LOADB_STUB||type==LOADBU_STUB)
2579 handler=(int)jump_handler_read8;
2580 if(type==LOADH_STUB||type==LOADHU_STUB)
2581 handler=(int)jump_handler_read16;
2582 if(type==LOADW_STUB)
2583 handler=(int)jump_handler_read32;
2584 assert(handler!=0);
// r0 = address, r1 = table entry, r2 = cycle count plus the adjusted cycles.
b96d3df7 2585 pass_args(rs,temp2);
c6c3b1b3 2586 int cc=get_reg(i_regmap,CCREG);
2587 if(cc<0)
2588 emit_loadreg(CCREG,2);
2573466a 2589 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2590 emit_call(handler);
// The handler's result is taken from r0 and extended according to the type.
2591 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2592 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2593 }
2594 if(restore_jump)
2595 set_jump_target(restore_jump,(int)out);
2596 restore_regs(reglist);
2597 emit_jmp(stubs[n][2]); // return address
57871462 2598}
2599
c6c3b1b3 2600// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2601static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2602{
2603 u_int l1,l2=0;
2604 l1=((u_int *)table)[addr>>12];
2605 if((l1&(1<<31))==0) {
2606 u_int v=l1<<1;
2607 *addr_host=v+addr;
2608 return 0;
2609 }
2610 else {
2611 l1<<=1;
2612 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2613 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2614 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2615 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2616 else
2617 l2=((u_int *)l1)[(addr&0xfff)/4];
2618 if((l2&(1<<31))==0) {
2619 u_int v=l2<<1;
2620 *addr_host=v+(addr&0xfff);
2621 return 0;
2622 }
2623 return l2<<1;
2624 }
2625}
c6c3b1b3 2626
// Emit an inline read from the constant guest address addr.
// Tried in order: pcsx_direct_read(); a direct host-memory load when
// get_direct_memhandler() reports the address as directly accessible;
// otherwise a call to a memory handler with caller-saved registers preserved.
//   type    - LOAD*_STUB access type
//   i       - instruction index (used for rt1[i])
//   regmap  - host register map
//   target  - guest register looked up for both the address and the result
//   adj     - cycle adjustment, passed through CLOCK_ADJUST(adj+1)
//   reglist - live host registers
e2b5e7aa 2627static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2628{
// rs and rt both start as the host register mapped to target; rs falls back
// to the register mapped to -1 when target has none.
2629 int rs=get_reg(regmap,target);
57871462 2630 int rt=get_reg(regmap,target);
535d208a 2631 if(rs<0) rs=get_reg(regmap,-1);
57871462 2632 assert(rs>=0);
b1be1eee 2633 u_int handler,host_addr=0,is_dynamic,far_call=0;
2634 int cc=get_reg(regmap,CCREG);
2635 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2636 return;
c6c3b1b3 2637 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
// Directly accessible memory: rewrite rs to the host address if it differs,
// then emit a plain load. Nothing is emitted when the result is unused.
2638 if (handler==0) {
db829eeb 2639 if(rt<0||rt1[i]==0)
c6c3b1b3 2640 return;
13e35c04 2641 if(addr!=host_addr)
2642 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2643 switch(type) {
2644 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2645 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2646 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2647 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2648 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2649 default: assert(0);
2650 }
2651 return;
2652 }
// Handlers that pcsxmem reports as dynamic are reached through the generic
// jump_handler_read* routines instead of the address found above.
b1be1eee 2653 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2654 if(is_dynamic) {
2655 if(type==LOADB_STUB||type==LOADBU_STUB)
2656 handler=(int)jump_handler_read8;
2657 if(type==LOADH_STUB||type==LOADHU_STUB)
2658 handler=(int)jump_handler_read16;
2659 if(type==LOADW_STUB)
2660 handler=(int)jump_handler_read32;
2661 }
c6c3b1b3 2662
2663 // call a memhandler
db829eeb 2664 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2665 reglist&=~(1<<rt);
2666 save_regs(reglist);
// r0 = address (constant when target is 0, otherwise copied from rs).
2667 if(target==0)
2668 emit_movimm(addr,0);
2669 else if(rs!=0)
2670 emit_mov(rs,0);
// A handler more than 32MB away in either direction cannot be reached by a
// direct call, so its address is loaded into r12 for emit_callreg().
c6c3b1b3 2671 int offset=(int)handler-(int)out-8;
2672 if(offset<-33554432||offset>=33554432) {
2673 // unreachable memhandler, a plugin func perhaps
b1be1eee 2674 emit_movimm(handler,12);
2675 far_call=1;
2676 }
2677 if(cc<0)
2678 emit_loadreg(CCREG,2);
// Dynamic handlers get the table entry in r1 and the cycle count in r2; for
// the others, last_count + cycle count is written to Count before the call.
2679 if(is_dynamic) {
2680 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2681 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2682 }
b1be1eee 2683 else {
2684 emit_readword((int)&last_count,3);
2685 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2686 emit_add(2,3,2);
2687 emit_writeword(2,(int)&Count);
2688 }
2689
2690 if(far_call)
2691 emit_callreg(12);
c6c3b1b3 2692 else
2693 emit_call(handler);
b1be1eee 2694
// The handler's result is taken from r0 and extended according to the type.
db829eeb 2695 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2696 switch(type) {
2697 case LOADB_STUB: emit_signextend8(0,rt); break;
2698 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2699 case LOADH_STUB: emit_signextend16(0,rt); break;
2700 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2701 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2702 default: assert(0);
2703 }
2704 }
2705 restore_regs(reglist);
57871462 2706}
2707
// Emit the out-of-line code for write stub n.
// Fields of stubs[n] as used here:
//   [0] stub type (STORE*_STUB)     [1] location retargeted to this stub
//   [2] return address              [3] instruction index
//   [4] host reg holding the address
//   [5] struct regstat pointer      [6] cycle adjustment
//   [7] list of live host registers
// The emitted code first tries a table-driven store through mem_wtab and
// returns when that path applies; otherwise it calls one of the
// jump_handler_write* routines, which return the new cycle count in r0.
e2b5e7aa 2708static void do_writestub(int n)
57871462 2709{
2710 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2711 literal_pool(256);
2712 set_jump_target(stubs[n][1],(int)out);
2713 int type=stubs[n][0];
2714 int i=stubs[n][3];
2715 int rs=stubs[n][4];
2716 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2717 u_int reglist=stubs[n][7];
2718 signed char *i_regmap=i_regs->regmap;
581335b0 2719 int rt,r;
// Host register holding the value to store: FTEMP for C1LS/C2LS, else the
// register mapped to the instruction's rs2.
b9b61529 2720 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2721 rt=get_reg(i_regmap,r=FTEMP);
2722 }else{
57871462 2723 rt=get_reg(i_regmap,r=rs2[i]);
2724 }
2725 assert(rs>=0);
2726 assert(rt>=0);
b96d3df7 2727 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2728 int reglist2=reglist|(1<<rs)|(1<<rt);
// Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is
// neither live nor one of rs/rt.
2729 for(rtmp=0;rtmp<=12;rtmp++) {
2730 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2731 temp=rtmp; break;
2732 }
2733 }
// None free: save registers up front and take the first of r0-r3 that is not
// rs or rt.
2734 if(temp==-1) {
2735 save_regs(reglist);
2736 regs_saved=1;
2737 for(rtmp=0;rtmp<=3;rtmp++)
2738 if(rtmp!=rs&&rtmp!=rt)
2739 {temp=rtmp;break;}
2740 }
// Prefer r3 for the table entry when it is free (or already saved), since
// the handler call below wants it in r3.
2741 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2742 temp2=3;
// temp2 = mem_wtab[rs>>12], then shifted left by one with flags updated; the
// following store and jump are conditional (cc) on that shift's result.
2743 emit_readword((int)&mem_wtab,temp);
2744 emit_shrimm(rs,12,temp2);
2745 emit_readword_dualindexedx4(temp,temp2,temp2);
2746 emit_lsls_imm(temp2,1,temp2);
2747 switch(type) {
2748 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2749 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2750 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2751 default: assert(0);
2752 }
2753 if(regs_saved) {
2754 restore_jump=(int)out;
2755 emit_jcc(0); // jump to reg restore
2756 }
2757 else
2758 emit_jcc(stubs[n][2]); // return address (invcode check)
2759
// Slow path: call the generic write handler for this access width.
2760 if(!regs_saved)
2761 save_regs(reglist);
2762 int handler=0;
2763 switch(type) {
2764 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2765 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2766 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2767 }
2768 assert(handler!=0);
// r0 = address, r1 = value, r2 = cycle count plus adjusted cycles,
// r3 = table entry.
2769 pass_args(rs,rt);
2770 if(temp2!=3)
2771 emit_mov(temp2,3);
2772 int cc=get_reg(i_regmap,CCREG);
2773 if(cc<0)
2774 emit_loadreg(CCREG,2);
2573466a 2775 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2776 // returns new cycle_count
2777 emit_call(handler);
// Undo the adjustment on the returned count and put it back in the cycle
// register (or store it to CCREG when that is not mapped to a host reg).
2573466a 2778 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2779 if(cc<0)
2780 emit_storereg(CCREG,2);
2781 if(restore_jump)
2782 set_jump_target(restore_jump,(int)out);
2783 restore_regs(reglist);
2784 ra=stubs[n][2];
b96d3df7 2785 emit_jmp(ra);
57871462 2786}
2787
e2b5e7aa 2788static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2789{
2790 int rs=get_reg(regmap,-1);
57871462 2791 int rt=get_reg(regmap,target);
2792 assert(rs>=0);
2793 assert(rt>=0);
b96d3df7 2794 u_int handler,host_addr=0;
b96d3df7 2795 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2796 if (handler==0) {
13e35c04 2797 if(addr!=host_addr)
2798 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2799 switch(type) {
2800 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2801 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2802 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2803 default: assert(0);
2804 }
2805 return;
2806 }
2807
2808 // call a memhandler
2809 save_regs(reglist);
13e35c04 2810 pass_args(rs,rt);
b96d3df7 2811 int cc=get_reg(regmap,CCREG);
2812 if(cc<0)
2813 emit_loadreg(CCREG,2);
2573466a 2814 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2815 emit_movimm(handler,3);
2816 // returns new cycle_count
2817 emit_call((int)jump_handler_write_h);
2573466a 2818 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2819 if(cc<0)
2820 emit_storereg(CCREG,2);
2821 restore_regs(reglist);
57871462 2822}
2823
// Emit the out-of-line code for unaligned-write stub n (SWL/SWR only).
// Fields of stubs[n] as used here:
//   [1] location retargeted to this stub   [2] return address
//   [3] instruction index                  [4] struct regstat pointer
//   [5] host reg holding the address       [6] cycle adjustment
//   [7] list of live host registers
// Only the "#if 1" branch is compiled: it calls jump_handle_swl or
// jump_handle_swr with r0 = address, r1 = value, r2 = cycle count. The
// "#else" branch is an older read-modify-write implementation, kept for
// reference.
e2b5e7aa 2824static void do_unalignedwritestub(int n)
57871462 2825{
b7918751 2826 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2827 literal_pool(256);
57871462 2828 set_jump_target(stubs[n][1],(int)out);
b7918751 2829
2830 int i=stubs[n][3];
2831 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2832 int addr=stubs[n][5];
2833 u_int reglist=stubs[n][7];
2834 signed char *i_regmap=i_regs->regmap;
2835 int temp2=get_reg(i_regmap,FTEMP);
2836 int rt;
b7918751 2837 rt=get_reg(i_regmap,rs2[i]);
2838 assert(rt>=0);
2839 assert(addr>=0);
2840 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register must survive the call; the FTEMP register need not.
// NOTE(review): temp2 is not checked for -1 before the shift - confirm that
// FTEMP is always mapped here.
2841 reglist|=(1<<addr);
2842 reglist&=~(1<<temp2);
2843
b96d3df7 2844#if 1
2845 // don't bother with it and call write handler
2846 save_regs(reglist);
2847 pass_args(addr,rt);
2848 int cc=get_reg(i_regmap,CCREG);
2849 if(cc<0)
2850 emit_loadreg(CCREG,2);
2573466a 2851 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2852 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
// Undo the adjustment on the count returned in r0 and put it back in the
// cycle register (or store it to CCREG when that is not mapped).
2573466a 2853 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2854 if(cc<0)
2855 emit_storereg(CCREG,2);
2856 restore_regs(reglist);
2857 emit_jmp(stubs[n][2]); // return address
2858#else
// Compiled out: read the aligned word, merge in the shifted value, write back.
b7918751 2859 emit_andimm(addr,0xfffffffc,temp2);
2860 emit_writeword(temp2,(int)&address);
2861
2862 save_regs(reglist);
b7918751 2863 emit_shrimm(addr,16,1);
2864 int cc=get_reg(i_regmap,CCREG);
2865 if(cc<0) {
2866 emit_loadreg(CCREG,2);
2867 }
2868 emit_movimm((u_int)readmem,0);
2869 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2870 emit_call((int)&indirect_jump_indexed);
2871 restore_regs(reglist);
2872
2873 emit_readword((int)&readmem_dword,temp2);
2874 int temp=addr; //hmh
2875 emit_shlimm(addr,3,temp);
2876 emit_andimm(temp,24,temp);
2877#ifdef BIG_ENDIAN_MIPS
2878 if (opcode[i]==0x2e) // SWR
2879#else
2880 if (opcode[i]==0x2a) // SWL
2881#endif
2882 emit_xorimm(temp,24,temp);
2883 emit_movimm(-1,HOST_TEMPREG);
55439448 2884 if (opcode[i]==0x2a) { // SWL
b7918751 2885 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2886 emit_orrshr(rt,temp,temp2);
2887 }else{
2888 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2889 emit_orrshl(rt,temp,temp2);
2890 }
2891 emit_readword((int)&address,addr);
2892 emit_writeword(temp2,(int)&word);
2893 //save_regs(reglist); // don't need to, no state changes
2894 emit_shrimm(addr,16,1);
2895 emit_movimm((u_int)writemem,0);
2896 //emit_call((int)&indirect_jump_indexed);
2897 emit_mov(15,14);
2898 emit_readword_dualindexedx4(0,1,15);
2899 emit_readword((int)&Count,HOST_TEMPREG);
2900 emit_readword((int)&next_interupt,2);
2901 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2902 emit_writeword(2,(int)&last_count);
2903 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2904 if(cc<0) {
2905 emit_storereg(CCREG,HOST_TEMPREG);
2906 }
2907 restore_regs(reglist);
57871462 2908 emit_jmp(stubs[n][2]); // return address
b96d3df7 2909#endif
57871462 2910}
2911
e2b5e7aa 2912static void do_invstub(int n)
57871462 2913{
2914 literal_pool(20);
2915 u_int reglist=stubs[n][3];
2916 set_jump_target(stubs[n][1],(int)out);
2917 save_regs(reglist);
2918 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2919 emit_call((int)&invalidate_addr);
2920 restore_regs(reglist);
2921 emit_jmp(stubs[n][2]); // return address
2922}
2923
2924int do_dirty_stub(int i)
2925{
2926 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2927 u_int addr=(u_int)source;
57871462 2928 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2929 #ifndef HAVE_ARMV7
ac545b3a 2930 emit_loadlp(addr,1);
57871462 2931 emit_loadlp((int)copy,2);
2932 emit_loadlp(slen*4,3);
2933 #else
ac545b3a 2934 emit_movw(addr&0x0000FFFF,1);
57871462 2935 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2936 emit_movt(addr&0xFFFF0000,1);
57871462 2937 emit_movt(((u_int)copy)&0xFFFF0000,2);
2938 emit_movw(slen*4,3);
2939 #endif
2940 emit_movimm(start+i*4,0);
2941 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2942 int entry=(int)out;
2943 load_regs_entry(i);
2944 if(entry==(int)out) entry=instr_addr[i];
2945 emit_jmp(instr_addr[i]);
2946 return entry;
2947}
2948
e2b5e7aa 2949static void do_dirty_stub_ds()
57871462 2950{
2951 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2952 #ifndef HAVE_ARMV7
57871462 2953 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2954 emit_loadlp((int)copy,2);
2955 emit_loadlp(slen*4,3);
2956 #else
2957 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2958 emit_movw(((u_int)copy)&0x0000FFFF,2);
2959 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2960 emit_movt(((u_int)copy)&0xFFFF0000,2);
2961 emit_movw(slen*4,3);
2962 #endif
2963 emit_movimm(start+1,0);
2964 emit_call((int)&verify_code_ds);
2965}
2966
e2b5e7aa 2967static void do_cop1stub(int n)
57871462 2968{
2969 literal_pool(256);
2970 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2971 set_jump_target(stubs[n][1],(int)out);
2972 int i=stubs[n][3];
3d624f89 2973// int rs=stubs[n][4];
57871462 2974 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2975 int ds=stubs[n][6];
2976 if(!ds) {
2977 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2978 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2979 }
2980 //else {printf("fp exception in delay slot\n");}
2981 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2982 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2983 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2984 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 2985 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2986}
2987
57871462 2988/* Special assem */
2989
e2b5e7aa 2990static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 2991{
2992 if(rt1[i]) {
2993 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2994 {
2995 signed char s,t,shift;
2996 t=get_reg(i_regs->regmap,rt1[i]);
2997 s=get_reg(i_regs->regmap,rs1[i]);
2998 shift=get_reg(i_regs->regmap,rs2[i]);
2999 if(t>=0){
3000 if(rs1[i]==0)
3001 {
3002 emit_zeroreg(t);
3003 }
3004 else if(rs2[i]==0)
3005 {
3006 assert(s>=0);
3007 if(s!=t) emit_mov(s,t);
3008 }
3009 else
3010 {
3011 emit_andimm(shift,31,HOST_TEMPREG);
3012 if(opcode2[i]==4) // SLLV
3013 {
3014 emit_shl(s,HOST_TEMPREG,t);
3015 }
3016 if(opcode2[i]==6) // SRLV
3017 {
3018 emit_shr(s,HOST_TEMPREG,t);
3019 }
3020 if(opcode2[i]==7) // SRAV
3021 {
3022 emit_sar(s,HOST_TEMPREG,t);
3023 }
3024 }
3025 }
3026 } else { // DSLLV/DSRLV/DSRAV
3027 signed char sh,sl,th,tl,shift;
3028 th=get_reg(i_regs->regmap,rt1[i]|64);
3029 tl=get_reg(i_regs->regmap,rt1[i]);
3030 sh=get_reg(i_regs->regmap,rs1[i]|64);
3031 sl=get_reg(i_regs->regmap,rs1[i]);
3032 shift=get_reg(i_regs->regmap,rs2[i]);
3033 if(tl>=0){
3034 if(rs1[i]==0)
3035 {
3036 emit_zeroreg(tl);
3037 if(th>=0) emit_zeroreg(th);
3038 }
3039 else if(rs2[i]==0)
3040 {
3041 assert(sl>=0);
3042 if(sl!=tl) emit_mov(sl,tl);
3043 if(th>=0&&sh!=th) emit_mov(sh,th);
3044 }
3045 else
3046 {
3047 // FIXME: What if shift==tl ?
3048 assert(shift!=tl);
3049 int temp=get_reg(i_regs->regmap,-1);
3050 int real_th=th;
3051 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3052 assert(sl>=0);
3053 assert(sh>=0);
3054 emit_andimm(shift,31,HOST_TEMPREG);
3055 if(opcode2[i]==0x14) // DSLLV
3056 {
3057 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3058 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3059 emit_orrshr(sl,HOST_TEMPREG,th);
3060 emit_andimm(shift,31,HOST_TEMPREG);
3061 emit_testimm(shift,32);
3062 emit_shl(sl,HOST_TEMPREG,tl);
3063 if(th>=0) emit_cmovne_reg(tl,th);
3064 emit_cmovne_imm(0,tl);
3065 }
3066 if(opcode2[i]==0x16) // DSRLV
3067 {
3068 assert(th>=0);
3069 emit_shr(sl,HOST_TEMPREG,tl);
3070 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3071 emit_orrshl(sh,HOST_TEMPREG,tl);
3072 emit_andimm(shift,31,HOST_TEMPREG);
3073 emit_testimm(shift,32);
3074 emit_shr(sh,HOST_TEMPREG,th);
3075 emit_cmovne_reg(th,tl);
3076 if(real_th>=0) emit_cmovne_imm(0,th);
3077 }
3078 if(opcode2[i]==0x17) // DSRAV
3079 {
3080 assert(th>=0);
3081 emit_shr(sl,HOST_TEMPREG,tl);
3082 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3083 if(real_th>=0) {
3084 assert(temp>=0);
3085 emit_sarimm(th,31,temp);
3086 }
3087 emit_orrshl(sh,HOST_TEMPREG,tl);
3088 emit_andimm(shift,31,HOST_TEMPREG);
3089 emit_testimm(shift,32);
3090 emit_sar(sh,HOST_TEMPREG,th);
3091 emit_cmovne_reg(th,tl);
3092 if(real_th>=0) emit_cmovne_reg(temp,th);
3093 }
3094 }
3095 }
3096 }
3097 }
3098}
ffb0b9e0 3099
ffb0b9e0 3100static void speculate_mov(int rs,int rt)
3101{
3102 if(rt!=0) {
3103 smrv_strong_next|=1<<rt;
3104 smrv[rt]=smrv[rs];
3105 }
3106}
3107
3108static void speculate_mov_weak(int rs,int rt)
3109{
3110 if(rt!=0) {
3111 smrv_weak_next|=1<<rt;
3112 smrv[rt]=smrv[rs];
3113 }
3114}
3115
3116static void speculate_register_values(int i)
3117{
3118 if(i==0) {
3119 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3120 // gp,sp are likely to stay the same throughout the block
3121 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3122 smrv_weak_next=~smrv_strong_next;
3123 //printf(" llr %08x\n", smrv[4]);
3124 }
3125 smrv_strong=smrv_strong_next;
3126 smrv_weak=smrv_weak_next;
3127 switch(itype[i]) {
3128 case ALU:
3129 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3130 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3131 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3132 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3133 else {
3134 smrv_strong_next&=~(1<<rt1[i]);
3135 smrv_weak_next&=~(1<<rt1[i]);
3136 }
3137 break;
3138 case SHIFTIMM:
3139 smrv_strong_next&=~(1<<rt1[i]);
3140 smrv_weak_next&=~(1<<rt1[i]);
3141 // fallthrough
3142 case IMM16:
3143 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3144 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3145 if(hr>=0) {
3146 if(get_final_value(hr,i,&value))
3147 smrv[rt1[i]]=value;
3148 else smrv[rt1[i]]=constmap[i][hr];
3149 smrv_strong_next|=1<<rt1[i];
3150 }
3151 }
3152 else {
3153 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3154 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3155 }
3156 break;
3157 case LOAD:
3158 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3159 // special case for BIOS
3160 smrv[rt1[i]]=0xa0000000;
3161 smrv_strong_next|=1<<rt1[i];
3162 break;
3163 }
3164 // fallthrough
3165 case SHIFT:
3166 case LOADLR:
3167 case MOV:
3168 smrv_strong_next&=~(1<<rt1[i]);
3169 smrv_weak_next&=~(1<<rt1[i]);
3170 break;
3171 case COP0:
3172 case COP2:
3173 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3174 smrv_strong_next&=~(1<<rt1[i]);
3175 smrv_weak_next&=~(1<<rt1[i]);
3176 }
3177 break;
3178 case C2LS:
3179 if (opcode[i]==0x32) { // LWC2
3180 smrv_strong_next&=~(1<<rt1[i]);
3181 smrv_weak_next&=~(1<<rt1[i]);
3182 }
3183 break;
3184 }
3185#if 0
3186 int r=4;
3187 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3188 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3189#endif
3190}
3191
/*
 * Memory region classes returned by get_ptr_mem_type() and consumed by
 * emit_fastpath_cmp_jump() to pick an address fix-up for the fast path.
 */
enum {
  MTYPE_8000 = 0, /* default: address is used unmodified            */
  MTYPE_8020 = 1, /* RAM mirror at 0x80200000..0x807fffff           */
  MTYPE_0000 = 2, /* RAM mirror at address 0                        */
  MTYPE_A000 = 3, /* RAM mirror at 0xa0000000                       */
  MTYPE_1F80 = 4, /* scratchpad at 0x1f800000..0x1f800fff           */
};
3199
3200static int get_ptr_mem_type(u_int a)
3201{
3202 if(a < 0x00200000) {
3203 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3204 // return wrong, must use memhandler for BIOS self-test to pass
3205 // 007 does similar stuff from a00 mirror, weird stuff
3206 return MTYPE_8000;
3207 return MTYPE_0000;
3208 }
3209 if(0x1f800000 <= a && a < 0x1f801000)
3210 return MTYPE_1F80;
3211 if(0x80200000 <= a && a < 0x80800000)
3212 return MTYPE_8020;
3213 if(0xa0000000 <= a && a < 0xa0200000)
3214 return MTYPE_A000;
3215 return MTYPE_8000;
3216}
ffb0b9e0 3217
3218static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3219{
581335b0 3220 int jaddr=0,type=0;
ffb0b9e0 3221 int mr=rs1[i];
3222 if(((smrv_strong|smrv_weak)>>mr)&1) {
3223 type=get_ptr_mem_type(smrv[mr]);
3224 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3225 }
3226 else {
3227 // use the mirror we are running on
3228 type=get_ptr_mem_type(start);
3229 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3230 }
3231
3232 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3233 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3234 addr=*addr_reg_override=HOST_TEMPREG;
3235 type=0;
3236 }
3237 else if(type==MTYPE_0000) { // RAM 0 mirror
3238 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3239 addr=*addr_reg_override=HOST_TEMPREG;
3240 type=0;
3241 }
3242 else if(type==MTYPE_A000) { // RAM A mirror
3243 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3244 addr=*addr_reg_override=HOST_TEMPREG;
3245 type=0;
3246 }
3247 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3248 if (psxH == (void *)0x1f800000) {
3249 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3250 emit_cmpimm(HOST_TEMPREG,0x1000);
3251 jaddr=(int)out;
3252 emit_jc(0);
3253 }
3254 else {
3255 // do usual RAM check, jump will go to the right handler
3256 type=0;
3257 }
ffb0b9e0 3258 }
ffb0b9e0 3259
3260 if(type==0)
3261 {
3262 emit_cmpimm(addr,RAM_SIZE);
3263 jaddr=(int)out;
3264 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3265 // Hint to branch predictor that the branch is unlikely to be taken
3266 if(rs1[i]>=28)
3267 emit_jno_unlikely(0);
3268 else
3269 #endif
3270 emit_jno(0);
a327ad27 3271 if(ram_offset!=0) {
3272 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3273 addr=*addr_reg_override=HOST_TEMPREG;
3274 }
ffb0b9e0 3275 }
3276
3277 return jaddr;
3278}
3279
57871462 3280#define shift_assemble shift_assemble_arm
3281
e2b5e7aa 3282static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3283{
3284 int s,th,tl,temp,temp2,addr,map=-1;
3285 int offset;
3286 int jaddr=0;
af4ee1fe 3287 int memtarget=0,c=0;
ffb0b9e0 3288 int fastload_reg_override=0;
57871462 3289 u_int hr,reglist=0;
3290 th=get_reg(i_regs->regmap,rt1[i]|64);
3291 tl=get_reg(i_regs->regmap,rt1[i]);
3292 s=get_reg(i_regs->regmap,rs1[i]);
3293 temp=get_reg(i_regs->regmap,-1);
3294 temp2=get_reg(i_regs->regmap,FTEMP);
3295 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3296 assert(addr<0);
3297 offset=imm[i];
3298 for(hr=0;hr<HOST_REGS;hr++) {
3299 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3300 }
3301 reglist|=1<<temp;
3302 if(offset||s<0||c) addr=temp2;
3303 else addr=s;
3304 if(s>=0) {
3305 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3306 if(c) {
3307 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3308 }
57871462 3309 }
1edfcc68 3310 if(!c) {
3311 #ifdef RAM_OFFSET
3312 map=get_reg(i_regs->regmap,ROREG);
3313 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3314 #endif
3315 emit_shlimm(addr,3,temp);
3316 if (opcode[i]==0x22||opcode[i]==0x26) {
3317 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3318 }else{
3319 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3320 }
1edfcc68 3321 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3322 }
3323 else {
3324 if(ram_offset&&memtarget) {
3325 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3326 fastload_reg_override=HOST_TEMPREG;
57871462 3327 }
1edfcc68 3328 if (opcode[i]==0x22||opcode[i]==0x26) {
3329 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3330 }else{
1edfcc68 3331 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3332 }
535d208a 3333 }
3334 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3335 if(!c||memtarget) {
ffb0b9e0 3336 int a=temp2;
3337 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3338 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3339 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3340 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3341 }
3342 else
3343 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3344 if(rt1[i]) {
3345 assert(tl>=0);
57871462 3346 emit_andimm(temp,24,temp);
2002a1db 3347#ifdef BIG_ENDIAN_MIPS
3348 if (opcode[i]==0x26) // LWR
3349#else
3350 if (opcode[i]==0x22) // LWL
3351#endif
3352 emit_xorimm(temp,24,temp);
57871462 3353 emit_movimm(-1,HOST_TEMPREG);
3354 if (opcode[i]==0x26) {
3355 emit_shr(temp2,temp,temp2);
3356 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3357 }else{
3358 emit_shl(temp2,temp,temp2);
3359 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3360 }
3361 emit_or(temp2,tl,tl);
57871462 3362 }
535d208a 3363 //emit_storereg(rt1[i],tl); // DEBUG
3364 }
3365 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3366 // FIXME: little endian, fastload_reg_override
535d208a 3367 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3368 if(!c||memtarget) {
3369 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3370 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3371 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3372 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3373 }
3374 else
3375 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3376 if(rt1[i]) {
3377 assert(th>=0);
3378 assert(tl>=0);
57871462 3379 emit_testimm(temp,32);
3380 emit_andimm(temp,24,temp);
3381 if (opcode[i]==0x1A) { // LDL
3382 emit_rsbimm(temp,32,HOST_TEMPREG);
3383 emit_shl(temp2h,temp,temp2h);
3384 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3385 emit_movimm(-1,HOST_TEMPREG);
3386 emit_shl(temp2,temp,temp2);
3387 emit_cmove_reg(temp2h,th);
3388 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3389 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3390 emit_orreq(temp2,tl,tl);
3391 emit_orrne(temp2,th,th);
3392 }
3393 if (opcode[i]==0x1B) { // LDR
3394 emit_xorimm(temp,24,temp);
3395 emit_rsbimm(temp,32,HOST_TEMPREG);
3396 emit_shr(temp2,temp,temp2);
3397 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3398 emit_movimm(-1,HOST_TEMPREG);
3399 emit_shr(temp2h,temp,temp2h);
3400 emit_cmovne_reg(temp2,tl);
3401 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3402 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3403 emit_orrne(temp2h,th,th);
3404 emit_orreq(temp2h,tl,tl);
3405 }
3406 }
3407 }
3408}
3409#define loadlr_assemble loadlr_assemble_arm
3410
e2b5e7aa 3411static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3412{
3413 if(opcode2[i]==0) // MFC0
3414 {
3415 signed char t=get_reg(i_regs->regmap,rt1[i]);
3416 char copr=(source[i]>>11)&0x1f;
3417 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3418 if(t>=0&&rt1[i]!=0) {
7139f3c8 3419 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3420 }
3421 }
3422 else if(opcode2[i]==4) // MTC0
3423 {
3424 signed char s=get_reg(i_regs->regmap,rs1[i]);
3425 char copr=(source[i]>>11)&0x1f;
3426 assert(s>=0);
63cb0298 3427 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3428 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3429 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3430 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3431 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3432 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3433 emit_writeword(HOST_CCREG,(int)&Count);
3434 }
3435 // What a mess. The status register (12) can enable interrupts,
3436 // so needs a special case to handle a pending interrupt.
3437 // The interrupt must be taken immediately, because a subsequent
3438 // instruction might disable interrupts again.
7139f3c8 3439 if(copr==12||copr==13) {
fca1aef2 3440 if (is_delayslot) {
3441 // burn cycles to cause cc_interrupt, which will
3442 // reschedule next_interupt. Relies on CCREG from above.
3443 assem_debug("MTC0 DS %d\n", copr);
3444 emit_writeword(HOST_CCREG,(int)&last_count);
3445 emit_movimm(0,HOST_CCREG);
3446 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3447 emit_loadreg(rs1[i],1);
fca1aef2 3448 emit_movimm(copr,0);
3449 emit_call((int)pcsx_mtc0_ds);
042c7287 3450 emit_loadreg(rs1[i],s);
fca1aef2 3451 return;
3452 }
63cb0298 3453 emit_movimm(start+i*4+4,HOST_TEMPREG);
3454 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3455 emit_movimm(0,HOST_TEMPREG);
3456 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3457 }
3458 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3459 //else
caeefe31 3460 if(s==HOST_CCREG)
3461 emit_loadreg(rs1[i],1);
3462 else if(s!=1)
63cb0298 3463 emit_mov(s,1);
fca1aef2 3464 emit_movimm(copr,0);
3465 emit_call((int)pcsx_mtc0);
7139f3c8 3466 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3467 emit_readword((int)&Count,HOST_CCREG);
042c7287 3468 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3469 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3470 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3471 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3472 emit_storereg(CCREG,HOST_CCREG);
3473 }
7139f3c8 3474 if(copr==12||copr==13) {
57871462 3475 assert(!is_delayslot);
3476 emit_readword((int)&pending_exception,14);
042c7287 3477 emit_test(14,14);
3478 emit_jne((int)&do_interrupt);
57871462 3479 }
3480 emit_loadreg(rs1[i],s);
3481 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3482 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3483 cop1_usable=0;
3484 }
3485 else
3486 {
3487 assert(opcode2[i]==0x10);
576bbd8f 3488 if((source[i]&0x3f)==0x10) // RFE
3489 {
3490 emit_readword((int)&Status,0);
3491 emit_andimm(0,0x3c,1);
3492 emit_andimm(0,~0xf,0);
3493 emit_orrshr_imm(1,2,0);
3494 emit_writeword(0,(int)&Status);
3495 }
57871462 3496 }
3497}
3498
b9b61529 3499static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3500{
3501 switch (copr) {
3502 case 1:
3503 case 3:
3504 case 5:
3505 case 8:
3506 case 9:
3507 case 10:
3508 case 11:
3509 emit_readword((int)&reg_cop2d[copr],tl);
3510 emit_signextend16(tl,tl);
3511 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3512 break;
3513 case 7:
3514 case 16:
3515 case 17:
3516 case 18:
3517 case 19:
3518 emit_readword((int)&reg_cop2d[copr],tl);
3519 emit_andimm(tl,0xffff,tl);
3520 emit_writeword(tl,(int)&reg_cop2d[copr]);
3521 break;
3522 case 15:
3523 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3524 emit_writeword(tl,(int)&reg_cop2d[copr]);
3525 break;
3526 case 28:
b9b61529 3527 case 29:
3528 emit_readword((int)&reg_cop2d[9],temp);
3529 emit_testimm(temp,0x8000); // do we need this?
3530 emit_andimm(temp,0xf80,temp);
3531 emit_andne_imm(temp,0,temp);
f70d384d 3532 emit_shrimm(temp,7,tl);
b9b61529 3533 emit_readword((int)&reg_cop2d[10],temp);
3534 emit_testimm(temp,0x8000);
3535 emit_andimm(temp,0xf80,temp);
3536 emit_andne_imm(temp,0,temp);
f70d384d 3537 emit_orrshr_imm(temp,2,tl);
b9b61529 3538 emit_readword((int)&reg_cop2d[11],temp);
3539 emit_testimm(temp,0x8000);
3540 emit_andimm(temp,0xf80,temp);
3541 emit_andne_imm(temp,0,temp);
f70d384d 3542 emit_orrshl_imm(temp,3,tl);
b9b61529 3543 emit_writeword(tl,(int)&reg_cop2d[copr]);
3544 break;
3545 default:
3546 emit_readword((int)&reg_cop2d[copr],tl);
3547 break;
3548 }
3549}
3550
3551static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3552{
3553 switch (copr) {
3554 case 15:
3555 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3556 emit_writeword(sl,(int)&reg_cop2d[copr]);
3557 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3558 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3559 emit_writeword(sl,(int)&reg_cop2d[14]);
3560 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3561 break;
3562 case 28:
3563 emit_andimm(sl,0x001f,temp);
f70d384d 3564 emit_shlimm(temp,7,temp);
b9b61529 3565 emit_writeword(temp,(int)&reg_cop2d[9]);
3566 emit_andimm(sl,0x03e0,temp);
f70d384d 3567 emit_shlimm(temp,2,temp);
b9b61529 3568 emit_writeword(temp,(int)&reg_cop2d[10]);
3569 emit_andimm(sl,0x7c00,temp);
f70d384d 3570 emit_shrimm(temp,3,temp);
b9b61529 3571 emit_writeword(temp,(int)&reg_cop2d[11]);
3572 emit_writeword(sl,(int)&reg_cop2d[28]);
3573 break;
3574 case 30:
3575 emit_movs(sl,temp);
3576 emit_mvnmi(temp,temp);
665f33e1 3577#ifdef HAVE_ARMV5
b9b61529 3578 emit_clz(temp,temp);
665f33e1 3579#else
3580 emit_movs(temp,HOST_TEMPREG);
3581 emit_movimm(0,temp);
3582 emit_jeq((int)out+4*4);
3583 emit_addpl_imm(temp,1,temp);
3584 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3585 emit_jns((int)out-2*4);
3586#endif
b9b61529 3587 emit_writeword(sl,(int)&reg_cop2d[30]);
3588 emit_writeword(temp,(int)&reg_cop2d[31]);
3589 break;
b9b61529 3590 case 31:
3591 break;
3592 default:
3593 emit_writeword(sl,(int)&reg_cop2d[copr]);
3594 break;
3595 }
3596}
3597
e2b5e7aa 3598static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3599{
3600 u_int copr=(source[i]>>11)&0x1f;
3601 signed char temp=get_reg(i_regs->regmap,-1);
3602 if (opcode2[i]==0) { // MFC2
3603 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3604 if(tl>=0&&rt1[i]!=0)
b9b61529 3605 cop2_get_dreg(copr,tl,temp);
3606 }
3607 else if (opcode2[i]==4) { // MTC2
3608 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3609 cop2_put_dreg(copr,sl,temp);
3610 }
3611 else if (opcode2[i]==2) // CFC2
3612 {
3613 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3614 if(tl>=0&&rt1[i]!=0)
b9b61529 3615 emit_readword((int)&reg_cop2c[copr],tl);
3616 }
3617 else if (opcode2[i]==6) // CTC2
3618 {
3619 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3620 switch(copr) {
3621 case 4:
3622 case 12:
3623 case 20:
3624 case 26:
3625 case 27:
3626 case 29:
3627 case 30:
3628 emit_signextend16(sl,temp);
3629 break;
3630 case 31:
3631 //value = value & 0x7ffff000;
3632 //if (value & 0x7f87e000) value |= 0x80000000;
3633 emit_shrimm(sl,12,temp);
3634 emit_shlimm(temp,12,temp);
3635 emit_testimm(temp,0x7f000000);
3636 emit_testeqimm(temp,0x00870000);
3637 emit_testeqimm(temp,0x0000e000);
3638 emit_orrne_imm(temp,0x80000000,temp);
3639 break;
3640 default:
3641 temp=sl;
3642 break;
3643 }
3644 emit_writeword(temp,(int)&reg_cop2c[copr]);
3645 assert(sl>=0);
3646 }
3647}
3648
054175e9 3649static void c2op_prologue(u_int op,u_int reglist)
3650{
3651 save_regs_all(reglist);
82ed88eb 3652#ifdef PCNT
3653 emit_movimm(op,0);
3654 emit_call((int)pcnt_gte_start);
3655#endif
054175e9 3656 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3657}
3658
3659static void c2op_epilogue(u_int op,u_int reglist)
3660{
82ed88eb 3661#ifdef PCNT
3662 emit_movimm(op,0);
3663 emit_call((int)pcnt_gte_end);
3664#endif
054175e9 3665 restore_regs_all(reglist);
3666}
3667
6c0eefaf 3668static void c2op_call_MACtoIR(int lm,int need_flags)
3669{
3670 if(need_flags)
3671 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3672 else
3673 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3674}
3675
3676static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3677{
3678 emit_call((int)func);
3679 // func is C code and trashes r0
3680 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3681 if(need_flags||need_ir)
3682 c2op_call_MACtoIR(lm,need_flags);
3683 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3684}
3685
054175e9 3686static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3687{
b9b61529 3688 u_int c2op=source[i]&0x3f;
6c0eefaf 3689 u_int hr,reglist_full=0,reglist;
054175e9 3690 int need_flags,need_ir;
b9b61529 3691 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3692 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3693 }
4d646738 3694 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3695
3696 if (gte_handlers[c2op]!=NULL) {
bedfea38 3697 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3698 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3699 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3700 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3701 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3702 need_flags=0;
6c0eefaf 3703 int shift = (source[i] >> 19) & 1;
3704 int lm = (source[i] >> 10) & 1;
054175e9 3705 switch(c2op) {
19776aef 3706#ifndef DRC_DBG
054175e9 3707 case GTE_MVMVA: {
82336ba3 3708#ifdef HAVE_ARMV5
054175e9 3709 int v = (source[i] >> 15) & 3;
3710 int cv = (source[i] >> 13) & 3;
3711 int mx = (source[i] >> 17) & 3;
4d646738 3712 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3713 c2op_prologue(c2op,reglist);
3714 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3715 if(v<3)
3716 emit_ldrd(v*8,0,4);
3717 else {
3718 emit_movzwl_indexed(9*4,0,4); // gteIR
3719 emit_movzwl_indexed(10*4,0,6);
3720 emit_movzwl_indexed(11*4,0,5);
3721 emit_orrshl_imm(6,16,4);
3722 }
3723 if(mx<3)
3724 emit_addimm(0,32*4+mx*8*4,6);
3725 else
3726 emit_readword((int)&zeromem_ptr,6);
3727 if(cv<3)
3728 emit_addimm(0,32*4+(cv*8+5)*4,7);
3729 else
3730 emit_readword((int)&zeromem_ptr,7);
3731#ifdef __ARM_NEON__
3732 emit_movimm(source[i],1); // opcode
3733 emit_call((int)gteMVMVA_part_neon);
3734 if(need_flags) {
3735 emit_movimm(lm,1);
3736 emit_call((int)gteMACtoIR_flags_neon);
3737 }
3738#else
3739 if(cv==3&&shift)
3740 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3741 else {
3742 emit_movimm(shift,1);
3743 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3744 }
6c0eefaf 3745 if(need_flags||need_ir)
3746 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3747#endif
3748#else /* if not HAVE_ARMV5 */
3749 c2op_prologue(c2op,reglist);
3750 emit_movimm(source[i],1); // opcode
3751 emit_writeword(1,(int)&psxRegs.code);
3752 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3753#endif
3754 break;
3755 }
6c0eefaf 3756 case GTE_OP:
3757 c2op_prologue(c2op,reglist);
3758 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3759 if(need_flags||need_ir) {
3760 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3761 c2op_call_MACtoIR(lm,need_flags);
3762 }
3763 break;
3764 case GTE_DPCS:
3765 c2op_prologue(c2op,reglist);
3766 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3767 break;
3768 case GTE_INTPL:
3769 c2op_prologue(c2op,reglist);
3770 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3771 break;
3772 case GTE_SQR:
3773 c2op_prologue(c2op,reglist);
3774 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3775 if(need_flags||need_ir) {
3776 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3777 c2op_call_MACtoIR(lm,need_flags);
3778 }
3779 break;
3780 case GTE_DCPL:
3781 c2op_prologue(c2op,reglist);
3782 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3783 break;
3784 case GTE_GPF:
3785 c2op_prologue(c2op,reglist);
3786 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3787 break;
3788 case GTE_GPL:
3789 c2op_prologue(c2op,reglist);
3790 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3791 break;
19776aef 3792#endif
054175e9 3793 default:
054175e9 3794 c2op_prologue(c2op,reglist);
19776aef 3795#ifdef DRC_DBG
3796 emit_movimm(source[i],1); // opcode
3797 emit_writeword(1,(int)&psxRegs.code);
3798#endif
054175e9 3799 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3800 break;
3801 }
3802 c2op_epilogue(c2op,reglist);
3803 }
b9b61529 3804}
3805
e2b5e7aa 3806static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3807{
3808 // XXX: should just just do the exception instead
3809 if(!cop1_usable) {
3810 int jaddr=(int)out;
3811 emit_jmp(0);
3812 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3813 cop1_usable=1;
3814 }
3815}
3816
/* COP1 instructions: only the "cop1 unusable" path is emitted. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3821
/* Floating-point conversions: only the "cop1 unusable" path is emitted. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3826#define fconv_assemble fconv_assemble_arm
3827
/* Floating-point compares: only the "cop1 unusable" path is emitted. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3832
/* Floating-point arithmetic: only the "cop1 unusable" path is emitted. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3837
e2b5e7aa 3838static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3839{
3840 // case 0x18: MULT
3841 // case 0x19: MULTU
3842 // case 0x1A: DIV
3843 // case 0x1B: DIVU
3844 // case 0x1C: DMULT
3845 // case 0x1D: DMULTU
3846 // case 0x1E: DDIV
3847 // case 0x1F: DDIVU
3848 if(rs1[i]&&rs2[i])
3849 {
3850 if((opcode2[i]&4)==0) // 32-bit
3851 {
3852 if(opcode2[i]==0x18) // MULT
3853 {
3854 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3855 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3856 signed char hi=get_reg(i_regs->regmap,HIREG);
3857 signed char lo=get_reg(i_regs->regmap,LOREG);
3858 assert(m1>=0);
3859 assert(m2>=0);
3860 assert(hi>=0);
3861 assert(lo>=0);
3862 emit_smull(m1,m2,hi,lo);
3863 }
3864 if(opcode2[i]==0x19) // MULTU
3865 {
3866 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3867 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3868 signed char hi=get_reg(i_regs->regmap,HIREG);
3869 signed char lo=get_reg(i_regs->regmap,LOREG);
3870 assert(m1>=0);
3871 assert(m2>=0);
3872 assert(hi>=0);
3873 assert(lo>=0);
3874 emit_umull(m1,m2,hi,lo);
3875 }
3876 if(opcode2[i]==0x1A) // DIV
3877 {
3878 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3879 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3880 assert(d1>=0);
3881 assert(d2>=0);
3882 signed char quotient=get_reg(i_regs->regmap,LOREG);
3883 signed char remainder=get_reg(i_regs->regmap,HIREG);
3884 assert(quotient>=0);
3885 assert(remainder>=0);
3886 emit_movs(d1,remainder);
44a80f6a 3887 emit_movimm(0xffffffff,quotient);
3888 emit_negmi(quotient,quotient); // .. quotient and ..
3889 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3890 emit_movs(d2,HOST_TEMPREG);
3891 emit_jeq((int)out+52); // Division by zero
82336ba3 3892 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3893#ifdef HAVE_ARMV5
57871462 3894 emit_clz(HOST_TEMPREG,quotient);
3895 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3896#else
3897 emit_movimm(0,quotient);
3898 emit_addpl_imm(quotient,1,quotient);
3899 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3900 emit_jns((int)out-2*4);
3901#endif
57871462 3902 emit_orimm(quotient,1<<31,quotient);
3903 emit_shr(quotient,quotient,quotient);
3904 emit_cmp(remainder,HOST_TEMPREG);
3905 emit_subcs(remainder,HOST_TEMPREG,remainder);
3906 emit_adcs(quotient,quotient,quotient);
3907 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3908 emit_jcc((int)out-16); // -4
3909 emit_teq(d1,d2);
3910 emit_negmi(quotient,quotient);
3911 emit_test(d1,d1);
3912 emit_negmi(remainder,remainder);
3913 }
3914 if(opcode2[i]==0x1B) // DIVU
3915 {
3916 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3917 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3918 assert(d1>=0);
3919 assert(d2>=0);
3920 signed char quotient=get_reg(i_regs->regmap,LOREG);
3921 signed char remainder=get_reg(i_regs->regmap,HIREG);
3922 assert(quotient>=0);
3923 assert(remainder>=0);
44a80f6a 3924 emit_mov(d1,remainder);
3925 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3926 emit_test(d2,d2);
44a80f6a 3927 emit_jeq((int)out+40); // Division by zero
665f33e1 3928#ifdef HAVE_ARMV5
57871462 3929 emit_clz(d2,HOST_TEMPREG);
3930 emit_movimm(1<<31,quotient);
3931 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3932#else
3933 emit_movimm(0,HOST_TEMPREG);
82336ba3 3934 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3935 emit_lslpls_imm(d2,1,d2);
665f33e1 3936 emit_jns((int)out-2*4);
3937 emit_movimm(1<<31,quotient);
3938#endif
57871462 3939 emit_shr(quotient,HOST_TEMPREG,quotient);
3940 emit_cmp(remainder,d2);
3941 emit_subcs(remainder,d2,remainder);
3942 emit_adcs(quotient,quotient,quotient);
3943 emit_shrcc_imm(d2,1,d2);
3944 emit_jcc((int)out-16); // -4
3945 }
3946 }
3947 else // 64-bit
71e490c5 3948 assert(0);
57871462 3949 }
3950 else
3951 {
3952 // Multiply by zero is zero.
3953 // MIPS does not have a divide by zero exception.
3954 // The result is undefined, we return zero.
3955 signed char hr=get_reg(i_regs->regmap,HIREG);
3956 signed char lr=get_reg(i_regs->regmap,LOREG);
3957 if(hr>=0) emit_zeroreg(hr);
3958 if(lr>=0) emit_zeroreg(lr);
3959 }
3960}
3961#define multdiv_assemble multdiv_assemble_arm
3962
/*
 * Intentionally empty on ARM.  On x86 this puts the value 0xf8 into the
 * register; on ARM the hash is computed with a single instruction in
 * do_rhash, so nothing needs to be preloaded.
 */
static void do_preload_rhash(int r) {
  (void)r;
}
3967
e2b5e7aa 3968static void do_preload_rhtbl(int ht) {
57871462 3969 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3970}
3971
/* Compute the mini hash table byte offset: rh = rs & 0xf8. */
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
3975
e2b5e7aa 3976static void do_miniht_load(int ht,int rh) {
57871462 3977 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3978 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3979}
3980
e2b5e7aa 3981static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3982 emit_cmp(rh,rs);
3983 emit_ldreq_indexed(ht,4,15);
3984 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3985 emit_mov(rs,7);
3986 emit_jmp(jump_vaddr_reg[7]);
3987 #else
3988 emit_jmp(jump_vaddr_reg[rs]);
3989 #endif
3990}
3991
e2b5e7aa 3992static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3993 #ifndef HAVE_ARMV7
57871462 3994 emit_movimm(return_address,rt); // PC into link register
3995 add_to_linker((int)out,return_address,1);
3996 emit_pcreladdr(temp);
3997 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
3998 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
3999 #else
4000 emit_movw(return_address&0x0000FFFF,rt);
4001 add_to_linker((int)out,return_address,1);
4002 emit_pcreladdr(temp);
4003 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4004 emit_movt(return_address&0xFFFF0000,rt);
4005 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4006 #endif
4007}
4008
e2b5e7aa 4009static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4010{
4011 //if(dirty_pre==dirty) return;
581335b0 4012 int hr,reg;
57871462 4013 for(hr=0;hr<HOST_REGS;hr++) {
4014 if(hr!=EXCLUDE_REG) {
4015 reg=pre[hr];
4016 if(((~u)>>(reg&63))&1) {
f776eb14 4017 if(reg>0) {
57871462 4018 if(((dirty_pre&~dirty)>>hr)&1) {
4019 if(reg>0&&reg<34) {
4020 emit_storereg(reg,hr);
4021 if( ((is32_pre&~uu)>>reg)&1 ) {
4022 emit_sarimm(hr,31,HOST_TEMPREG);
4023 emit_storereg(reg|64,HOST_TEMPREG);
4024 }
4025 }
4026 else if(reg>=64) {
4027 emit_storereg(reg,hr);
4028 }
4029 }
4030 }
57871462 4031 }
4032 }
4033 }
4034}
4035
4036
4037/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4038static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4039{
4040 int hr;
4041 int wrote=-1;
4042 for(hr=HOST_REGS-1;hr>=0;hr--) {
4043 if(hr!=EXCLUDE_REG) {
4044 if(pre[hr]!=entry[hr]) {
4045 if(pre[hr]>=0) {
4046 if((dirty>>hr)&1) {
4047 if(get_reg(entry,pre[hr])<0) {
4048 if(pre[hr]<64) {
4049 if(!((u>>pre[hr])&1)) {
4050 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4051 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4052 emit_sarimm(hr,31,hr+1);
4053 emit_strdreg(pre[hr],hr);
4054 }
4055 else
4056 emit_storereg(pre[hr],hr);
4057 }else{
4058 emit_storereg(pre[hr],hr);
4059 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4060 emit_sarimm(hr,31,hr);
4061 emit_storereg(pre[hr]|64,hr);
4062 }
4063 }
4064 }
4065 }else{
4066 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4067 emit_storereg(pre[hr],hr);
4068 }
4069 }
4070 wrote=hr;
4071 }
4072 }
4073 }
4074 }
4075 }
4076 }
4077 for(hr=0;hr<HOST_REGS;hr++) {
4078 if(hr!=EXCLUDE_REG) {
4079 if(pre[hr]!=entry[hr]) {
4080 if(pre[hr]>=0) {
4081 int nr;
4082 if((nr=get_reg(entry,pre[hr]))>=0) {
4083 emit_mov(hr,nr);
4084 }
4085 }
4086 }
4087 }
4088 }
4089}
4090#define wb_invalidate wb_invalidate_arm
4091*/
4092
d148d265 4093static void mark_clear_cache(void *target)
4094{
4095 u_long offset = (char *)target - (char *)BASE_ADDR;
4096 u_int mask = 1u << ((offset >> 12) & 31);
4097 if (!(needs_clear_cache[offset >> 17] & mask)) {
4098 char *start = (char *)((u_long)target & ~4095ul);
4099 start_tcache_write(start, start + 4096);
4100 needs_clear_cache[offset >> 17] |= mask;
4101 }
4102}
4103
dd3a91a1 4104// Clearing the cache is rather slow on ARM Linux, so mark the areas
4105// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4106static void do_clear_cache()
dd3a91a1 4107{
4108 int i,j;
4109 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4110 {
4111 u_int bitmap=needs_clear_cache[i];
4112 if(bitmap) {
4113 u_int start,end;
9f51b4b9 4114 for(j=0;j<32;j++)
dd3a91a1 4115 {
4116 if(bitmap&(1<<j)) {
bdeade46 4117 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4118 end=start+4095;
4119 j++;
4120 while(j<32) {
4121 if(bitmap&(1<<j)) {
4122 end+=4096;
4123 j++;
4124 }else{
d148d265 4125 end_tcache_write((void *)start,(void *)end);
dd3a91a1 4126 break;
4127 }
4128 }
4129 }
4130 }
4131 needs_clear_cache[i]=0;
4132 }
4133 }
4134}
4135
// CPU-architecture-specific initialization hook.
// The ARM backend has nothing to set up here, so the body is empty.
static void arch_init() {
  /* nothing to do */
}
b9b61529 4139
4140// vim:shiftwidth=2:expandtab