some cleanup to reduce confusion
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
bdeade46 35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// NOTE(review): presumably an ARM register-list bit mask (bit n = rn):
// 0x100f = r0-r3,r12; the __MACH__ variant additionally sets bit 9 (r9).
// Confirm against the places that use it.
#ifndef __MACH__
#define CALLER_SAVE_REGS 0x100f
#else
#define CALLER_SAVE_REGS 0x120f
#endif

// Silences "defined but not used" warnings on the annotated object.
#define unused __attribute__((unused))
45
57871462 46extern int cycle_count;
47extern int last_count;
48extern int pcaddr;
49extern int pending_exception;
50extern int branch_target;
51extern uint64_t readmem_dword;
57871462 52extern void *dynarec_local;
57871462 53extern u_int mini_ht[32][2];
57871462 54
55void indirect_jump_indexed();
56void indirect_jump();
57void do_interrupt();
58void jump_vaddr_r0();
59void jump_vaddr_r1();
60void jump_vaddr_r2();
61void jump_vaddr_r3();
62void jump_vaddr_r4();
63void jump_vaddr_r5();
64void jump_vaddr_r6();
65void jump_vaddr_r7();
66void jump_vaddr_r8();
67void jump_vaddr_r9();
68void jump_vaddr_r10();
69void jump_vaddr_r12();
70
71const u_int jump_vaddr_reg[16] = {
72 (int)jump_vaddr_r0,
73 (int)jump_vaddr_r1,
74 (int)jump_vaddr_r2,
75 (int)jump_vaddr_r3,
76 (int)jump_vaddr_r4,
77 (int)jump_vaddr_r5,
78 (int)jump_vaddr_r6,
79 (int)jump_vaddr_r7,
80 (int)jump_vaddr_r8,
81 (int)jump_vaddr_r9,
82 (int)jump_vaddr_r10,
83 0,
84 (int)jump_vaddr_r12,
85 0,
86 0,
87 0};
88
0bbd1454 89void invalidate_addr_r0();
90void invalidate_addr_r1();
91void invalidate_addr_r2();
92void invalidate_addr_r3();
93void invalidate_addr_r4();
94void invalidate_addr_r5();
95void invalidate_addr_r6();
96void invalidate_addr_r7();
97void invalidate_addr_r8();
98void invalidate_addr_r9();
99void invalidate_addr_r10();
100void invalidate_addr_r12();
101
102const u_int invalidate_addr_reg[16] = {
103 (int)invalidate_addr_r0,
104 (int)invalidate_addr_r1,
105 (int)invalidate_addr_r2,
106 (int)invalidate_addr_r3,
107 (int)invalidate_addr_r4,
108 (int)invalidate_addr_r5,
109 (int)invalidate_addr_r6,
110 (int)invalidate_addr_r7,
111 (int)invalidate_addr_r8,
112 (int)invalidate_addr_r9,
113 (int)invalidate_addr_r10,
114 0,
115 (int)invalidate_addr_r12,
116 0,
117 0,
118 0};
119
d148d265 120static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 121
57871462 122/* Linker */
123
e2b5e7aa 124static void set_jump_target(int addr,u_int target)
57871462 125{
126 u_char *ptr=(u_char *)addr;
127 u_int *ptr2=(u_int *)ptr;
128 if(ptr[3]==0xe2) {
129 assert((target-(u_int)ptr2-8)<1024);
130 assert((addr&3)==0);
131 assert((target&3)==0);
132 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
133 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
134 }
135 else if(ptr[3]==0x72) {
136 // generated by emit_jno_unlikely
137 if((target-(u_int)ptr2-8)<1024) {
138 assert((addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 }
142 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
143 assert((addr&3)==0);
144 assert((target&3)==0);
145 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
146 }
147 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
148 }
149 else {
150 assert((ptr[3]&0x0e)==0xa);
151 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
152 }
153}
154
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *ptr=(u_char *)addr;
  u_int *ptr2=(u_int *)ptr;
  // copying requires the preceding word to be 0xe28dd000
  assert(!copy||ptr2[-1]==0xe28dd000);
  if(ptr[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)ptr2-8)<4096);
    *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
  }
  else {
    assert((ptr[3]&0x0e)==0xa);
    u_int target_insn=*(u_int *)target;
    if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
      copy=0;
    }
    if((target_insn&0x0c100000)==0x04100000) { // Load
      copy=0;
    }
    if(target_insn&0x08000000) {
      copy=0;
    }
    if(copy) {
      // hoist the target instruction and branch past it
      ptr2[-1]=target_insn;
      target+=4;
    }
    *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
  }
}
#endif
57871462 189
190/* Literal pool */
e2b5e7aa 191static void add_literal(int addr,int val)
57871462 192{
15776b68 193 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 194 literals[literalcount][0]=addr;
195 literals[literalcount][1]=val;
9f51b4b9 196 literalcount++;
197}
57871462 198
d148d265 199// from a pointer to external jump stub (which was produced by emit_extjump2)
200// find where the jumping insn is
201static void *find_extjump_insn(void *stub)
57871462 202{
203 int *ptr=(int *)(stub+4);
d148d265 204 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 205 u_int offset=*ptr&0xfff;
d148d265 206 void **l_ptr=(void *)ptr+offset+8;
207 return *l_ptr;
57871462 208}
209
// Find where an external branch is linked to, using the address of its stub:
// get the address that the insn one after the stub start loads
// (dyna_linker arg1), treat it as a pointer to a branch insn,
// and return the address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *i_ptr=find_extjump_insn(stub);
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // sign-extend the 24-bit word offset and convert it to bytes
  return (int)i_ptr+((*i_ptr<<8)>>6)+8;
}
221
222// Find the "clean" entry point from a "dirty" entry point
223// by skipping past the call to verify_code
e2b5e7aa 224static u_int get_clean_addr(int addr)
57871462 225{
226 int *ptr=(int *)addr;
665f33e1 227 #ifndef HAVE_ARMV7
57871462 228 ptr+=4;
229 #else
230 ptr+=6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
234 ptr++;
235 if((*ptr&0xFF000000)==0xea000000) {
236 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
237 }
238 return (u_int)ptr;
239}
240
e2b5e7aa 241static int verify_dirty(u_int *ptr)
57871462 242{
665f33e1 243 #ifndef HAVE_ARMV7
16c8be17 244 u_int offset;
57871462 245 // get from literal pool
15776b68 246 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 247 offset=*ptr&0xfff;
248 u_int source=*(u_int*)((void *)ptr+offset+8);
249 ptr++;
250 assert((*ptr&0xFFFF0000)==0xe59f0000);
251 offset=*ptr&0xfff;
252 u_int copy=*(u_int*)((void *)ptr+offset+8);
253 ptr++;
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
255 offset=*ptr&0xfff;
256 u_int len=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 ptr++;
57871462 259 #else
260 // ARMv7 movw/movt
261 assert((*ptr&0xFFF00000)==0xe3000000);
262 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
263 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
264 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
265 ptr+=6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 269 //printf("verify_dirty: %x %x %x\n",source,copy,len);
270 return !memcmp((void *)source,(void *)copy,len);
271}
272
273// This doesn't necessarily find all clean entry points, just
274// guarantees that it's not dirty
e2b5e7aa 275static int isclean(int addr)
57871462 276{
665f33e1 277 #ifndef HAVE_ARMV7
581335b0 278 u_int *ptr=((u_int *)addr)+4;
57871462 279 #else
581335b0 280 u_int *ptr=((u_int *)addr)+6;
57871462 281 #endif
282 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
283 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
285 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
286 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
287 return 1;
288}
289
4a35de07 290// get source that block at addr was compiled from (host pointers)
e2b5e7aa 291static void get_bounds(int addr,u_int *start,u_int *end)
57871462 292{
293 u_int *ptr=(u_int *)addr;
665f33e1 294 #ifndef HAVE_ARMV7
16c8be17 295 u_int offset;
57871462 296 // get from literal pool
15776b68 297 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 298 offset=*ptr&0xfff;
299 u_int source=*(u_int*)((void *)ptr+offset+8);
300 ptr++;
301 //assert((*ptr&0xFFFF0000)==0xe59f0000);
302 //offset=*ptr&0xfff;
303 //u_int copy=*(u_int*)((void *)ptr+offset+8);
304 ptr++;
305 assert((*ptr&0xFFFF0000)==0xe59f0000);
306 offset=*ptr&0xfff;
307 u_int len=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 ptr++;
57871462 310 #else
311 // ARMv7 movw/movt
312 assert((*ptr&0xFFF00000)==0xe3000000);
313 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
314 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
315 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
316 ptr+=6;
317 #endif
318 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
319 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 320 *start=source;
321 *end=source+len;
322}
323
324/* Register allocation */
325
326// Note: registers are allocated clean (unmodified state)
327// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 328static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 329{
330 int r,hr;
331 int preferred_reg = (reg&7);
332 if(reg==CCREG) preferred_reg=HOST_CCREG;
333 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 334
57871462 335 // Don't allocate unused registers
336 if((cur->u>>reg)&1) return;
9f51b4b9 337
57871462 338 // see if it's already allocated
339 for(hr=0;hr<HOST_REGS;hr++)
340 {
341 if(cur->regmap[hr]==reg) return;
342 }
9f51b4b9 343
57871462 344 // Keep the same mapping if the register was already allocated in a loop
345 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 346
57871462 347 // Try to allocate the preferred register
348 if(cur->regmap[preferred_reg]==-1) {
349 cur->regmap[preferred_reg]=reg;
350 cur->dirty&=~(1<<preferred_reg);
351 cur->isconst&=~(1<<preferred_reg);
352 return;
353 }
354 r=cur->regmap[preferred_reg];
355 if(r<64&&((cur->u>>r)&1)) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 if(r>=64&&((cur->uu>>(r&63))&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
9f51b4b9 367
57871462 368 // Clear any unneeded registers
369 // We try to keep the mapping consistent, if possible, because it
370 // makes branches easier (especially loops). So we try to allocate
371 // first (see above) before removing old mappings. If this is not
372 // possible then go ahead and clear out the registers that are no
373 // longer needed.
374 for(hr=0;hr<HOST_REGS;hr++)
375 {
376 r=cur->regmap[hr];
377 if(r>=0) {
378 if(r<64) {
379 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
380 }
381 else
382 {
383 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
384 }
385 }
386 }
387 // Try to allocate any available register, but prefer
388 // registers that have not been used recently.
389 if(i>0) {
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
392 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
393 cur->regmap[hr]=reg;
394 cur->dirty&=~(1<<hr);
395 cur->isconst&=~(1<<hr);
396 return;
397 }
398 }
399 }
400 }
401 // Try to allocate any available register
402 for(hr=0;hr<HOST_REGS;hr++) {
403 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
9f51b4b9 410
57871462 411 // Ok, now we have to evict someone
412 // Pick a register we hopefully won't need soon
413 u_char hsn[MAXREG+1];
414 memset(hsn,10,sizeof(hsn));
415 int j;
416 lsn(hsn,i,&preferred_reg);
417 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
418 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
419 if(i>0) {
420 // Don't evict the cycle count at entry points, otherwise the entry
421 // stub will have to write it.
422 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
423 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
424 for(j=10;j>=3;j--)
425 {
426 // Alloc preferred register if available
427 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 // Evict both parts of a 64-bit register
430 if((cur->regmap[hr]&63)==r) {
431 cur->regmap[hr]=-1;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 }
435 }
436 cur->regmap[preferred_reg]=reg;
437 return;
438 }
439 for(r=1;r<=MAXREG;r++)
440 {
441 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r+64) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 for(hr=0;hr<HOST_REGS;hr++) {
453 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
454 if(cur->regmap[hr]==r) {
455 cur->regmap[hr]=reg;
456 cur->dirty&=~(1<<hr);
457 cur->isconst&=~(1<<hr);
458 return;
459 }
460 }
461 }
462 }
463 }
464 }
465 }
466 for(j=10;j>=0;j--)
467 {
468 for(r=1;r<=MAXREG;r++)
469 {
470 if(hsn[r]==j) {
471 for(hr=0;hr<HOST_REGS;hr++) {
472 if(cur->regmap[hr]==r+64) {
473 cur->regmap[hr]=reg;
474 cur->dirty&=~(1<<hr);
475 cur->isconst&=~(1<<hr);
476 return;
477 }
478 }
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 }
488 }
489 }
c43b5311 490 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 491}
492
e2b5e7aa 493static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 494{
495 int preferred_reg = 8+(reg&1);
496 int r,hr;
9f51b4b9 497
57871462 498 // allocate the lower 32 bits
499 alloc_reg(cur,i,reg);
9f51b4b9 500
57871462 501 // Don't allocate unused registers
502 if((cur->uu>>reg)&1) return;
9f51b4b9 503
57871462 504 // see if the upper half is already allocated
505 for(hr=0;hr<HOST_REGS;hr++)
506 {
507 if(cur->regmap[hr]==reg+64) return;
508 }
9f51b4b9 509
57871462 510 // Keep the same mapping if the register was already allocated in a loop
511 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 512
57871462 513 // Try to allocate the preferred register
514 if(cur->regmap[preferred_reg]==-1) {
515 cur->regmap[preferred_reg]=reg|64;
516 cur->dirty&=~(1<<preferred_reg);
517 cur->isconst&=~(1<<preferred_reg);
518 return;
519 }
520 r=cur->regmap[preferred_reg];
521 if(r<64&&((cur->u>>r)&1)) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 if(r>=64&&((cur->uu>>(r&63))&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
9f51b4b9 533
57871462 534 // Clear any unneeded registers
535 // We try to keep the mapping consistent, if possible, because it
536 // makes branches easier (especially loops). So we try to allocate
537 // first (see above) before removing old mappings. If this is not
538 // possible then go ahead and clear out the registers that are no
539 // longer needed.
540 for(hr=HOST_REGS-1;hr>=0;hr--)
541 {
542 r=cur->regmap[hr];
543 if(r>=0) {
544 if(r<64) {
545 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
546 }
547 else
548 {
549 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
550 }
551 }
552 }
553 // Try to allocate any available register, but prefer
554 // registers that have not been used recently.
555 if(i>0) {
556 for(hr=0;hr<HOST_REGS;hr++) {
557 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
558 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
559 cur->regmap[hr]=reg|64;
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
562 return;
563 }
564 }
565 }
566 }
567 // Try to allocate any available register
568 for(hr=0;hr<HOST_REGS;hr++) {
569 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
9f51b4b9 576
57871462 577 // Ok, now we have to evict someone
578 // Pick a register we hopefully won't need soon
579 u_char hsn[MAXREG+1];
580 memset(hsn,10,sizeof(hsn));
581 int j;
582 lsn(hsn,i,&preferred_reg);
583 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
584 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
585 if(i>0) {
586 // Don't evict the cycle count at entry points, otherwise the entry
587 // stub will have to write it.
588 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
589 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
590 for(j=10;j>=3;j--)
591 {
592 // Alloc preferred register if available
593 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 // Evict both parts of a 64-bit register
596 if((cur->regmap[hr]&63)==r) {
597 cur->regmap[hr]=-1;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 }
601 }
602 cur->regmap[preferred_reg]=reg|64;
603 return;
604 }
605 for(r=1;r<=MAXREG;r++)
606 {
607 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r+64) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 for(hr=0;hr<HOST_REGS;hr++) {
619 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
620 if(cur->regmap[hr]==r) {
621 cur->regmap[hr]=reg|64;
622 cur->dirty&=~(1<<hr);
623 cur->isconst&=~(1<<hr);
624 return;
625 }
626 }
627 }
628 }
629 }
630 }
631 }
632 for(j=10;j>=0;j--)
633 {
634 for(r=1;r<=MAXREG;r++)
635 {
636 if(hsn[r]==j) {
637 for(hr=0;hr<HOST_REGS;hr++) {
638 if(cur->regmap[hr]==r+64) {
639 cur->regmap[hr]=reg|64;
640 cur->dirty&=~(1<<hr);
641 cur->isconst&=~(1<<hr);
642 return;
643 }
644 }
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 }
654 }
655 }
c43b5311 656 SysPrintf("This shouldn't happen");exit(1);
57871462 657}
658
659// Allocate a temporary register. This is done without regard to
660// dirty status or whether the register we request is on the unneeded list
661// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 662static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 663{
664 int r,hr;
665 int preferred_reg = -1;
9f51b4b9 666
57871462 667 // see if it's already allocated
668 for(hr=0;hr<HOST_REGS;hr++)
669 {
670 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
671 }
9f51b4b9 672
57871462 673 // Try to allocate any available register
674 for(hr=HOST_REGS-1;hr>=0;hr--) {
675 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
9f51b4b9 682
57871462 683 // Find an unneeded register
684 for(hr=HOST_REGS-1;hr>=0;hr--)
685 {
686 r=cur->regmap[hr];
687 if(r>=0) {
688 if(r<64) {
689 if((cur->u>>r)&1) {
690 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 else
699 {
700 if((cur->uu>>(r&63))&1) {
701 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
702 cur->regmap[hr]=reg;
703 cur->dirty&=~(1<<hr);
704 cur->isconst&=~(1<<hr);
705 return;
706 }
707 }
708 }
709 }
710 }
9f51b4b9 711
57871462 712 // Ok, now we have to evict someone
713 // Pick a register we hopefully won't need soon
714 // TODO: we might want to follow unconditional jumps here
715 // TODO: get rid of dupe code and make this into a function
716 u_char hsn[MAXREG+1];
717 memset(hsn,10,sizeof(hsn));
718 int j;
719 lsn(hsn,i,&preferred_reg);
720 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
721 if(i>0) {
722 // Don't evict the cycle count at entry points, otherwise the entry
723 // stub will have to write it.
724 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
725 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
726 for(j=10;j>=3;j--)
727 {
728 for(r=1;r<=MAXREG;r++)
729 {
730 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r+64) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 for(hr=0;hr<HOST_REGS;hr++) {
742 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
743 if(cur->regmap[hr]==r) {
744 cur->regmap[hr]=reg;
745 cur->dirty&=~(1<<hr);
746 cur->isconst&=~(1<<hr);
747 return;
748 }
749 }
750 }
751 }
752 }
753 }
754 }
755 for(j=10;j>=0;j--)
756 {
757 for(r=1;r<=MAXREG;r++)
758 {
759 if(hsn[r]==j) {
760 for(hr=0;hr<HOST_REGS;hr++) {
761 if(cur->regmap[hr]==r+64) {
762 cur->regmap[hr]=reg;
763 cur->dirty&=~(1<<hr);
764 cur->isconst&=~(1<<hr);
765 return;
766 }
767 }
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 }
777 }
778 }
c43b5311 779 SysPrintf("This shouldn't happen");exit(1);
57871462 780}
e2b5e7aa 781
57871462 782// Allocate a specific ARM register.
e2b5e7aa 783static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 784{
785 int n;
f776eb14 786 int dirty=0;
9f51b4b9 787
57871462 788 // see if it's already allocated (and dealloc it)
789 for(n=0;n<HOST_REGS;n++)
790 {
f776eb14 791 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
792 dirty=(cur->dirty>>n)&1;
793 cur->regmap[n]=-1;
794 }
57871462 795 }
9f51b4b9 796
57871462 797 cur->regmap[hr]=reg;
798 cur->dirty&=~(1<<hr);
f776eb14 799 cur->dirty|=dirty<<hr;
57871462 800 cur->isconst&=~(1<<hr);
801}
802
803// Alloc cycle count into dedicated register
e2b5e7aa 804static void alloc_cc(struct regstat *cur,int i)
57871462 805{
806 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
807}
808
809/* Special alloc */
810
811
812/* Assembler */
813
e2b5e7aa 814static unused char regname[16][4] = {
57871462 815 "r0",
816 "r1",
817 "r2",
818 "r3",
819 "r4",
820 "r5",
821 "r6",
822 "r7",
823 "r8",
824 "r9",
825 "r10",
826 "fp",
827 "r12",
828 "sp",
829 "lr",
830 "pc"};
831
e2b5e7aa 832static void output_w32(u_int word)
57871462 833{
834 *((u_int *)out)=word;
835 out+=4;
836}
e2b5e7aa 837
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  return((rn<<16)|(rd<<12)|rm);
}
e2b5e7aa 845
// Build the rn/rd fields plus an 8-bit immediate that is shifted left by
// `shift` bits.  The shift is stored as a rotate field of (32-shift)&30 in
// bits 8-11, so it must be even.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
}
e2b5e7aa 854
855static u_int genimm(u_int imm,u_int *encoded)
57871462 856{
c2e3bd42 857 *encoded=0;
858 if(imm==0) return 1;
57871462 859 int i=32;
860 while(i>0)
861 {
862 if(imm<256) {
863 *encoded=((i&30)<<7)|imm;
864 return 1;
865 }
866 imm=(imm>>2)|(imm<<30);i-=2;
867 }
868 return 0;
869}
e2b5e7aa 870
871static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 872{
873 u_int ret=genimm(imm,encoded);
874 assert(ret);
581335b0 875 (void)ret;
cfbd3c6e 876}
e2b5e7aa 877
878static u_int genjmp(u_int addr)
57871462 879{
880 int offset=addr-(int)out-8;
e80343e2 881 if(offset<-33554432||offset>=33554432) {
882 if (addr>2) {
c43b5311 883 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 884 exit(1);
885 }
886 return 0;
887 }
57871462 888 return ((u_int)offset>>2)&0xffffff;
889}
890
e2b5e7aa 891static void emit_mov(int rs,int rt)
57871462 892{
893 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
894 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
895}
896
e2b5e7aa 897static void emit_movs(int rs,int rt)
57871462 898{
899 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
900 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
901}
902
e2b5e7aa 903static void emit_add(int rs1,int rs2,int rt)
57871462 904{
905 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
906 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
907}
908
e2b5e7aa 909static void emit_adds(int rs1,int rs2,int rt)
57871462 910{
911 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
913}
914
e2b5e7aa 915static void emit_adcs(int rs1,int rs2,int rt)
57871462 916{
917 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
919}
920
e2b5e7aa 921static void emit_sbc(int rs1,int rs2,int rt)
57871462 922{
923 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
924 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
925}
926
e2b5e7aa 927static void emit_sbcs(int rs1,int rs2,int rt)
57871462 928{
929 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
930 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
931}
932
e2b5e7aa 933static void emit_neg(int rs, int rt)
57871462 934{
935 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
936 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
937}
938
e2b5e7aa 939static void emit_negs(int rs, int rt)
57871462 940{
941 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
942 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
943}
944
e2b5e7aa 945static void emit_sub(int rs1,int rs2,int rt)
57871462 946{
947 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
948 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
949}
950
e2b5e7aa 951static void emit_subs(int rs1,int rs2,int rt)
57871462 952{
953 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
954 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
955}
956
e2b5e7aa 957static void emit_zeroreg(int rt)
57871462 958{
959 assem_debug("mov %s,#0\n",regname[rt]);
960 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
961}
962
e2b5e7aa 963static void emit_loadlp(u_int imm,u_int rt)
790ee18e 964{
965 add_literal((int)out,imm);
966 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
967 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
968}
e2b5e7aa 969
970static void emit_movw(u_int imm,u_int rt)
790ee18e 971{
972 assert(imm<65536);
973 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
974 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
975}
e2b5e7aa 976
977static void emit_movt(u_int imm,u_int rt)
790ee18e 978{
979 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
980 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
981}
e2b5e7aa 982
983static void emit_movimm(u_int imm,u_int rt)
790ee18e 984{
985 u_int armval;
986 if(genimm(imm,&armval)) {
987 assem_debug("mov %s,#%d\n",regname[rt],imm);
988 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
989 }else if(genimm(~imm,&armval)) {
990 assem_debug("mvn %s,#%d\n",regname[rt],imm);
991 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
992 }else if(imm<65536) {
665f33e1 993 #ifndef HAVE_ARMV7
790ee18e 994 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
995 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
996 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
997 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
998 #else
999 emit_movw(imm,rt);
1000 #endif
1001 }else{
665f33e1 1002 #ifndef HAVE_ARMV7
790ee18e 1003 emit_loadlp(imm,rt);
1004 #else
1005 emit_movw(imm&0x0000FFFF,rt);
1006 emit_movt(imm&0xFFFF0000,rt);
1007 #endif
1008 }
1009}
e2b5e7aa 1010
1011static void emit_pcreladdr(u_int rt)
790ee18e 1012{
1013 assem_debug("add %s,pc,#?\n",regname[rt]);
1014 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1015}
1016
e2b5e7aa 1017static void emit_loadreg(int r, int hr)
57871462 1018{
3d624f89 1019 if(r&64) {
c43b5311 1020 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1021 assert(0);
1022 return;
3d624f89 1023 }
57871462 1024 if((r&63)==0)
1025 emit_zeroreg(hr);
1026 else {
3d624f89 1027 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1028 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1029 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1030 if(r==CCREG) addr=(int)&cycle_count;
1031 if(r==CSREG) addr=(int)&Status;
1032 if(r==FSREG) addr=(int)&FCR31;
1033 if(r==INVCP) addr=(int)&invc_ptr;
1034 u_int offset = addr-(u_int)&dynarec_local;
1035 assert(offset<4096);
1036 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1037 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1038 }
1039}
e2b5e7aa 1040
1041static void emit_storereg(int r, int hr)
57871462 1042{
3d624f89 1043 if(r&64) {
c43b5311 1044 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1045 assert(0);
1046 return;
3d624f89 1047 }
3d624f89 1048 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1049 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1050 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1051 if(r==CCREG) addr=(int)&cycle_count;
1052 if(r==FSREG) addr=(int)&FCR31;
1053 u_int offset = addr-(u_int)&dynarec_local;
1054 assert(offset<4096);
1055 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1056 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1057}
1058
e2b5e7aa 1059static void emit_test(int rs, int rt)
57871462 1060{
1061 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1062 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1063}
1064
e2b5e7aa 1065static void emit_testimm(int rs,int imm)
57871462 1066{
1067 u_int armval;
5a05d80c 1068 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1069 genimm_checked(imm,&armval);
57871462 1070 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1071}
1072
e2b5e7aa 1073static void emit_testeqimm(int rs,int imm)
b9b61529 1074{
1075 u_int armval;
1076 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1077 genimm_checked(imm,&armval);
b9b61529 1078 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
e2b5e7aa 1081static void emit_not(int rs,int rt)
57871462 1082{
1083 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1084 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1085}
1086
e2b5e7aa 1087static void emit_mvnmi(int rs,int rt)
b9b61529 1088{
1089 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1090 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1091}
1092
e2b5e7aa 1093static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1094{
1095 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1096 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1097}
1098
e2b5e7aa 1099static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1100{
1101 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1102 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1103}
e2b5e7aa 1104
1105static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1106{
1107 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1109}
1110
e2b5e7aa 1111static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1112{
1113 assert(rs<16);
1114 assert(rt<16);
1115 assert(imm<32);
1116 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1117 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1118}
1119
e2b5e7aa 1120static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1121{
1122 assert(rs<16);
1123 assert(rt<16);
1124 assert(imm<32);
1125 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1126 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1127}
1128
e2b5e7aa 1129static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1130{
1131 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1132 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1133}
1134
e2b5e7aa 1135static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1136{
1137 assert(rs<16);
1138 assert(rt<16);
1139 if(imm!=0) {
57871462 1140 u_int armval;
1141 if(genimm(imm,&armval)) {
1142 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1144 }else if(genimm(-imm,&armval)) {
8a0a8423 1145 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1146 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1147 #ifdef HAVE_ARMV7
1148 }else if(rt!=rs&&(u_int)imm<65536) {
1149 emit_movw(imm&0x0000ffff,rt);
1150 emit_add(rs,rt,rt);
1151 }else if(rt!=rs&&(u_int)-imm<65536) {
1152 emit_movw(-imm&0x0000ffff,rt);
1153 emit_sub(rs,rt,rt);
1154 #endif
1155 }else if((u_int)-imm<65536) {
57871462 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1157 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1158 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1159 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1160 }else {
1161 do {
1162 int shift = (ffs(imm) - 1) & ~1;
1163 int imm8 = imm & (0xff << shift);
1164 genimm_checked(imm8,&armval);
1165 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1166 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1167 rs = rt;
1168 imm &= ~imm8;
1169 }
1170 while (imm != 0);
57871462 1171 }
1172 }
1173 else if(rs!=rt) emit_mov(rs,rt);
1174}
1175
e2b5e7aa 1176static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1177{
1178 assert(imm>-65536&&imm<65536);
1179 u_int armval;
1180 if(genimm(imm,&armval)) {
1181 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1182 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1183 }else if(genimm(-imm,&armval)) {
1184 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1185 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1186 }else if(imm<0) {
1187 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1188 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1189 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1190 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1191 }else{
1192 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1193 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1194 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1195 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1196 }
1197}
e2b5e7aa 1198
1199static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1200{
1201 emit_addimm(rt,imm,rt);
1202}
1203
e2b5e7aa 1204static void emit_addnop(u_int r)
57871462 1205{
1206 assert(r<16);
1207 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1208 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1209}
1210
e2b5e7aa 1211static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1212{
1213 u_int armval;
cfbd3c6e 1214 genimm_checked(imm,&armval);
57871462 1215 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1216 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1217}
1edfcc68 1218
e2b5e7aa 1219static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1220{
1221 assert(0);
1222 u_int armval;
cfbd3c6e 1223 genimm_checked(imm,&armval);
57871462 1224 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1225 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1226}
1227
e2b5e7aa 1228static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1229{
1230 // TODO: if(genimm(imm,&armval)) ...
1231 // else
1232 emit_movimm(imm,HOST_TEMPREG);
1233 emit_adds(HOST_TEMPREG,rsl,rtl);
1234 emit_adcimm(rsh,0,rth);
1235}
1236
e2b5e7aa 1237static void emit_andimm(int rs,int imm,int rt)
57871462 1238{
1239 u_int armval;
790ee18e 1240 if(imm==0) {
1241 emit_zeroreg(rt);
1242 }else if(genimm(imm,&armval)) {
57871462 1243 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1244 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1245 }else if(genimm(~imm,&armval)) {
1246 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1248 }else if(imm==65535) {
332a4533 1249 #ifndef HAVE_ARMV6
57871462 1250 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1251 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1252 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1254 #else
1255 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1256 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1257 #endif
1258 }else{
1259 assert(imm>0&&imm<65535);
665f33e1 1260 #ifndef HAVE_ARMV7
57871462 1261 assem_debug("mov r14,#%d\n",imm&0xFF00);
1262 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1263 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1264 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1265 #else
1266 emit_movw(imm,HOST_TEMPREG);
1267 #endif
1268 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1269 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1270 }
1271}
1272
e2b5e7aa 1273static void emit_orimm(int rs,int imm,int rt)
57871462 1274{
1275 u_int armval;
790ee18e 1276 if(imm==0) {
1277 if(rs!=rt) emit_mov(rs,rt);
1278 }else if(genimm(imm,&armval)) {
57871462 1279 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1281 }else{
1282 assert(imm>0&&imm<65536);
1283 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1285 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1287 }
1288}
1289
e2b5e7aa 1290static void emit_xorimm(int rs,int imm,int rt)
57871462 1291{
57871462 1292 u_int armval;
790ee18e 1293 if(imm==0) {
1294 if(rs!=rt) emit_mov(rs,rt);
1295 }else if(genimm(imm,&armval)) {
57871462 1296 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1298 }else{
514ed0d9 1299 assert(imm>0&&imm<65536);
57871462 1300 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1302 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1304 }
1305}
1306
e2b5e7aa 1307static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 //if(imm==1) ...
1312 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1313 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1314}
1315
e2b5e7aa 1316static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1317{
1318 assert(imm>0);
1319 assert(imm<32);
1320 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
e2b5e7aa 1324static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
e2b5e7aa 1332static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1338}
1339
e2b5e7aa 1340static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1346}
1347
e2b5e7aa 1348static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1354}
1355
e2b5e7aa 1356static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1357{
1358 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1359 assert(imm>0);
1360 assert(imm<32);
1361 //if(imm==1) ...
1362 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1363 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1364 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1365 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1366}
1367
e2b5e7aa 1368static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1369{
1370 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1371 assert(imm>0);
1372 assert(imm<32);
1373 //if(imm==1) ...
1374 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1375 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1376 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1377 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1378}
1379
// Sign-extend the low 16 bits of rs into rt: a single sxth on ARMv6+,
// otherwise a shift-left/arithmetic-shift-right pair by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1390
// Sign-extend the low 8 bits of rs into rt: a single sxtb on ARMv6+,
// otherwise a shift-left/arithmetic-shift-right pair by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1401
e2b5e7aa 1402static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1403{
1404 assert(rs<16);
1405 assert(rt<16);
1406 assert(shift<16);
1407 //if(imm==1) ...
1408 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1409 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1410}
e2b5e7aa 1411
1412static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1413{
1414 assert(rs<16);
1415 assert(rt<16);
1416 assert(shift<16);
1417 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1418 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1419}
e2b5e7aa 1420
1421static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1422{
1423 assert(rs<16);
1424 assert(rt<16);
1425 assert(shift<16);
1426 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1427 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1428}
57871462 1429
e2b5e7aa 1430static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1431{
1432 assert(rs<16);
1433 assert(rt<16);
1434 assert(shift<16);
1435 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1436 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1437}
e2b5e7aa 1438
1439static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1440{
1441 assert(rs<16);
1442 assert(rt<16);
1443 assert(shift<16);
1444 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1445 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1446}
1447
e2b5e7aa 1448static void emit_cmpimm(int rs,int imm)
57871462 1449{
1450 u_int armval;
1451 if(genimm(imm,&armval)) {
5a05d80c 1452 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1453 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1454 }else if(genimm(-imm,&armval)) {
5a05d80c 1455 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1456 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1457 }else if(imm>0) {
1458 assert(imm<65536);
57871462 1459 emit_movimm(imm,HOST_TEMPREG);
57871462 1460 assem_debug("cmp %s,r14\n",regname[rs]);
1461 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1462 }else{
1463 assert(imm>-65536);
57871462 1464 emit_movimm(-imm,HOST_TEMPREG);
57871462 1465 assem_debug("cmn %s,r14\n",regname[rs]);
1466 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1467 }
1468}
1469
e2b5e7aa 1470static void emit_cmovne_imm(int imm,int rt)
57871462 1471{
1472 assem_debug("movne %s,#%d\n",regname[rt],imm);
1473 u_int armval;
cfbd3c6e 1474 genimm_checked(imm,&armval);
57871462 1475 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1476}
e2b5e7aa 1477
1478static void emit_cmovl_imm(int imm,int rt)
57871462 1479{
1480 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1481 u_int armval;
cfbd3c6e 1482 genimm_checked(imm,&armval);
57871462 1483 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1484}
e2b5e7aa 1485
1486static void emit_cmovb_imm(int imm,int rt)
57871462 1487{
1488 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1489 u_int armval;
cfbd3c6e 1490 genimm_checked(imm,&armval);
57871462 1491 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1492}
e2b5e7aa 1493
1494static void emit_cmovs_imm(int imm,int rt)
57871462 1495{
1496 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1497 u_int armval;
cfbd3c6e 1498 genimm_checked(imm,&armval);
57871462 1499 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1500}
e2b5e7aa 1501
1502static void emit_cmove_reg(int rs,int rt)
57871462 1503{
1504 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1505 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1506}
e2b5e7aa 1507
1508static void emit_cmovne_reg(int rs,int rt)
57871462 1509{
1510 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1511 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1512}
e2b5e7aa 1513
1514static void emit_cmovl_reg(int rs,int rt)
57871462 1515{
1516 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1517 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1518}
e2b5e7aa 1519
1520static void emit_cmovs_reg(int rs,int rt)
57871462 1521{
1522 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1523 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1524}
1525
// Set rt to 1 if rs < imm (movlt after a compare), else 0.
// When rt aliases rs the zeroing has to happen after the compare so the
// compared value is not destroyed first.
static void emit_slti32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1533
// Set rt to 1 if the compare of rs against imm leaves CC (movcc), else 0.
// When rt aliases rs the zeroing has to happen after the compare so the
// compared value is not destroyed first.
static void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1541
// 64-bit "set if less than immediate" on the pair rsh:rsl, result in rt.
// The low word is compared first via emit_slti32(); the result is then
// corrected according to the high word. rsh must differ from rt.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: the high word is compared against -1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // Non-negative immediate: the high word is tested against itself.
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
e2b5e7aa 1559
// Unsigned 64-bit "set if less than immediate" on the pair rsh:rsl,
// result in rt. The low word is compared first via emit_sltiu32(); the
// result is then corrected according to the high word. rsh must differ
// from rt.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: the high word is compared against -1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    // Non-negative immediate: a non-zero high word forces the result to 0.
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1575
e2b5e7aa 1576static void emit_cmp(int rs,int rt)
57871462 1577{
1578 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1579 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1580}
e2b5e7aa 1581
// Set rt to 1 if rs > 0, else 0: compare rs with 1, preload 1, then
// overwrite with 0 on LT.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);   // flags from rs-1
  emit_movimm(1,rt);   // assume "greater than zero"
  emit_cmovl_imm(0,rt);
}
e2b5e7aa 1589
// Normalise rs to a 0/1 value in rt: flags are taken from rs (via movs
// when copying into a different register, via tst when in place), then
// rt becomes 1 on NE.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1597
// 64-bit "greater than zero" on the pair rsh:rsl, result in rt.
// Starts from the low-word answer, then corrects it using the high word.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);  // high word non-zero
  emit_cmovs_imm(0,rt);   // high word has its sign bit set
}
e2b5e7aa 1606
// 64-bit "non-zero" on the pair rsh:rsl, result in rt: OR the halves
// with flags, then set rt to 1 on NE.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1613
// Set rt to 1 if rs1 < rs2 (movlt after a compare), else 0.
// If rt aliases either source, it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1622
// Set rt to 1 if comparing rs1 with rs2 leaves CC (movcc), else 0.
// If rt aliases either source, it is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1631
1632static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1633{
1634 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1635 assert(u1!=rt);
1636 assert(u2!=rt);
1637 emit_cmp(l1,l2);
1638 emit_movimm(0,rt);
1639 emit_sbcs(u1,u2,HOST_TEMPREG);
1640 emit_cmovl_imm(1,rt);
1641}
e2b5e7aa 1642
1643static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1644{
1645 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1646 assert(u1!=rt);
1647 assert(u2!=rt);
1648 emit_cmp(l1,l2);
1649 emit_movimm(0,rt);
1650 emit_sbcs(u1,u2,HOST_TEMPREG);
1651 emit_cmovb_imm(1,rt);
1652}
1653
e2b5e7aa 1654static void emit_call(int a)
57871462 1655{
1656 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1657 u_int offset=genjmp(a);
1658 output_w32(0xeb000000|offset);
1659}
e2b5e7aa 1660
1661static void emit_jmp(int a)
57871462 1662{
1663 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1664 u_int offset=genjmp(a);
1665 output_w32(0xea000000|offset);
1666}
e2b5e7aa 1667
// Emit "bne a"; the offset field is produced by genjmp().
static void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
e2b5e7aa 1674
// Emit "beq a"; the offset field is produced by genjmp().
static void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
e2b5e7aa 1681
// Emit "bmi a"; the offset field is produced by genjmp().
static void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
e2b5e7aa 1688
// Emit "bpl a"; the offset field is produced by genjmp().
static void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
e2b5e7aa 1695
// Emit "blt a"; the offset field is produced by genjmp().
static void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
e2b5e7aa 1702
// Emit "bge a"; the offset field is produced by genjmp().
static void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
e2b5e7aa 1709
// Emit "bvc a"; the offset field is produced by genjmp().
static void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
e2b5e7aa 1716
// Emit "bcs a"; the offset field is produced by genjmp().
static void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
e2b5e7aa 1723
// Emit "bcc a"; the offset field is produced by genjmp().
static void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1730
e2b5e7aa 1731static void emit_callreg(u_int r)
57871462 1732{
c6c3b1b3 1733 assert(r<15);
1734 assem_debug("blx %s\n",regname[r]);
1735 output_w32(0xe12fff30|r);
57871462 1736}
e2b5e7aa 1737
1738static void emit_jmpreg(u_int r)
57871462 1739{
1740 assem_debug("mov pc,%s\n",regname[r]);
1741 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1742}
1743
e2b5e7aa 1744static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1745{
1746 assert(offset>-4096&&offset<4096);
1747 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1748 if(offset>=0) {
1749 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1750 }else{
1751 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1752 }
1753}
e2b5e7aa 1754
1755static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1756{
1757 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1758 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1759}
e2b5e7aa 1760
1761static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1762{
1763 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1764 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1765}
e2b5e7aa 1766
1767static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1768{
1769 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1770 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1771}
e2b5e7aa 1772
1773static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1774{
1775 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1776 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1777}
e2b5e7aa 1778
1779static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1780{
1781 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1782 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1783}
e2b5e7aa 1784
1785static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1786{
1787 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1788 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1789}
e2b5e7aa 1790
// Word load that optionally goes through a map register.
// map < 0: plain "ldr rt,[rs,#addr]". Otherwise addr must be 0
// (asserted) and the load is indexed by map scaled by 4.
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1799
// Doubleword load into rh (first word, skipped when rh < 0) and rl
// (second word), optionally through a map register.
// map < 0: two plain loads at addr and addr+4. Otherwise rh must not
// alias rs (asserted); map is incremented by 1 between the two mapped
// loads and is left modified.
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1812
1813static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1814{
1815 assert(offset>-256&&offset<256);
1816 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1817 if(offset>=0) {
1818 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1819 }else{
1820 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1821 }
1822}
e2b5e7aa 1823
1824static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1825{
1826 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1827 else {
1828 if(addr==0) {
1829 emit_shlimm(map,2,map);
1830 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1831 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1832 }else{
1833 assert(addr>-256&&addr<256);
1834 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1835 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1836 emit_movsbl_indexed(addr, rt, rt);
1837 }
1838 }
1839}
e2b5e7aa 1840
1841static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1842{
1843 assert(offset>-256&&offset<256);
1844 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1845 if(offset>=0) {
1846 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1847 }else{
1848 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1849 }
1850}
e2b5e7aa 1851
1852static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1853{
1854 assert(offset>-4096&&offset<4096);
1855 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1856 if(offset>=0) {
1857 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1858 }else{
1859 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1860 }
1861}
e2b5e7aa 1862
1863static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1864{
1865 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1866 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1867}
e2b5e7aa 1868
// Unsigned byte load, optionally through a map register.
//   map < 0  : plain "ldrb rt,[rs,#addr]".
//   addr == 0: load indexed by map scaled by 4.
//   otherwise: rs+addr is formed in rt first, then used as the base.
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
  }else if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
  }else{
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  }
}
e2b5e7aa 1881
1882static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1883{
1884 assert(offset>-256&&offset<256);
1885 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1886 if(offset>=0) {
1887 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1888 }else{
1889 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1890 }
1891}
e2b5e7aa 1892
054175e9 1893static void emit_ldrd(int offset, int rs, int rt)
1894{
1895 assert(offset>-256&&offset<256);
1896 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1897 if(offset>=0) {
1898 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1899 }else{
1900 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1901 }
1902}
e2b5e7aa 1903
1904static void emit_readword(int addr, int rt)
57871462 1905{
1906 u_int offset = addr-(u_int)&dynarec_local;
1907 assert(offset<4096);
1908 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1909 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1910}
e2b5e7aa 1911
1912static unused void emit_movsbl(int addr, int rt)
57871462 1913{
1914 u_int offset = addr-(u_int)&dynarec_local;
1915 assert(offset<256);
1916 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1917 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1918}
e2b5e7aa 1919
1920static unused void emit_movswl(int addr, int rt)
57871462 1921{
1922 u_int offset = addr-(u_int)&dynarec_local;
1923 assert(offset<256);
1924 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1925 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1926}
e2b5e7aa 1927
1928static unused void emit_movzbl(int addr, int rt)
57871462 1929{
1930 u_int offset = addr-(u_int)&dynarec_local;
1931 assert(offset<4096);
1932 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1933 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1934}
e2b5e7aa 1935
1936static unused void emit_movzwl(int addr, int rt)
57871462 1937{
1938 u_int offset = addr-(u_int)&dynarec_local;
1939 assert(offset<256);
1940 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1941 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1942}
57871462 1943
e2b5e7aa 1944static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1945{
1946 assert(offset>-4096&&offset<4096);
1947 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1948 if(offset>=0) {
1949 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1950 }else{
1951 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1952 }
1953}
e2b5e7aa 1954
1955static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 1956{
1957 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1958 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1959}
e2b5e7aa 1960
// Word store that optionally goes through a map register.
// map < 0: plain "str rt,[rs,#addr]". Otherwise addr must be 0
// (asserted) and the store is indexed by map scaled by 4.
// The temp parameter is not used by this function.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 1969
// Doubleword store of rh (first word) and rl (second word), optionally
// through a map register.
//   map < 0: two plain stores at addr and addr+4 (rh skipped when < 0).
//   mapped : rh must be valid (asserted). When temp differs from rs,
//            temp = map+1 serves as the index for the second word;
//            otherwise rs itself is advanced by 4 between the stores.
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 1986
1987static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1988{
1989 assert(offset>-256&&offset<256);
1990 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1991 if(offset>=0) {
1992 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1993 }else{
1994 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1995 }
1996}
e2b5e7aa 1997
1998static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1999{
2000 assert(offset>-4096&&offset<4096);
2001 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2002 if(offset>=0) {
2003 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2004 }else{
2005 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2006 }
2007}
e2b5e7aa 2008
2009static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 2010{
2011 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2012 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2013}
e2b5e7aa 2014
// Byte store, optionally through a map register.
//   map < 0  : plain "strb rt,[rs,#addr]".
//   addr == 0: store indexed by map scaled by 4.
//   otherwise: rs+addr is formed in temp first, then used as the base.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
  }else if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
  }else{
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  }
}
e2b5e7aa 2027
2028static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2029{
2030 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2031 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2032}
e2b5e7aa 2033
2034static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2035{
2036 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2037 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2038}
e2b5e7aa 2039
2040static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2041{
2042 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2043 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2044}
e2b5e7aa 2045
2046static void emit_writeword(int rt, int addr)
57871462 2047{
2048 u_int offset = addr-(u_int)&dynarec_local;
2049 assert(offset<4096);
2050 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2051 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2052}
e2b5e7aa 2053
2054static unused void emit_writehword(int rt, int addr)
57871462 2055{
2056 u_int offset = addr-(u_int)&dynarec_local;
2057 assert(offset<256);
2058 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2059 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2060}
e2b5e7aa 2061
2062static unused void emit_writebyte(int rt, int addr)
57871462 2063{
2064 u_int offset = addr-(u_int)&dynarec_local;
2065 assert(offset<4096);
74426039 2066 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2067 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2068}
57871462 2069
e2b5e7aa 2070static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2071{
2072 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2073 assert(rs1<16);
2074 assert(rs2<16);
2075 assert(hi<16);
2076 assert(lo<16);
2077 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2078}
e2b5e7aa 2079
2080static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2081{
2082 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2083 assert(rs1<16);
2084 assert(rs2<16);
2085 assert(hi<16);
2086 assert(lo<16);
2087 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2088}
2089
e2b5e7aa 2090static void emit_clz(int rs,int rt)
57871462 2091{
2092 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2093 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2094}
2095
e2b5e7aa 2096static void emit_subcs(int rs1,int rs2,int rt)
57871462 2097{
2098 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2099 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2100}
2101
e2b5e7aa 2102static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2103{
2104 assert(imm>0);
2105 assert(imm<32);
2106 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2107 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2108}
2109
e2b5e7aa 2110static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2111{
2112 assert(imm>0);
2113 assert(imm<32);
2114 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2115 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2116}
2117
e2b5e7aa 2118static void emit_negmi(int rs, int rt)
57871462 2119{
2120 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2121 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2122}
2123
e2b5e7aa 2124static void emit_negsmi(int rs, int rt)
57871462 2125{
2126 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2127 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2128}
2129
e2b5e7aa 2130static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2131{
2132 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2133 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2134}
2135
e2b5e7aa 2136static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2137{
2138 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2139 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2140}
2141
e2b5e7aa 2142static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2143{
2144 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2145 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2146}
2147
e2b5e7aa 2148static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2149{
2150 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2151 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2152}
2153
e2b5e7aa 2154static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2155{
2156 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2157 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2158}
2159
e2b5e7aa 2160static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2161{
2162 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2163 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2164}
2165
e2b5e7aa 2166static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2167{
2168 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2169 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2170}
2171
e2b5e7aa 2172static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2173{
2174 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2175 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2176}
2177
e2b5e7aa 2178static void emit_teq(int rs, int rt)
57871462 2179{
2180 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2181 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2182}
2183
e2b5e7aa 2184static void emit_rsbimm(int rs, int imm, int rt)
57871462 2185{
2186 u_int armval;
cfbd3c6e 2187 genimm_checked(imm,&armval);
57871462 2188 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2189 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2190}
2191
2192// Load 2 immediates optimizing for small code size
e2b5e7aa 2193static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2194{
2195 emit_movimm(imm1,rt1);
2196 u_int armval;
2197 if(genimm(imm2-imm1,&armval)) {
2198 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2199 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2200 }else if(genimm(imm1-imm2,&armval)) {
2201 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2202 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2203 }
2204 else emit_movimm(imm2,rt2);
2205}
2206
2207// Conditionally select one of two immediates, optimizing for small code size
2208// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2209static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2210{
2211 u_int armval;
2212 if(genimm(imm2-imm1,&armval)) {
2213 emit_movimm(imm1,rt);
2214 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2215 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2216 }else if(genimm(imm1-imm2,&armval)) {
2217 emit_movimm(imm1,rt);
2218 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2219 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2220 }
2221 else {
665f33e1 2222 #ifndef HAVE_ARMV7
57871462 2223 emit_movimm(imm1,rt);
2224 add_literal((int)out,imm2);
2225 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2226 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2227 #else
2228 emit_movw(imm1&0x0000FFFF,rt);
2229 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2230 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2231 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2232 }
2233 emit_movt(imm1&0xFFFF0000,rt);
2234 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2235 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2236 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2237 }
2238 #endif
2239 }
2240}
2241
57871462 2242// special case for checking invalid_code
e2b5e7aa 2243static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2244{
2245 assert(imm<128&&imm>=0);
2246 assert(r>=0&&r<16);
2247 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2248 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2249 emit_cmpimm(HOST_TEMPREG,imm);
2250}
2251
e2b5e7aa 2252static void emit_callne(int a)
0bbd1454 2253{
2254 assem_debug("blne %x\n",a);
2255 u_int offset=genjmp(a);
2256 output_w32(0x1b000000|offset);
2257}
2258
57871462 2259// Used to preload hash table entries
e2b5e7aa 2260static unused void emit_prefetchreg(int r)
57871462 2261{
2262 assem_debug("pld %s\n",regname[r]);
2263 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2264}
2265
2266// Special case for mini_ht
e2b5e7aa 2267static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2268{
2269 assert(offset<4096);
2270 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2271 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2272}
2273
e2b5e7aa 2274static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2275{
2276 u_int armval;
cfbd3c6e 2277 genimm_checked(imm,&armval);
57871462 2278 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2279 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2280}
2281
e2b5e7aa 2282static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2283{
2284 u_int armval;
cfbd3c6e 2285 genimm_checked(imm,&armval);
57871462 2286 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2287 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2288}
2289
e2b5e7aa 2290static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2291{
2292 u_int armval;
cfbd3c6e 2293 genimm_checked(imm,&armval);
57871462 2294 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2295 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2296}
2297
e2b5e7aa 2298static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2299{
2300 u_int armval;
cfbd3c6e 2301 genimm_checked(imm,&armval);
57871462 2302 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2303 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2304}
2305
e2b5e7aa 2306static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2307{
2308 u_int armval;
cfbd3c6e 2309 genimm_checked(imm,&armval);
57871462 2310 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2311 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2312}
2313
e2b5e7aa 2314static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2315{
2316 u_int armval;
cfbd3c6e 2317 genimm_checked(imm,&armval);
b9b61529 2318 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2319 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2320}
2321
e2b5e7aa 2322static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2323{
2324 u_int armval;
cfbd3c6e 2325 genimm_checked(imm,&armval);
b9b61529 2326 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2327 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2328}
2329
e2b5e7aa 2330static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2331{
2332 u_int armval;
2333 genimm_checked(imm,&armval);
2334 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2335 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2336}
2337
e2b5e7aa 2338static void emit_jno_unlikely(int a)
57871462 2339{
2340 //emit_jno(a);
2341 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2342 output_w32(0x72800000|rd_rn_rm(15,15,0));
2343}
2344
054175e9 2345static void save_regs_all(u_int reglist)
57871462 2346{
054175e9 2347 int i;
57871462 2348 if(!reglist) return;
2349 assem_debug("stmia fp,{");
054175e9 2350 for(i=0;i<16;i++)
2351 if(reglist&(1<<i))
2352 assem_debug("r%d,",i);
57871462 2353 assem_debug("}\n");
2354 output_w32(0xe88b0000|reglist);
2355}
e2b5e7aa 2356
054175e9 2357static void restore_regs_all(u_int reglist)
57871462 2358{
054175e9 2359 int i;
57871462 2360 if(!reglist) return;
2361 assem_debug("ldmia fp,{");
054175e9 2362 for(i=0;i<16;i++)
2363 if(reglist&(1<<i))
2364 assem_debug("r%d,",i);
57871462 2365 assem_debug("}\n");
2366 output_w32(0xe89b0000|reglist);
2367}
e2b5e7aa 2368
054175e9 2369// Save registers before function call
2370static void save_regs(u_int reglist)
2371{
4d646738 2372 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2373 save_regs_all(reglist);
2374}
e2b5e7aa 2375
054175e9 2376// Restore registers after function call
2377static void restore_regs(u_int reglist)
2378{
4d646738 2379 reglist&=CALLER_SAVE_REGS;
054175e9 2380 restore_regs_all(reglist);
2381}
57871462 2382
57871462 2383/* Stubs/epilogue */
2384
e2b5e7aa 2385static void literal_pool(int n)
57871462 2386{
2387 if(!literalcount) return;
2388 if(n) {
2389 if((int)out-literals[0][0]<4096-n) return;
2390 }
2391 u_int *ptr;
2392 int i;
2393 for(i=0;i<literalcount;i++)
2394 {
77750690 2395 u_int l_addr=(u_int)out;
2396 int j;
2397 for(j=0;j<i;j++) {
2398 if(literals[j][1]==literals[i][1]) {
2399 //printf("dup %08x\n",literals[i][1]);
2400 l_addr=literals[j][0];
2401 break;
2402 }
2403 }
57871462 2404 ptr=(u_int *)literals[i][0];
77750690 2405 u_int offset=l_addr-(u_int)ptr-8;
57871462 2406 assert(offset<4096);
2407 assert(!(offset&3));
2408 *ptr|=offset;
77750690 2409 if(l_addr==(u_int)out) {
2410 literals[i][0]=l_addr; // remember for dupes
2411 output_w32(literals[i][1]);
2412 }
57871462 2413 }
2414 literalcount=0;
2415}
2416
e2b5e7aa 2417static void literal_pool_jumpover(int n)
57871462 2418{
2419 if(!literalcount) return;
2420 if(n) {
2421 if((int)out-literals[0][0]<4096-n) return;
2422 }
2423 int jaddr=(int)out;
2424 emit_jmp(0);
2425 literal_pool(0);
2426 set_jump_target(jaddr,(int)out);
2427}
2428
e2b5e7aa 2429static void emit_extjump2(u_int addr, int target, int linker)
57871462 2430{
2431 u_char *ptr=(u_char *)addr;
2432 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2433 (void)ptr;
2434
57871462 2435 emit_loadlp(target,0);
2436 emit_loadlp(addr,1);
24385cae 2437 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2438 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2439//DEBUG >
2440#ifdef DEBUG_CYCLE_COUNT
2441 emit_readword((int)&last_count,ECX);
2442 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2443 emit_readword((int)&next_interupt,ECX);
2444 emit_writeword(HOST_CCREG,(int)&Count);
2445 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2446 emit_writeword(ECX,(int)&last_count);
2447#endif
2448//DEBUG <
2449 emit_jmp(linker);
2450}
2451
e2b5e7aa 2452static void emit_extjump(int addr, int target)
57871462 2453{
2454 emit_extjump2(addr, target, (int)dyna_linker);
2455}
e2b5e7aa 2456
2457static void emit_extjump_ds(int addr, int target)
57871462 2458{
2459 emit_extjump2(addr, target, (int)dyna_linker_ds);
2460}
2461
13e35c04 2462// put rt_val into rt, potentially making use of rs with value rs_val
2463static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2464{
8575a877 2465 u_int armval;
2466 int diff;
2467 if(genimm(rt_val,&armval)) {
2468 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2469 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2470 return;
2471 }
2472 if(genimm(~rt_val,&armval)) {
2473 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2474 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2475 return;
2476 }
2477 diff=rt_val-rs_val;
2478 if(genimm(diff,&armval)) {
2479 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2480 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2481 return;
2482 }else if(genimm(-diff,&armval)) {
2483 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2484 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2485 return;
2486 }
2487 emit_movimm(rt_val,rt);
2488}
2489
// return 1 if the above function can do its job cheaply, i.e. v2 equals v1
// or differs from it (in either direction) by a value that fits in 8 bits
// once trailing pairs of zero bits are stripped
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;
  int diff=v2-v1;
  u_int bits=diff;
  // Strip trailing zero bit pairs from +diff.
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;
  // Same check for -diff.
  bits=-diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;
  return 0;
}
cbbab9cd 2505
// Move host registers a0 and a1 into r0 and r1 (a negative value means
// "nothing to pass" for that slot). trashes r2
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 currently sits in r0: move it out before r0 is overwritten
      emit_mov(a1,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>=0&&a0!=0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2522
b1be1eee 2523static void mov_loadtype_adj(int type,int rs,int rt)
2524{
2525 switch(type) {
2526 case LOADB_STUB: emit_signextend8(rs,rt); break;
2527 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2528 case LOADH_STUB: emit_signextend16(rs,rt); break;
2529 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2530 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2531 default: assert(0);
2532 }
2533}
2534
b1be1eee 2535#include "pcsxmem.h"
2536#include "pcsxmem_inline.c"
b1be1eee 2537
e2b5e7aa 2538static void do_readstub(int n)
57871462 2539{
2540 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2541 literal_pool(256);
2542 set_jump_target(stubs[n][1],(int)out);
2543 int type=stubs[n][0];
2544 int i=stubs[n][3];
2545 int rs=stubs[n][4];
2546 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2547 u_int reglist=stubs[n][7];
2548 signed char *i_regmap=i_regs->regmap;
581335b0 2549 int rt;
b9b61529 2550 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2551 rt=get_reg(i_regmap,FTEMP);
2552 }else{
57871462 2553 rt=get_reg(i_regmap,rt1[i]);
2554 }
2555 assert(rs>=0);
c6c3b1b3 2556 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2557 reglist|=(1<<rs);
2558 for(r=0;r<=12;r++) {
2559 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2560 temp=r; break;
2561 }
2562 }
db829eeb 2563 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2564 reglist&=~(1<<rt);
2565 if(temp==-1) {
2566 save_regs(reglist);
2567 regs_saved=1;
2568 temp=(rs==0)?2:0;
2569 }
2570 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2571 temp2=1;
2572 emit_readword((int)&mem_rtab,temp);
2573 emit_shrimm(rs,12,temp2);
2574 emit_readword_dualindexedx4(temp,temp2,temp2);
2575 emit_lsls_imm(temp2,1,temp2);
2576 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2577 switch(type) {
2578 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2579 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2580 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2581 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2582 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2583 }
2584 }
2585 if(regs_saved) {
2586 restore_jump=(int)out;
2587 emit_jcc(0); // jump to reg restore
2588 }
2589 else
2590 emit_jcc(stubs[n][2]); // return address
2591
2592 if(!regs_saved)
2593 save_regs(reglist);
2594 int handler=0;
2595 if(type==LOADB_STUB||type==LOADBU_STUB)
2596 handler=(int)jump_handler_read8;
2597 if(type==LOADH_STUB||type==LOADHU_STUB)
2598 handler=(int)jump_handler_read16;
2599 if(type==LOADW_STUB)
2600 handler=(int)jump_handler_read32;
2601 assert(handler!=0);
b96d3df7 2602 pass_args(rs,temp2);
c6c3b1b3 2603 int cc=get_reg(i_regmap,CCREG);
2604 if(cc<0)
2605 emit_loadreg(CCREG,2);
2573466a 2606 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2607 emit_call(handler);
2608 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2609 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2610 }
2611 if(restore_jump)
2612 set_jump_target(restore_jump,(int)out);
2613 restore_regs(reglist);
2614 emit_jmp(stubs[n][2]); // return address
57871462 2615}
2616
c6c3b1b3 2617// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2618static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2619{
2620 u_int l1,l2=0;
2621 l1=((u_int *)table)[addr>>12];
2622 if((l1&(1<<31))==0) {
2623 u_int v=l1<<1;
2624 *addr_host=v+addr;
2625 return 0;
2626 }
2627 else {
2628 l1<<=1;
2629 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2630 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2631 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2632 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2633 else
2634 l2=((u_int *)l1)[(addr&0xfff)/4];
2635 if((l2&(1<<31))==0) {
2636 u_int v=l2<<1;
2637 *addr_host=v+(addr&0xfff);
2638 return 0;
2639 }
2640 return l2<<1;
2641 }
2642}
c6c3b1b3 2643
e2b5e7aa 2644static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2645{
2646 int rs=get_reg(regmap,target);
57871462 2647 int rt=get_reg(regmap,target);
535d208a 2648 if(rs<0) rs=get_reg(regmap,-1);
57871462 2649 assert(rs>=0);
b1be1eee 2650 u_int handler,host_addr=0,is_dynamic,far_call=0;
2651 int cc=get_reg(regmap,CCREG);
2652 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2653 return;
c6c3b1b3 2654 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2655 if (handler==0) {
db829eeb 2656 if(rt<0||rt1[i]==0)
c6c3b1b3 2657 return;
13e35c04 2658 if(addr!=host_addr)
2659 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2660 switch(type) {
2661 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2662 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2663 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2664 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2665 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2666 default: assert(0);
2667 }
2668 return;
2669 }
b1be1eee 2670 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2671 if(is_dynamic) {
2672 if(type==LOADB_STUB||type==LOADBU_STUB)
2673 handler=(int)jump_handler_read8;
2674 if(type==LOADH_STUB||type==LOADHU_STUB)
2675 handler=(int)jump_handler_read16;
2676 if(type==LOADW_STUB)
2677 handler=(int)jump_handler_read32;
2678 }
c6c3b1b3 2679
2680 // call a memhandler
db829eeb 2681 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2682 reglist&=~(1<<rt);
2683 save_regs(reglist);
2684 if(target==0)
2685 emit_movimm(addr,0);
2686 else if(rs!=0)
2687 emit_mov(rs,0);
c6c3b1b3 2688 int offset=(int)handler-(int)out-8;
2689 if(offset<-33554432||offset>=33554432) {
2690 // unreachable memhandler, a plugin func perhaps
b1be1eee 2691 emit_movimm(handler,12);
2692 far_call=1;
2693 }
2694 if(cc<0)
2695 emit_loadreg(CCREG,2);
2696 if(is_dynamic) {
2697 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2698 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2699 }
b1be1eee 2700 else {
2701 emit_readword((int)&last_count,3);
2702 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2703 emit_add(2,3,2);
2704 emit_writeword(2,(int)&Count);
2705 }
2706
2707 if(far_call)
2708 emit_callreg(12);
c6c3b1b3 2709 else
2710 emit_call(handler);
b1be1eee 2711
db829eeb 2712 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2713 switch(type) {
2714 case LOADB_STUB: emit_signextend8(0,rt); break;
2715 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2716 case LOADH_STUB: emit_signextend16(0,rt); break;
2717 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2718 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2719 default: assert(0);
2720 }
2721 }
2722 restore_regs(reglist);
57871462 2723}
2724
e2b5e7aa 2725static void do_writestub(int n)
57871462 2726{
2727 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2728 literal_pool(256);
2729 set_jump_target(stubs[n][1],(int)out);
2730 int type=stubs[n][0];
2731 int i=stubs[n][3];
2732 int rs=stubs[n][4];
2733 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2734 u_int reglist=stubs[n][7];
2735 signed char *i_regmap=i_regs->regmap;
581335b0 2736 int rt,r;
b9b61529 2737 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2738 rt=get_reg(i_regmap,r=FTEMP);
2739 }else{
57871462 2740 rt=get_reg(i_regmap,r=rs2[i]);
2741 }
2742 assert(rs>=0);
2743 assert(rt>=0);
b96d3df7 2744 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2745 int reglist2=reglist|(1<<rs)|(1<<rt);
2746 for(rtmp=0;rtmp<=12;rtmp++) {
2747 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2748 temp=rtmp; break;
2749 }
2750 }
2751 if(temp==-1) {
2752 save_regs(reglist);
2753 regs_saved=1;
2754 for(rtmp=0;rtmp<=3;rtmp++)
2755 if(rtmp!=rs&&rtmp!=rt)
2756 {temp=rtmp;break;}
2757 }
2758 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2759 temp2=3;
2760 emit_readword((int)&mem_wtab,temp);
2761 emit_shrimm(rs,12,temp2);
2762 emit_readword_dualindexedx4(temp,temp2,temp2);
2763 emit_lsls_imm(temp2,1,temp2);
2764 switch(type) {
2765 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2766 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2767 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2768 default: assert(0);
2769 }
2770 if(regs_saved) {
2771 restore_jump=(int)out;
2772 emit_jcc(0); // jump to reg restore
2773 }
2774 else
2775 emit_jcc(stubs[n][2]); // return address (invcode check)
2776
2777 if(!regs_saved)
2778 save_regs(reglist);
2779 int handler=0;
2780 switch(type) {
2781 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2782 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2783 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2784 }
2785 assert(handler!=0);
2786 pass_args(rs,rt);
2787 if(temp2!=3)
2788 emit_mov(temp2,3);
2789 int cc=get_reg(i_regmap,CCREG);
2790 if(cc<0)
2791 emit_loadreg(CCREG,2);
2573466a 2792 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2793 // returns new cycle_count
2794 emit_call(handler);
2573466a 2795 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2796 if(cc<0)
2797 emit_storereg(CCREG,2);
2798 if(restore_jump)
2799 set_jump_target(restore_jump,(int)out);
2800 restore_regs(reglist);
2801 ra=stubs[n][2];
b96d3df7 2802 emit_jmp(ra);
57871462 2803}
2804
e2b5e7aa 2805static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2806{
2807 int rs=get_reg(regmap,-1);
57871462 2808 int rt=get_reg(regmap,target);
2809 assert(rs>=0);
2810 assert(rt>=0);
b96d3df7 2811 u_int handler,host_addr=0;
b96d3df7 2812 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2813 if (handler==0) {
13e35c04 2814 if(addr!=host_addr)
2815 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2816 switch(type) {
2817 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2818 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2819 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2820 default: assert(0);
2821 }
2822 return;
2823 }
2824
2825 // call a memhandler
2826 save_regs(reglist);
13e35c04 2827 pass_args(rs,rt);
b96d3df7 2828 int cc=get_reg(regmap,CCREG);
2829 if(cc<0)
2830 emit_loadreg(CCREG,2);
2573466a 2831 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2832 emit_movimm(handler,3);
2833 // returns new cycle_count
2834 emit_call((int)jump_handler_write_h);
2573466a 2835 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2836 if(cc<0)
2837 emit_storereg(CCREG,2);
2838 restore_regs(reglist);
57871462 2839}
2840
e2b5e7aa 2841static void do_unalignedwritestub(int n)
57871462 2842{
b7918751 2843 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2844 literal_pool(256);
57871462 2845 set_jump_target(stubs[n][1],(int)out);
b7918751 2846
2847 int i=stubs[n][3];
2848 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2849 int addr=stubs[n][5];
2850 u_int reglist=stubs[n][7];
2851 signed char *i_regmap=i_regs->regmap;
2852 int temp2=get_reg(i_regmap,FTEMP);
2853 int rt;
b7918751 2854 rt=get_reg(i_regmap,rs2[i]);
2855 assert(rt>=0);
2856 assert(addr>=0);
2857 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2858 reglist|=(1<<addr);
2859 reglist&=~(1<<temp2);
2860
b96d3df7 2861#if 1
2862 // don't bother with it and call write handler
2863 save_regs(reglist);
2864 pass_args(addr,rt);
2865 int cc=get_reg(i_regmap,CCREG);
2866 if(cc<0)
2867 emit_loadreg(CCREG,2);
2573466a 2868 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2869 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 2870 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2871 if(cc<0)
2872 emit_storereg(CCREG,2);
2873 restore_regs(reglist);
2874 emit_jmp(stubs[n][2]); // return address
2875#else
b7918751 2876 emit_andimm(addr,0xfffffffc,temp2);
2877 emit_writeword(temp2,(int)&address);
2878
2879 save_regs(reglist);
b7918751 2880 emit_shrimm(addr,16,1);
2881 int cc=get_reg(i_regmap,CCREG);
2882 if(cc<0) {
2883 emit_loadreg(CCREG,2);
2884 }
2885 emit_movimm((u_int)readmem,0);
2886 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2887 emit_call((int)&indirect_jump_indexed);
2888 restore_regs(reglist);
2889
2890 emit_readword((int)&readmem_dword,temp2);
2891 int temp=addr; //hmh
2892 emit_shlimm(addr,3,temp);
2893 emit_andimm(temp,24,temp);
2894#ifdef BIG_ENDIAN_MIPS
2895 if (opcode[i]==0x2e) // SWR
2896#else
2897 if (opcode[i]==0x2a) // SWL
2898#endif
2899 emit_xorimm(temp,24,temp);
2900 emit_movimm(-1,HOST_TEMPREG);
55439448 2901 if (opcode[i]==0x2a) { // SWL
b7918751 2902 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2903 emit_orrshr(rt,temp,temp2);
2904 }else{
2905 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2906 emit_orrshl(rt,temp,temp2);
2907 }
2908 emit_readword((int)&address,addr);
2909 emit_writeword(temp2,(int)&word);
2910 //save_regs(reglist); // don't need to, no state changes
2911 emit_shrimm(addr,16,1);
2912 emit_movimm((u_int)writemem,0);
2913 //emit_call((int)&indirect_jump_indexed);
2914 emit_mov(15,14);
2915 emit_readword_dualindexedx4(0,1,15);
2916 emit_readword((int)&Count,HOST_TEMPREG);
2917 emit_readword((int)&next_interupt,2);
2918 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2919 emit_writeword(2,(int)&last_count);
2920 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2921 if(cc<0) {
2922 emit_storereg(CCREG,HOST_TEMPREG);
2923 }
2924 restore_regs(reglist);
57871462 2925 emit_jmp(stubs[n][2]); // return address
b96d3df7 2926#endif
57871462 2927}
2928
e2b5e7aa 2929static void do_invstub(int n)
57871462 2930{
2931 literal_pool(20);
2932 u_int reglist=stubs[n][3];
2933 set_jump_target(stubs[n][1],(int)out);
2934 save_regs(reglist);
2935 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2936 emit_call((int)&invalidate_addr);
2937 restore_regs(reglist);
2938 emit_jmp(stubs[n][2]); // return address
2939}
2940
2941int do_dirty_stub(int i)
2942{
2943 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2944 u_int addr=(u_int)source;
57871462 2945 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2946 #ifndef HAVE_ARMV7
ac545b3a 2947 emit_loadlp(addr,1);
57871462 2948 emit_loadlp((int)copy,2);
2949 emit_loadlp(slen*4,3);
2950 #else
ac545b3a 2951 emit_movw(addr&0x0000FFFF,1);
57871462 2952 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2953 emit_movt(addr&0xFFFF0000,1);
57871462 2954 emit_movt(((u_int)copy)&0xFFFF0000,2);
2955 emit_movw(slen*4,3);
2956 #endif
2957 emit_movimm(start+i*4,0);
2958 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2959 int entry=(int)out;
2960 load_regs_entry(i);
2961 if(entry==(int)out) entry=instr_addr[i];
2962 emit_jmp(instr_addr[i]);
2963 return entry;
2964}
2965
e2b5e7aa 2966static void do_dirty_stub_ds()
57871462 2967{
2968 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2969 #ifndef HAVE_ARMV7
57871462 2970 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2971 emit_loadlp((int)copy,2);
2972 emit_loadlp(slen*4,3);
2973 #else
2974 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2975 emit_movw(((u_int)copy)&0x0000FFFF,2);
2976 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2977 emit_movt(((u_int)copy)&0xFFFF0000,2);
2978 emit_movw(slen*4,3);
2979 #endif
2980 emit_movimm(start+1,0);
2981 emit_call((int)&verify_code_ds);
2982}
2983
e2b5e7aa 2984static void do_cop1stub(int n)
57871462 2985{
2986 literal_pool(256);
2987 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2988 set_jump_target(stubs[n][1],(int)out);
2989 int i=stubs[n][3];
3d624f89 2990// int rs=stubs[n][4];
57871462 2991 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2992 int ds=stubs[n][6];
2993 if(!ds) {
2994 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2995 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2996 }
2997 //else {printf("fp exception in delay slot\n");}
2998 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2999 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3000 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3001 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3002 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3003}
3004
57871462 3005/* Special assem */
3006
e2b5e7aa 3007static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 3008{
3009 if(rt1[i]) {
3010 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3011 {
3012 signed char s,t,shift;
3013 t=get_reg(i_regs->regmap,rt1[i]);
3014 s=get_reg(i_regs->regmap,rs1[i]);
3015 shift=get_reg(i_regs->regmap,rs2[i]);
3016 if(t>=0){
3017 if(rs1[i]==0)
3018 {
3019 emit_zeroreg(t);
3020 }
3021 else if(rs2[i]==0)
3022 {
3023 assert(s>=0);
3024 if(s!=t) emit_mov(s,t);
3025 }
3026 else
3027 {
3028 emit_andimm(shift,31,HOST_TEMPREG);
3029 if(opcode2[i]==4) // SLLV
3030 {
3031 emit_shl(s,HOST_TEMPREG,t);
3032 }
3033 if(opcode2[i]==6) // SRLV
3034 {
3035 emit_shr(s,HOST_TEMPREG,t);
3036 }
3037 if(opcode2[i]==7) // SRAV
3038 {
3039 emit_sar(s,HOST_TEMPREG,t);
3040 }
3041 }
3042 }
3043 } else { // DSLLV/DSRLV/DSRAV
3044 signed char sh,sl,th,tl,shift;
3045 th=get_reg(i_regs->regmap,rt1[i]|64);
3046 tl=get_reg(i_regs->regmap,rt1[i]);
3047 sh=get_reg(i_regs->regmap,rs1[i]|64);
3048 sl=get_reg(i_regs->regmap,rs1[i]);
3049 shift=get_reg(i_regs->regmap,rs2[i]);
3050 if(tl>=0){
3051 if(rs1[i]==0)
3052 {
3053 emit_zeroreg(tl);
3054 if(th>=0) emit_zeroreg(th);
3055 }
3056 else if(rs2[i]==0)
3057 {
3058 assert(sl>=0);
3059 if(sl!=tl) emit_mov(sl,tl);
3060 if(th>=0&&sh!=th) emit_mov(sh,th);
3061 }
3062 else
3063 {
3064 // FIXME: What if shift==tl ?
3065 assert(shift!=tl);
3066 int temp=get_reg(i_regs->regmap,-1);
3067 int real_th=th;
3068 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3069 assert(sl>=0);
3070 assert(sh>=0);
3071 emit_andimm(shift,31,HOST_TEMPREG);
3072 if(opcode2[i]==0x14) // DSLLV
3073 {
3074 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3075 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3076 emit_orrshr(sl,HOST_TEMPREG,th);
3077 emit_andimm(shift,31,HOST_TEMPREG);
3078 emit_testimm(shift,32);
3079 emit_shl(sl,HOST_TEMPREG,tl);
3080 if(th>=0) emit_cmovne_reg(tl,th);
3081 emit_cmovne_imm(0,tl);
3082 }
3083 if(opcode2[i]==0x16) // DSRLV
3084 {
3085 assert(th>=0);
3086 emit_shr(sl,HOST_TEMPREG,tl);
3087 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3088 emit_orrshl(sh,HOST_TEMPREG,tl);
3089 emit_andimm(shift,31,HOST_TEMPREG);
3090 emit_testimm(shift,32);
3091 emit_shr(sh,HOST_TEMPREG,th);
3092 emit_cmovne_reg(th,tl);
3093 if(real_th>=0) emit_cmovne_imm(0,th);
3094 }
3095 if(opcode2[i]==0x17) // DSRAV
3096 {
3097 assert(th>=0);
3098 emit_shr(sl,HOST_TEMPREG,tl);
3099 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3100 if(real_th>=0) {
3101 assert(temp>=0);
3102 emit_sarimm(th,31,temp);
3103 }
3104 emit_orrshl(sh,HOST_TEMPREG,tl);
3105 emit_andimm(shift,31,HOST_TEMPREG);
3106 emit_testimm(shift,32);
3107 emit_sar(sh,HOST_TEMPREG,th);
3108 emit_cmovne_reg(th,tl);
3109 if(real_th>=0) emit_cmovne_reg(temp,th);
3110 }
3111 }
3112 }
3113 }
3114 }
3115}
ffb0b9e0 3116
ffb0b9e0 3117static void speculate_mov(int rs,int rt)
3118{
3119 if(rt!=0) {
3120 smrv_strong_next|=1<<rt;
3121 smrv[rt]=smrv[rs];
3122 }
3123}
3124
3125static void speculate_mov_weak(int rs,int rt)
3126{
3127 if(rt!=0) {
3128 smrv_weak_next|=1<<rt;
3129 smrv[rt]=smrv[rs];
3130 }
3131}
3132
3133static void speculate_register_values(int i)
3134{
3135 if(i==0) {
3136 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3137 // gp,sp are likely to stay the same throughout the block
3138 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3139 smrv_weak_next=~smrv_strong_next;
3140 //printf(" llr %08x\n", smrv[4]);
3141 }
3142 smrv_strong=smrv_strong_next;
3143 smrv_weak=smrv_weak_next;
3144 switch(itype[i]) {
3145 case ALU:
3146 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3147 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3148 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3149 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3150 else {
3151 smrv_strong_next&=~(1<<rt1[i]);
3152 smrv_weak_next&=~(1<<rt1[i]);
3153 }
3154 break;
3155 case SHIFTIMM:
3156 smrv_strong_next&=~(1<<rt1[i]);
3157 smrv_weak_next&=~(1<<rt1[i]);
3158 // fallthrough
3159 case IMM16:
3160 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3161 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3162 if(hr>=0) {
3163 if(get_final_value(hr,i,&value))
3164 smrv[rt1[i]]=value;
3165 else smrv[rt1[i]]=constmap[i][hr];
3166 smrv_strong_next|=1<<rt1[i];
3167 }
3168 }
3169 else {
3170 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3171 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3172 }
3173 break;
3174 case LOAD:
3175 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3176 // special case for BIOS
3177 smrv[rt1[i]]=0xa0000000;
3178 smrv_strong_next|=1<<rt1[i];
3179 break;
3180 }
3181 // fallthrough
3182 case SHIFT:
3183 case LOADLR:
3184 case MOV:
3185 smrv_strong_next&=~(1<<rt1[i]);
3186 smrv_weak_next&=~(1<<rt1[i]);
3187 break;
3188 case COP0:
3189 case COP2:
3190 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3191 smrv_strong_next&=~(1<<rt1[i]);
3192 smrv_weak_next&=~(1<<rt1[i]);
3193 }
3194 break;
3195 case C2LS:
3196 if (opcode[i]==0x32) { // LWC2
3197 smrv_strong_next&=~(1<<rt1[i]);
3198 smrv_weak_next&=~(1<<rt1[i]);
3199 }
3200 break;
3201 }
3202#if 0
3203 int r=4;
3204 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3205 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3206#endif
3207}
3208
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default: plain RAM check, no address fix-up */
  MTYPE_8020 = 1, /* 0x80200000..0x807fffff RAM mirrors */
  MTYPE_0000 = 2, /* RAM mirror at address 0 */
  MTYPE_A000 = 3, /* RAM mirror at 0xa0000000 */
  MTYPE_1F80 = 4, /* scratchpad at 0x1f800000 */
};
3216
3217static int get_ptr_mem_type(u_int a)
3218{
3219 if(a < 0x00200000) {
3220 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3221 // return wrong, must use memhandler for BIOS self-test to pass
3222 // 007 does similar stuff from a00 mirror, weird stuff
3223 return MTYPE_8000;
3224 return MTYPE_0000;
3225 }
3226 if(0x1f800000 <= a && a < 0x1f801000)
3227 return MTYPE_1F80;
3228 if(0x80200000 <= a && a < 0x80800000)
3229 return MTYPE_8020;
3230 if(0xa0000000 <= a && a < 0xa0200000)
3231 return MTYPE_A000;
3232 return MTYPE_8000;
3233}
ffb0b9e0 3234
3235static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3236{
581335b0 3237 int jaddr=0,type=0;
ffb0b9e0 3238 int mr=rs1[i];
3239 if(((smrv_strong|smrv_weak)>>mr)&1) {
3240 type=get_ptr_mem_type(smrv[mr]);
3241 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3242 }
3243 else {
3244 // use the mirror we are running on
3245 type=get_ptr_mem_type(start);
3246 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3247 }
3248
3249 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3250 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3251 addr=*addr_reg_override=HOST_TEMPREG;
3252 type=0;
3253 }
3254 else if(type==MTYPE_0000) { // RAM 0 mirror
3255 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3256 addr=*addr_reg_override=HOST_TEMPREG;
3257 type=0;
3258 }
3259 else if(type==MTYPE_A000) { // RAM A mirror
3260 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3261 addr=*addr_reg_override=HOST_TEMPREG;
3262 type=0;
3263 }
3264 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3265 if (psxH == (void *)0x1f800000) {
3266 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3267 emit_cmpimm(HOST_TEMPREG,0x1000);
3268 jaddr=(int)out;
3269 emit_jc(0);
3270 }
3271 else {
3272 // do usual RAM check, jump will go to the right handler
3273 type=0;
3274 }
ffb0b9e0 3275 }
ffb0b9e0 3276
3277 if(type==0)
3278 {
3279 emit_cmpimm(addr,RAM_SIZE);
3280 jaddr=(int)out;
3281 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3282 // Hint to branch predictor that the branch is unlikely to be taken
3283 if(rs1[i]>=28)
3284 emit_jno_unlikely(0);
3285 else
3286 #endif
3287 emit_jno(0);
a327ad27 3288 if(ram_offset!=0) {
3289 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3290 addr=*addr_reg_override=HOST_TEMPREG;
3291 }
ffb0b9e0 3292 }
3293
3294 return jaddr;
3295}
3296
57871462 3297#define shift_assemble shift_assemble_arm
3298
/*
 * Assemble an unaligned load for instruction i: LWL (0x22) / LWR (0x26),
 * or the 64-bit LDL (0x1A) / LDR (0x1B).
 *
 * i       index of the instruction within the block being compiled
 * i_regs  register allocation state used to look up host registers
 *
 * The aligned word (or doubleword) containing the address is read into the
 * FTEMP register(s), then shifted and merged into the destination
 * according to the low address bits.  When the base register is not a
 * known constant, a fast-path address check is emitted and a load stub is
 * registered for the slow path; when it is constant but not a RAM target,
 * the read goes through inline_readstub instead.
 */
static void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // Collect every host register currently in use, plus the temporary;
  // the list is handed to the slow-path stub / inline read below.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  reglist|=1<<temp;
  // NOTE(review): c is still 0 at this point (it is only computed below),
  // so the "||c" term has no effect here.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      // Constant address: decide at compile time whether it hits RAM.
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  if(!c) {
    #ifdef RAM_OFFSET
    map=get_reg(i_regs->regmap,ROREG);
    if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
    #endif
    // temp = address*8 (bit offset within the word), temp2 = aligned address
    emit_shlimm(addr,3,temp);
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
    }else{
      emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
    }
    jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
  }
  else {
    if(ram_offset&&memtarget) {
      emit_addimm(temp2,ram_offset,HOST_TEMPREG);
      fastload_reg_override=HOST_TEMPREG;
    }
    // The bit offset is known at compile time for a constant address.
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
    }else{
      emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
    }
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      int a=temp2;
      if(fastload_reg_override) a=fastload_reg_override;
      //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
      emit_readword_indexed_tlb(0,a,map,temp2);
      if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // Clear the bytes of tl that the loaded data replaces (all-ones mask
      // shifted by temp), then OR in the shifted data.
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    // FIXME: little endian, fastload_reg_override
    int temp2h=get_reg(i_regs->regmap,FTEMP|64);
    if(!c||memtarget) {
      //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
      //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
      emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
      if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(th>=0);
      assert(tl>=0);
      // Bit 5 of the bit offset selects which 32-bit half is affected;
      // the conditional (eq/ne) instructions below key off this test.
      emit_testimm(temp,32);
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x1A) { // LDL
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shl(temp2h,temp,temp2h);
        emit_orrshr(temp2,HOST_TEMPREG,temp2h);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shl(temp2,temp,temp2);
        emit_cmove_reg(temp2h,th);
        emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
        emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
        emit_orreq(temp2,tl,tl);
        emit_orrne(temp2,th,th);
      }
      if (opcode[i]==0x1B) { // LDR
        emit_xorimm(temp,24,temp);
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shr(temp2,temp,temp2);
        emit_orrshl(temp2h,HOST_TEMPREG,temp2);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shr(temp2h,temp,temp2h);
        emit_cmovne_reg(temp2,tl);
        emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
        emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
        emit_orrne(temp2h,th,th);
        emit_orreq(temp2h,tl,tl);
      }
    }
  }
}
3426#define loadlr_assemble loadlr_assemble_arm
3427
/*
 * Assemble a coprocessor-0 instruction for instruction i.
 *
 * opcode2 == 0     MFC0: read reg_cop0[copr] into the destination.
 * opcode2 == 4     MTC0: call pcsx_mtc0 (or pcsx_mtc0_ds in a delay slot)
 *                  with r0 = copr and r1 = value, keeping Count and the
 *                  cycle counter in sync around the call for registers
 *                  9, 11, 12 and 13.
 * opcode2 == 0x10  only RFE (function field 0x10) emits code.
 *
 * i       index of the instruction within the block being compiled
 * i_regs  register allocation state used to look up host registers
 */
static void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    char copr=(source[i]>>11)&0x1f;
    //assert(t>=0); // Why does this happen?  OOT is weird
    if(t>=0&&rt1[i]!=0) {
      emit_readword((int)&reg_cop0+copr*4,t);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Bring Count up to date: Count = last_count + CCREG + cycle adjust
      emit_readword((int)&last_count,HOST_TEMPREG);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
    }
    // What a mess.  The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    if(copr==12||copr==13) {
      if (is_delayslot) {
        // burn cycles to cause cc_interrupt, which will
        // reschedule next_interupt. Relies on CCREG from above.
        assem_debug("MTC0 DS %d\n", copr);
        emit_writeword(HOST_CCREG,(int)&last_count);
        emit_movimm(0,HOST_CCREG);
        emit_storereg(CCREG,HOST_CCREG);
        emit_loadreg(rs1[i],1);
        emit_movimm(copr,0);
        emit_call((int)pcsx_mtc0_ds);
        // Reload the source register after the call.
        emit_loadreg(rs1[i],s);
        return;
      }
      // Record the address of the next instruction in pcaddr and clear
      // pending_exception before the call.
      emit_movimm(start+i*4+4,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,(int)&pcaddr);
      emit_movimm(0,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,(int)&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
    // Value goes in r1.  If the source lived in HOST_CCREG it has been
    // overwritten by the Count computation above, so reload it.
    if(s==HOST_CCREG)
      emit_loadreg(rs1[i],1);
    else if(s!=1)
      emit_mov(s,1);
    emit_movimm(copr,0);
    emit_call((int)pcsx_mtc0);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Rebase the cycle counter on next_interupt:
      // CCREG = Count - cycle adjust - next_interupt, last_count = next_interupt
      emit_readword((int)&Count,HOST_CCREG);
      emit_readword((int)&next_interupt,HOST_TEMPREG);
      emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_writeword(HOST_TEMPREG,(int)&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // Jump to do_interrupt if the call flagged a pending exception.
      emit_readword((int)&pending_exception,14);
      emit_test(14,14);
      emit_jne((int)&do_interrupt);
    }
    emit_loadreg(rs1[i],s);
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
    cop1_usable=0;
  }
  else
  {
    assert(opcode2[i]==0x10);
    if((source[i]&0x3f)==0x10) // RFE
    {
      // Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
      emit_readword((int)&Status,0);
      emit_andimm(0,0x3c,1);
      emit_andimm(0,~0xf,0);
      emit_orrshr_imm(1,2,0);
      emit_writeword(0,(int)&Status);
    }
  }
}
3515
b9b61529 3516static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3517{
3518 switch (copr) {
3519 case 1:
3520 case 3:
3521 case 5:
3522 case 8:
3523 case 9:
3524 case 10:
3525 case 11:
3526 emit_readword((int)&reg_cop2d[copr],tl);
3527 emit_signextend16(tl,tl);
3528 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3529 break;
3530 case 7:
3531 case 16:
3532 case 17:
3533 case 18:
3534 case 19:
3535 emit_readword((int)&reg_cop2d[copr],tl);
3536 emit_andimm(tl,0xffff,tl);
3537 emit_writeword(tl,(int)&reg_cop2d[copr]);
3538 break;
3539 case 15:
3540 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3541 emit_writeword(tl,(int)&reg_cop2d[copr]);
3542 break;
3543 case 28:
b9b61529 3544 case 29:
3545 emit_readword((int)&reg_cop2d[9],temp);
3546 emit_testimm(temp,0x8000); // do we need this?
3547 emit_andimm(temp,0xf80,temp);
3548 emit_andne_imm(temp,0,temp);
f70d384d 3549 emit_shrimm(temp,7,tl);
b9b61529 3550 emit_readword((int)&reg_cop2d[10],temp);
3551 emit_testimm(temp,0x8000);
3552 emit_andimm(temp,0xf80,temp);
3553 emit_andne_imm(temp,0,temp);
f70d384d 3554 emit_orrshr_imm(temp,2,tl);
b9b61529 3555 emit_readword((int)&reg_cop2d[11],temp);
3556 emit_testimm(temp,0x8000);
3557 emit_andimm(temp,0xf80,temp);
3558 emit_andne_imm(temp,0,temp);
f70d384d 3559 emit_orrshl_imm(temp,3,tl);
b9b61529 3560 emit_writeword(tl,(int)&reg_cop2d[copr]);
3561 break;
3562 default:
3563 emit_readword((int)&reg_cop2d[copr],tl);
3564 break;
3565 }
3566}
3567
3568static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3569{
3570 switch (copr) {
3571 case 15:
3572 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3573 emit_writeword(sl,(int)&reg_cop2d[copr]);
3574 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3575 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3576 emit_writeword(sl,(int)&reg_cop2d[14]);
3577 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3578 break;
3579 case 28:
3580 emit_andimm(sl,0x001f,temp);
f70d384d 3581 emit_shlimm(temp,7,temp);
b9b61529 3582 emit_writeword(temp,(int)&reg_cop2d[9]);
3583 emit_andimm(sl,0x03e0,temp);
f70d384d 3584 emit_shlimm(temp,2,temp);
b9b61529 3585 emit_writeword(temp,(int)&reg_cop2d[10]);
3586 emit_andimm(sl,0x7c00,temp);
f70d384d 3587 emit_shrimm(temp,3,temp);
b9b61529 3588 emit_writeword(temp,(int)&reg_cop2d[11]);
3589 emit_writeword(sl,(int)&reg_cop2d[28]);
3590 break;
3591 case 30:
3592 emit_movs(sl,temp);
3593 emit_mvnmi(temp,temp);
665f33e1 3594#ifdef HAVE_ARMV5
b9b61529 3595 emit_clz(temp,temp);
665f33e1 3596#else
3597 emit_movs(temp,HOST_TEMPREG);
3598 emit_movimm(0,temp);
3599 emit_jeq((int)out+4*4);
3600 emit_addpl_imm(temp,1,temp);
3601 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3602 emit_jns((int)out-2*4);
3603#endif
b9b61529 3604 emit_writeword(sl,(int)&reg_cop2d[30]);
3605 emit_writeword(temp,(int)&reg_cop2d[31]);
3606 break;
b9b61529 3607 case 31:
3608 break;
3609 default:
3610 emit_writeword(sl,(int)&reg_cop2d[copr]);
3611 break;
3612 }
3613}
3614
e2b5e7aa 3615static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3616{
3617 u_int copr=(source[i]>>11)&0x1f;
3618 signed char temp=get_reg(i_regs->regmap,-1);
3619 if (opcode2[i]==0) { // MFC2
3620 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3621 if(tl>=0&&rt1[i]!=0)
b9b61529 3622 cop2_get_dreg(copr,tl,temp);
3623 }
3624 else if (opcode2[i]==4) { // MTC2
3625 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3626 cop2_put_dreg(copr,sl,temp);
3627 }
3628 else if (opcode2[i]==2) // CFC2
3629 {
3630 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3631 if(tl>=0&&rt1[i]!=0)
b9b61529 3632 emit_readword((int)&reg_cop2c[copr],tl);
3633 }
3634 else if (opcode2[i]==6) // CTC2
3635 {
3636 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3637 switch(copr) {
3638 case 4:
3639 case 12:
3640 case 20:
3641 case 26:
3642 case 27:
3643 case 29:
3644 case 30:
3645 emit_signextend16(sl,temp);
3646 break;
3647 case 31:
3648 //value = value & 0x7ffff000;
3649 //if (value & 0x7f87e000) value |= 0x80000000;
3650 emit_shrimm(sl,12,temp);
3651 emit_shlimm(temp,12,temp);
3652 emit_testimm(temp,0x7f000000);
3653 emit_testeqimm(temp,0x00870000);
3654 emit_testeqimm(temp,0x0000e000);
3655 emit_orrne_imm(temp,0x80000000,temp);
3656 break;
3657 default:
3658 temp=sl;
3659 break;
3660 }
3661 emit_writeword(temp,(int)&reg_cop2c[copr]);
3662 assert(sl>=0);
3663 }
3664}
3665
054175e9 3666static void c2op_prologue(u_int op,u_int reglist)
3667{
3668 save_regs_all(reglist);
82ed88eb 3669#ifdef PCNT
3670 emit_movimm(op,0);
3671 emit_call((int)pcnt_gte_start);
3672#endif
054175e9 3673 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3674}
3675
3676static void c2op_epilogue(u_int op,u_int reglist)
3677{
82ed88eb 3678#ifdef PCNT
3679 emit_movimm(op,0);
3680 emit_call((int)pcnt_gte_end);
3681#endif
054175e9 3682 restore_regs_all(reglist);
3683}
3684
6c0eefaf 3685static void c2op_call_MACtoIR(int lm,int need_flags)
3686{
3687 if(need_flags)
3688 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3689 else
3690 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3691}
3692
3693static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3694{
3695 emit_call((int)func);
3696 // func is C code and trashes r0
3697 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3698 if(need_flags||need_ir)
3699 c2op_call_MACtoIR(lm,need_flags);
3700 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3701}
3702
/*
 * Assemble a GTE (cop2) operation for instruction i.
 *
 * The operation number is the low six bits of the opcode.  Nothing is
 * emitted when no handler is registered for it.  Otherwise the caller-save
 * registers that are in use are saved, a specialised routine (or the
 * generic handler) is called, and the registers are restored.
 *
 * Flag and IR computation is skipped when the liveness information in
 * gte_unneeded says the results are not used, or when the
 * NDHACK_GTE_NO_FLAGS hack is active (flags only).
 *
 * i       index of the instruction within the block being compiled
 * i_regs  register allocation state used to look up host registers
 */
static void c2op_assemble(int i,struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f;
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // Every host register currently holding a guest value
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    // Bits 9..11 of the mask all set means none of them is needed
    // (presumably IR1..IR3, cop2 data registers 9..11 -- confirm).
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
      need_flags=0;
    // Opcode bit 19 and bit 10, passed on to the specialised routines
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        // Vector, translation-vector and matrix selectors from the opcode
        int v  = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          emit_movzwl_indexed(9*4,0,4);  // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        // Selector 3 uses the zeromem_ptr pointer instead of a register bank.
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword((int)&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword((int)&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_call((int)gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_call((int)gteMACtoIR_flags_neon);
        }
#else
        if(cv==3&&shift)
          emit_call((int)gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        // Generic handler: it reads the opcode from psxRegs.code.
        c2op_prologue(c2op,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,(int)&psxRegs.code);
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,reglist);
        emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
        if(need_flags||need_ir) {
          // Reload r0 with the cop2 register base before the next call.
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,reglist);
        emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
        if(need_flags||need_ir) {
          // Reload r0 with the cop2 register base before the next call.
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        // Everything else goes through the generic handler table.
        c2op_prologue(c2op,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,(int)&psxRegs.code);
#endif
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
3822
e2b5e7aa 3823static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3824{
3825 // XXX: should just just do the exception instead
3826 if(!cop1_usable) {
3827 int jaddr=(int)out;
3828 emit_jmp(0);
3829 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3830 cop1_usable=1;
3831 }
3832}
3833
/* Assemble a cop1 move: delegates entirely to the cop1-unusable path. */
static void cop1_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3838
/* Assemble a float conversion: delegates entirely to the cop1-unusable path. */
static void fconv_assemble_arm(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3843#define fconv_assemble fconv_assemble_arm
3844
/* Assemble a float compare: delegates entirely to the cop1-unusable path. */
static void fcomp_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3849
/* Assemble a float arithmetic op: delegates entirely to the cop1-unusable path. */
static void float_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3854
/*
 * Assemble MULT/MULTU/DIV/DIVU for instruction i, leaving the result in
 * the host registers allocated to HIREG and LOREG.
 *
 * i       index of the instruction within the block being compiled
 * i_regs  register allocation state used to look up host registers
 *
 * The 64-bit variants (opcode2 bit 2 set) are not supported and assert.
 * When either source operand is guest register 0 the result registers are
 * simply zeroed.
 *
 * The division sequences use hard-coded relative branch offsets
 * ((int)out+N / (int)out-N), so the number and order of emitted
 * instructions must not change.
 */
static void multdiv_assemble_arm(int i,struct regstat *i_regs)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      if(opcode2[i]==0x18) // MULT
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        emit_smull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x19) // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        emit_umull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x1A) // DIV
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]);
        signed char d2=get_reg(i_regs->regmap,rs2[i]);
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        // Preset the div-by-zero results, conditionally negated on the
        // sign of the dividend.
        emit_movs(d1,remainder);
        emit_movimm(0xffffffff,quotient);
        emit_negmi(quotient,quotient); // .. quotient and ..
        emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
        emit_movs(d2,HOST_TEMPREG);
        emit_jeq((int)out+52); // Division by zero
        emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
#ifdef HAVE_ARMV5
        emit_clz(HOST_TEMPREG,quotient);
        emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
#else
        // No CLZ: count by shifting left until the top bit is set.
        emit_movimm(0,quotient);
        emit_addpl_imm(quotient,1,quotient);
        emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_jns((int)out-2*4);
#endif
        emit_orimm(quotient,1<<31,quotient);
        emit_shr(quotient,quotient,quotient);
        // Shift-and-subtract loop
        emit_cmp(remainder,HOST_TEMPREG);
        emit_subcs(remainder,HOST_TEMPREG,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_jcc((int)out-16); // -4
        // Fix up the signs: quotient is negative when the operand signs
        // differ, remainder follows the dividend.
        emit_teq(d1,d2);
        emit_negmi(quotient,quotient);
        emit_test(d1,d1);
        emit_negmi(remainder,remainder);
      }
      if(opcode2[i]==0x1B) // DIVU
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
        signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        emit_mov(d1,remainder);
        emit_movimm(0xffffffff,quotient); // div0 case
        emit_test(d2,d2);
        emit_jeq((int)out+40); // Division by zero
#ifdef HAVE_ARMV5
        emit_clz(d2,HOST_TEMPREG);
        emit_movimm(1<<31,quotient);
        emit_shl(d2,HOST_TEMPREG,d2);
#else
        // No CLZ: count by shifting left until the top bit is set.
        emit_movimm(0,HOST_TEMPREG);
        emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_lslpls_imm(d2,1,d2);
        emit_jns((int)out-2*4);
        emit_movimm(1<<31,quotient);
#endif
        emit_shr(quotient,HOST_TEMPREG,quotient);
        // Shift-and-subtract loop
        emit_cmp(remainder,d2);
        emit_subcs(remainder,d2,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrcc_imm(d2,1,d2);
        emit_jcc((int)out-16); // -4
      }
    }
    else // 64-bit
      assert(0);
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if(hr>=0) emit_zeroreg(hr);
    if(lr>=0) emit_zeroreg(lr);
  }
}
3978#define multdiv_assemble multdiv_assemble_arm
3979
/*
 * Intentionally empty on ARM.  The x86 backend loads the hash mask 0xf8
 * into register r here; on ARM the hash is computed by a single
 * instruction in do_rhash(), so no preload is required.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
3984
e2b5e7aa 3985static void do_preload_rhtbl(int ht) {
57871462 3986 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3987}
3988
/* Compute the mini hash table offset of the address in rs: rh = rs & 0xf8. */
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
3992
e2b5e7aa 3993static void do_miniht_load(int ht,int rh) {
57871462 3994 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3995 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3996}
3997
e2b5e7aa 3998static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3999 emit_cmp(rh,rs);
4000 emit_ldreq_indexed(ht,4,15);
4001 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4002 emit_mov(rs,7);
4003 emit_jmp(jump_vaddr_reg[7]);
4004 #else
4005 emit_jmp(jump_vaddr_reg[rs]);
4006 #endif
4007}
4008
e2b5e7aa 4009static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 4010 #ifndef HAVE_ARMV7
57871462 4011 emit_movimm(return_address,rt); // PC into link register
4012 add_to_linker((int)out,return_address,1);
4013 emit_pcreladdr(temp);
4014 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4015 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4016 #else
4017 emit_movw(return_address&0x0000FFFF,rt);
4018 add_to_linker((int)out,return_address,1);
4019 emit_pcreladdr(temp);
4020 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4021 emit_movt(return_address&0xFFFF0000,rt);
4022 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4023 #endif
4024}
4025
e2b5e7aa 4026static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4027{
4028 //if(dirty_pre==dirty) return;
581335b0 4029 int hr,reg;
57871462 4030 for(hr=0;hr<HOST_REGS;hr++) {
4031 if(hr!=EXCLUDE_REG) {
4032 reg=pre[hr];
4033 if(((~u)>>(reg&63))&1) {
f776eb14 4034 if(reg>0) {
57871462 4035 if(((dirty_pre&~dirty)>>hr)&1) {
4036 if(reg>0&&reg<34) {
4037 emit_storereg(reg,hr);
4038 if( ((is32_pre&~uu)>>reg)&1 ) {
4039 emit_sarimm(hr,31,HOST_TEMPREG);
4040 emit_storereg(reg|64,HOST_TEMPREG);
4041 }
4042 }
4043 else if(reg>=64) {
4044 emit_storereg(reg,hr);
4045 }
4046 }
4047 }
57871462 4048 }
4049 }
4050 }
4051}
4052
4053
4054/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4055static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4056{
4057 int hr;
4058 int wrote=-1;
4059 for(hr=HOST_REGS-1;hr>=0;hr--) {
4060 if(hr!=EXCLUDE_REG) {
4061 if(pre[hr]!=entry[hr]) {
4062 if(pre[hr]>=0) {
4063 if((dirty>>hr)&1) {
4064 if(get_reg(entry,pre[hr])<0) {
4065 if(pre[hr]<64) {
4066 if(!((u>>pre[hr])&1)) {
4067 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4068 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4069 emit_sarimm(hr,31,hr+1);
4070 emit_strdreg(pre[hr],hr);
4071 }
4072 else
4073 emit_storereg(pre[hr],hr);
4074 }else{
4075 emit_storereg(pre[hr],hr);
4076 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4077 emit_sarimm(hr,31,hr);
4078 emit_storereg(pre[hr]|64,hr);
4079 }
4080 }
4081 }
4082 }else{
4083 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4084 emit_storereg(pre[hr],hr);
4085 }
4086 }
4087 wrote=hr;
4088 }
4089 }
4090 }
4091 }
4092 }
4093 }
4094 for(hr=0;hr<HOST_REGS;hr++) {
4095 if(hr!=EXCLUDE_REG) {
4096 if(pre[hr]!=entry[hr]) {
4097 if(pre[hr]>=0) {
4098 int nr;
4099 if((nr=get_reg(entry,pre[hr]))>=0) {
4100 emit_mov(hr,nr);
4101 }
4102 }
4103 }
4104 }
4105 }
4106}
4107#define wb_invalidate wb_invalidate_arm
4108*/
4109
d148d265 4110static void mark_clear_cache(void *target)
4111{
4112 u_long offset = (char *)target - (char *)BASE_ADDR;
4113 u_int mask = 1u << ((offset >> 12) & 31);
4114 if (!(needs_clear_cache[offset >> 17] & mask)) {
4115 char *start = (char *)((u_long)target & ~4095ul);
4116 start_tcache_write(start, start + 4096);
4117 needs_clear_cache[offset >> 17] |= mask;
4118 }
4119}
4120
dd3a91a1 4121// Clearing the cache is rather slow on ARM Linux, so mark the areas
4122// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4123static void do_clear_cache()
dd3a91a1 4124{
4125 int i,j;
4126 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4127 {
4128 u_int bitmap=needs_clear_cache[i];
4129 if(bitmap) {
4130 u_int start,end;
9f51b4b9 4131 for(j=0;j<32;j++)
dd3a91a1 4132 {
4133 if(bitmap&(1<<j)) {
bdeade46 4134 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4135 end=start+4095;
4136 j++;
4137 while(j<32) {
4138 if(bitmap&(1<<j)) {
4139 end+=4096;
4140 j++;
4141 }else{
d148d265 4142 end_tcache_write((void *)start,(void *)end);
dd3a91a1 4143 break;
4144 }
4145 }
4146 }
4147 }
4148 needs_clear_cache[i]=0;
4149 }
4150 }
4151}
4152
/*
 * CPU-architecture-specific initialization hook.
 * Empty in this backend: no setup is performed here.
 */
static void arch_init()
{
}
b9b61529 4156
4157// vim:shiftwidth=2:expandtab