some drc debug patches
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 GraÅžvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
// Storage for the translation cache. Depending on the build configuration:
//  - BASE_ADDR_FIXED:   nothing is declared here
//  - BASE_ADDR_DYNAMIC: only a pointer is defined here
//  - otherwise:         a static array of (1 << TARGET_SIZE_2) bytes,
//                       aligned to 4096 bytes
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Bitmask with one bit per ARM register number: 0x100f has bits 0-3 and 12
// set (r0-r3, r12); the __MACH__ variant 0x120f additionally sets bit 9 (r9).
// NOTE(review): presumably the registers to save around calls into C code -
// confirm against the users of this macro.
38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
44#define unused __attribute__((unused))
45
46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
// State defined outside this file. emit_loadreg()/emit_storereg() address
// cycle_count as an offset from dynarec_local (the generated code reaches it
// through the FP register).
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
// Routines implemented outside this file (no bodies are visible here).
61void indirect_jump_indexed();
62void indirect_jump();
63void do_interrupt();
64void jump_vaddr_r0();
65void jump_vaddr_r1();
66void jump_vaddr_r2();
67void jump_vaddr_r3();
68void jump_vaddr_r4();
69void jump_vaddr_r5();
70void jump_vaddr_r6();
71void jump_vaddr_r7();
72void jump_vaddr_r8();
73void jump_vaddr_r9();
74void jump_vaddr_r10();
75void jump_vaddr_r12();
76
// Table of jump_vaddr_rN entry addresses indexed by N. Slots 11, 13, 14 and
// 15 (fp, sp, lr, pc in regname[]) have no routine and hold 0.
// NOTE(review): presumably indexed by the host register holding the target
// virtual address - confirm at the call sites.
77const u_int jump_vaddr_reg[16] = {
78 (int)jump_vaddr_r0,
79 (int)jump_vaddr_r1,
80 (int)jump_vaddr_r2,
81 (int)jump_vaddr_r3,
82 (int)jump_vaddr_r4,
83 (int)jump_vaddr_r5,
84 (int)jump_vaddr_r6,
85 (int)jump_vaddr_r7,
86 (int)jump_vaddr_r8,
87 (int)jump_vaddr_r9,
88 (int)jump_vaddr_r10,
89 0,
90 (int)jump_vaddr_r12,
91 0,
92 0,
93 0};
94
// Routines implemented outside this file (no bodies are visible here).
95void invalidate_addr_r0();
96void invalidate_addr_r1();
97void invalidate_addr_r2();
98void invalidate_addr_r3();
99void invalidate_addr_r4();
100void invalidate_addr_r5();
101void invalidate_addr_r6();
102void invalidate_addr_r7();
103void invalidate_addr_r8();
104void invalidate_addr_r9();
105void invalidate_addr_r10();
106void invalidate_addr_r12();
107
// Table of invalidate_addr_rN entry addresses indexed by N. Slots 11, 13, 14
// and 15 (fp, sp, lr, pc in regname[]) have no routine and hold 0.
// NOTE(review): presumably indexed by the host register holding the address
// to invalidate - confirm at the call sites.
108const u_int invalidate_addr_reg[16] = {
109 (int)invalidate_addr_r0,
110 (int)invalidate_addr_r1,
111 (int)invalidate_addr_r2,
112 (int)invalidate_addr_r3,
113 (int)invalidate_addr_r4,
114 (int)invalidate_addr_r5,
115 (int)invalidate_addr_r6,
116 (int)invalidate_addr_r7,
117 (int)invalidate_addr_r8,
118 (int)invalidate_addr_r9,
119 (int)invalidate_addr_r10,
120 0,
121 (int)invalidate_addr_r12,
122 0,
123 0,
124 0};
125
// Array of 1<<(TARGET_SIZE_2-17) words, i.e. 1<<(TARGET_SIZE_2-12) bits.
// NOTE(review): presumably one flag bit per 4096-byte page of the translation
// cache that still needs an instruction-cache flush - confirm against users.
126static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
127
128/* Linker */
129
130static void set_jump_target(int addr,u_int target)
131{
132 u_char *ptr=(u_char *)addr;
133 u_int *ptr2=(u_int *)ptr;
134 if(ptr[3]==0xe2) {
135 assert((target-(u_int)ptr2-8)<1024);
136 assert((addr&3)==0);
137 assert((target&3)==0);
138 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
139 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
140 }
141 else if(ptr[3]==0x72) {
142 // generated by emit_jno_unlikely
143 if((target-(u_int)ptr2-8)<1024) {
144 assert((addr&3)==0);
145 assert((target&3)==0);
146 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
147 }
148 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
149 assert((addr&3)==0);
150 assert((target&3)==0);
151 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
152 }
153 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
154 }
155 else {
156 assert((ptr[3]&0x0e)==0xa);
157 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
158 }
159}
160
161// This optionally copies the instruction from the target of the branch into
162// the space before the branch. Works, but the difference in speed is
163// usually insignificant.
164#if 0
// Disabled (#if 0) variant of set_jump_target(). When copy is non-zero and
// the instruction at target passes the three filters below, that instruction
// is written into the word before the branch (which must currently hold
// 0xe28dd000) and the branch is pointed at target+4 instead.
165static void set_jump_target_fillslot(int addr,u_int target,int copy)
166{
167 u_char *ptr=(u_char *)addr;
168 u_int *ptr2=(u_int *)ptr;
169 assert(!copy||ptr2[-1]==0xe28dd000);
170 if(ptr[3]==0xe2) {
171 assert(!copy);
172 assert((target-(u_int)ptr2-8)<4096);
173 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
174 }
175 else {
176 assert((ptr[3]&0x0e)==0xa);
177 u_int target_insn=*(u_int *)target;
178 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
179 copy=0;
180 }
181 if((target_insn&0x0c100000)==0x04100000) { // Load
182 copy=0;
183 }
184 if(target_insn&0x08000000) {
185 copy=0;
186 }
187 if(copy) {
188 ptr2[-1]=target_insn;
189 target+=4;
190 }
191 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
192 }
193}
194#endif
195
196/* Literal pool */
197static void add_literal(int addr,int val)
198{
199 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
200 literals[literalcount][0]=addr;
201 literals[literalcount][1]=val;
202 literalcount++;
203}
204
205// from a pointer to external jump stub (which was produced by emit_extjump2)
206// find where the jumping insn is
207static void *find_extjump_insn(void *stub)
208{
209 int *ptr=(int *)(stub+4);
210 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
211 u_int offset=*ptr&0xfff;
212 void **l_ptr=(void *)ptr+offset+8;
213 return *l_ptr;
214}
215
216// find where external branch is liked to using addr of it's stub:
217// get address that insn one after stub loads (dyna_linker arg1),
218// treat it as a pointer to branch insn,
219// return addr where that branch jumps to
/*
 * Return the address that the branch belonging to an external jump stub
 * currently jumps to. The branch is located with find_extjump_insn() and its
 * signed 24-bit word offset is decoded relative to the branch address + 8.
 */
static int get_pointer(void *stub)
{
  int *branch=find_extjump_insn(stub);

  assert((*branch&0x0f000000)==0x0a000000); // must be a B instruction
  return (int)branch+((*branch<<8)>>6)+8;
}
227
228// Find the "clean" entry point from a "dirty" entry point
229// by skipping past the call to verify_code
230static u_int get_clean_addr(int addr)
231{
232 int *ptr=(int *)addr;
233 #ifndef HAVE_ARMV7
234 ptr+=4;
235 #else
236 ptr+=6;
237 #endif
238 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
239 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
240 ptr++;
241 if((*ptr&0xFF000000)==0xea000000) {
242 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
243 }
244 return (u_int)ptr;
245}
246
247static int verify_dirty(u_int *ptr)
248{
249 #ifndef HAVE_ARMV7
250 u_int offset;
251 // get from literal pool
252 assert((*ptr&0xFFFF0000)==0xe59f0000);
253 offset=*ptr&0xfff;
254 u_int source=*(u_int*)((void *)ptr+offset+8);
255 ptr++;
256 assert((*ptr&0xFFFF0000)==0xe59f0000);
257 offset=*ptr&0xfff;
258 u_int copy=*(u_int*)((void *)ptr+offset+8);
259 ptr++;
260 assert((*ptr&0xFFFF0000)==0xe59f0000);
261 offset=*ptr&0xfff;
262 u_int len=*(u_int*)((void *)ptr+offset+8);
263 ptr++;
264 ptr++;
265 #else
266 // ARMv7 movw/movt
267 assert((*ptr&0xFFF00000)==0xe3000000);
268 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
269 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
270 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
271 ptr+=6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
275 //printf("verify_dirty: %x %x %x\n",source,copy,len);
276 return !memcmp((void *)source,(void *)copy,len);
277}
278
279// This doesn't necessarily find all clean entry points, just
280// guarantees that it's not dirty
281static int isclean(int addr)
282{
283 #ifndef HAVE_ARMV7
284 u_int *ptr=((u_int *)addr)+4;
285 #else
286 u_int *ptr=((u_int *)addr)+6;
287 #endif
288 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
289 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
290 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
293 return 1;
294}
295
296// get source that block at addr was compiled from (host pointers)
297static void get_bounds(int addr,u_int *start,u_int *end)
298{
299 u_int *ptr=(u_int *)addr;
300 #ifndef HAVE_ARMV7
301 u_int offset;
302 // get from literal pool
303 assert((*ptr&0xFFFF0000)==0xe59f0000);
304 offset=*ptr&0xfff;
305 u_int source=*(u_int*)((void *)ptr+offset+8);
306 ptr++;
307 //assert((*ptr&0xFFFF0000)==0xe59f0000);
308 //offset=*ptr&0xfff;
309 //u_int copy=*(u_int*)((void *)ptr+offset+8);
310 ptr++;
311 assert((*ptr&0xFFFF0000)==0xe59f0000);
312 offset=*ptr&0xfff;
313 u_int len=*(u_int*)((void *)ptr+offset+8);
314 ptr++;
315 ptr++;
316 #else
317 // ARMv7 movw/movt
318 assert((*ptr&0xFFF00000)==0xe3000000);
319 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
320 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
321 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
322 ptr+=6;
323 #endif
324 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
325 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
326 *start=source;
327 *end=source+len;
328}
329
330/* Register allocation */
331
332// Note: registers are allocated clean (unmodified state)
333// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register in cur->regmap[] for
// instruction index i. The new mapping always has its dirty and isconst bits
// cleared. Strategy, in order:
//  1. return at once if reg is flagged in cur->u or is already mapped
//  2. take the preferred host register (reg&7, HOST_CCREG for CCREG, 12 for
//     PTEMP/FTEMP, possibly overridden by loop_reg()) if it is free or its
//     occupant is flagged unneeded in cur->u / cur->uu
//  3. unmap one unneeded occupant, then take any free host register except
//     EXCLUDE_REG, preferring one not tied to the previous instruction's
//     rs1/rs2/rt1/rt2
//  4. evict an occupant chosen from the hsn[] values filled in by lsn(),
//     trying larger values first
// Prints a message and exits the process if nothing can be found.
334static void alloc_reg(struct regstat *cur,int i,signed char reg)
335{
336 int r,hr;
337 int preferred_reg = (reg&7);
338 if(reg==CCREG) preferred_reg=HOST_CCREG;
339 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
340
341 // Don't allocate unused registers
342 if((cur->u>>reg)&1) return;
343
344 // see if it's already allocated
345 for(hr=0;hr<HOST_REGS;hr++)
346 {
347 if(cur->regmap[hr]==reg) return;
348 }
349
350 // Keep the same mapping if the register was already allocated in a loop
351 preferred_reg = loop_reg(i,reg,preferred_reg);
352
353 // Try to allocate the preferred register
354 if(cur->regmap[preferred_reg]==-1) {
355 cur->regmap[preferred_reg]=reg;
356 cur->dirty&=~(1<<preferred_reg);
357 cur->isconst&=~(1<<preferred_reg);
358 return;
359 }
360 r=cur->regmap[preferred_reg];
// occupant values below 64 are checked against cur->u, values of 64 and
// above (upper halves, reg|64) against cur->uu
361 if(r<64&&((cur->u>>r)&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367 if(r>=64&&((cur->uu>>(r&63))&1)) {
368 cur->regmap[preferred_reg]=reg;
369 cur->dirty&=~(1<<preferred_reg);
370 cur->isconst&=~(1<<preferred_reg);
371 return;
372 }
373
374 // Clear any unneeded registers
375 // We try to keep the mapping consistent, if possible, because it
376 // makes branches easier (especially loops). So we try to allocate
377 // first (see above) before removing old mappings. If this is not
378 // possible then go ahead and clear out the registers that are no
379 // longer needed.
// (the loop stops after unmapping the first unneeded occupant it finds)
380 for(hr=0;hr<HOST_REGS;hr++)
381 {
382 r=cur->regmap[hr];
383 if(r>=0) {
384 if(r<64) {
385 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
386 }
387 else
388 {
389 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
390 }
391 }
392 }
393 // Try to allocate any available register, but prefer
394 // registers that have not been used recently.
395 if(i>0) {
396 for(hr=0;hr<HOST_REGS;hr++) {
397 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
398 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
399 cur->regmap[hr]=reg;
400 cur->dirty&=~(1<<hr);
401 cur->isconst&=~(1<<hr);
402 return;
403 }
404 }
405 }
406 }
407 // Try to allocate any available register
408 for(hr=0;hr<HOST_REGS;hr++) {
409 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
410 cur->regmap[hr]=reg;
411 cur->dirty&=~(1<<hr);
412 cur->isconst&=~(1<<hr);
413 return;
414 }
415 }
416
417 // Ok, now we have to evict someone
418 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 everywhere and is then updated by lsn(), which may also
// change preferred_reg
419 u_char hsn[MAXREG+1];
420 memset(hsn,10,sizeof(hsn));
421 int j;
422 lsn(hsn,i,&preferred_reg);
423 // (x86-era debug printf of the register map removed)
424 // (x86-era debug printf of the hsn values removed)
425 if(i>0) {
426 // Don't evict the cycle count at entry points, otherwise the entry
427 // stub will have to write it.
428 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
429 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
430 for(j=10;j>=3;j--)
431 {
432 // Alloc preferred register if available
433 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
434 for(hr=0;hr<HOST_REGS;hr++) {
435 // Evict both parts of a 64-bit register
436 if((cur->regmap[hr]&63)==r) {
437 cur->regmap[hr]=-1;
438 cur->dirty&=~(1<<hr);
439 cur->isconst&=~(1<<hr);
440 }
441 }
442 cur->regmap[preferred_reg]=reg;
443 return;
444 }
// otherwise look for any guest register with this hsn value that the
// previous instruction does not reference; upper halves (r+64) go first
445 for(r=1;r<=MAXREG;r++)
446 {
447 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
448 for(hr=0;hr<HOST_REGS;hr++) {
449 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
450 if(cur->regmap[hr]==r+64) {
451 cur->regmap[hr]=reg;
452 cur->dirty&=~(1<<hr);
453 cur->isconst&=~(1<<hr);
454 return;
455 }
456 }
457 }
458 for(hr=0;hr<HOST_REGS;hr++) {
459 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 }
471 }
// last resort: same search without the previous-instruction and HOST_CCREG
// restrictions, down to hsn value 0
472 for(j=10;j>=0;j--)
473 {
474 for(r=1;r<=MAXREG;r++)
475 {
476 if(hsn[r]==j) {
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r+64) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 for(hr=0;hr<HOST_REGS;hr++) {
486 if(cur->regmap[hr]==r) {
487 cur->regmap[hr]=reg;
488 cur->dirty&=~(1<<hr);
489 cur->isconst&=~(1<<hr);
490 return;
491 }
492 }
493 }
494 }
495 }
496 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
497}
498
// Map both halves of guest register `reg`: the lower half through
// alloc_reg(), then the upper half, stored in cur->regmap[] as reg|64.
// The upper half is skipped when reg is flagged in cur->uu or is already
// mapped. Its preferred host register is 8+(reg&1) (possibly overridden by
// loop_reg()); the remaining search order matches alloc_reg(), except that
// the "clear one unneeded register" pass scans host registers from the top.
// Prints a message and exits the process if nothing can be found.
499static void alloc_reg64(struct regstat *cur,int i,signed char reg)
500{
501 int preferred_reg = 8+(reg&1);
502 int r,hr;
503
504 // allocate the lower 32 bits
505 alloc_reg(cur,i,reg);
506
507 // Don't allocate unused registers
508 if((cur->uu>>reg)&1) return;
509
510 // see if the upper half is already allocated
511 for(hr=0;hr<HOST_REGS;hr++)
512 {
513 if(cur->regmap[hr]==reg+64) return;
514 }
515
516 // Keep the same mapping if the register was already allocated in a loop
517 preferred_reg = loop_reg(i,reg,preferred_reg);
518
519 // Try to allocate the preferred register
520 if(cur->regmap[preferred_reg]==-1) {
521 cur->regmap[preferred_reg]=reg|64;
522 cur->dirty&=~(1<<preferred_reg);
523 cur->isconst&=~(1<<preferred_reg);
524 return;
525 }
526 r=cur->regmap[preferred_reg];
// occupant values below 64 are checked against cur->u, values of 64 and
// above (upper halves) against cur->uu
527 if(r<64&&((cur->u>>r)&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533 if(r>=64&&((cur->uu>>(r&63))&1)) {
534 cur->regmap[preferred_reg]=reg|64;
535 cur->dirty&=~(1<<preferred_reg);
536 cur->isconst&=~(1<<preferred_reg);
537 return;
538 }
539
540 // Clear any unneeded registers
541 // We try to keep the mapping consistent, if possible, because it
542 // makes branches easier (especially loops). So we try to allocate
543 // first (see above) before removing old mappings. If this is not
544 // possible then go ahead and clear out the registers that are no
545 // longer needed.
// (scans from the highest host register down and stops after the first hit)
546 for(hr=HOST_REGS-1;hr>=0;hr--)
547 {
548 r=cur->regmap[hr];
549 if(r>=0) {
550 if(r<64) {
551 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
552 }
553 else
554 {
555 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
556 }
557 }
558 }
559 // Try to allocate any available register, but prefer
560 // registers that have not been used recently.
561 if(i>0) {
562 for(hr=0;hr<HOST_REGS;hr++) {
563 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
564 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
565 cur->regmap[hr]=reg|64;
566 cur->dirty&=~(1<<hr);
567 cur->isconst&=~(1<<hr);
568 return;
569 }
570 }
571 }
572 }
573 // Try to allocate any available register
574 for(hr=0;hr<HOST_REGS;hr++) {
575 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
576 cur->regmap[hr]=reg|64;
577 cur->dirty&=~(1<<hr);
578 cur->isconst&=~(1<<hr);
579 return;
580 }
581 }
582
583 // Ok, now we have to evict someone
584 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 everywhere and is then updated by lsn(), which may also
// change preferred_reg
585 u_char hsn[MAXREG+1];
586 memset(hsn,10,sizeof(hsn));
587 int j;
588 lsn(hsn,i,&preferred_reg);
589 // (x86-era debug printf of the register map removed)
590 // (x86-era debug printf of the hsn values removed)
591 if(i>0) {
592 // Don't evict the cycle count at entry points, otherwise the entry
593 // stub will have to write it.
594 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
595 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
596 for(j=10;j>=3;j--)
597 {
598 // Alloc preferred register if available
599 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
600 for(hr=0;hr<HOST_REGS;hr++) {
601 // Evict both parts of a 64-bit register
602 if((cur->regmap[hr]&63)==r) {
603 cur->regmap[hr]=-1;
604 cur->dirty&=~(1<<hr);
605 cur->isconst&=~(1<<hr);
606 }
607 }
608 cur->regmap[preferred_reg]=reg|64;
609 return;
610 }
// otherwise look for any guest register with this hsn value that the
// previous instruction does not reference; upper halves (r+64) go first
611 for(r=1;r<=MAXREG;r++)
612 {
613 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
614 for(hr=0;hr<HOST_REGS;hr++) {
615 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
616 if(cur->regmap[hr]==r+64) {
617 cur->regmap[hr]=reg|64;
618 cur->dirty&=~(1<<hr);
619 cur->isconst&=~(1<<hr);
620 return;
621 }
622 }
623 }
624 for(hr=0;hr<HOST_REGS;hr++) {
625 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 }
637 }
// last resort: same search without the previous-instruction and HOST_CCREG
// restrictions, down to hsn value 0
638 for(j=10;j>=0;j--)
639 {
640 for(r=1;r<=MAXREG;r++)
641 {
642 if(hsn[r]==j) {
643 for(hr=0;hr<HOST_REGS;hr++) {
644 if(cur->regmap[hr]==r+64) {
645 cur->regmap[hr]=reg|64;
646 cur->dirty&=~(1<<hr);
647 cur->isconst&=~(1<<hr);
648 return;
649 }
650 }
651 for(hr=0;hr<HOST_REGS;hr++) {
652 if(cur->regmap[hr]==r) {
653 cur->regmap[hr]=reg|64;
654 cur->dirty&=~(1<<hr);
655 cur->isconst&=~(1<<hr);
656 return;
657 }
658 }
659 }
660 }
661 }
662 SysPrintf("This shouldn't happen");exit(1);
663}
664
665// Allocate a temporary register. This is done without regard to
666// dirty status or whether the register we request is on the unneeded list
667// Note: This will only allocate one register, even if called multiple times
// Map the temporary `reg` to some host register in cur->regmap[] for
// instruction index i, clearing the dirty and isconst bits of that host
// register. Search order:
//  1. nothing to do if reg is already mapped (EXCLUDE_REG is ignored)
//  2. a free host register, scanning from the highest index down
//  3. a host register whose occupant is flagged unneeded both in cur->u/uu
//     and (for i>0) in unneeded_reg[i-1]/unneeded_reg_upper[i-1]
//  4. eviction driven by the hsn[] values filled in by lsn()
// Prints a message and exits the process if nothing can be found.
668static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
669{
670 int r,hr;
671 int preferred_reg = -1;
672
673 // see if it's already allocated
674 for(hr=0;hr<HOST_REGS;hr++)
675 {
676 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
677 }
678
679 // Try to allocate any available register
680 for(hr=HOST_REGS-1;hr>=0;hr--) {
681 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688
689 // Find an unneeded register
690 for(hr=HOST_REGS-1;hr>=0;hr--)
691 {
692 r=cur->regmap[hr];
693 if(r>=0) {
694 if(r<64) {
695 if((cur->u>>r)&1) {
696 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
697 cur->regmap[hr]=reg;
698 cur->dirty&=~(1<<hr);
699 cur->isconst&=~(1<<hr);
700 return;
701 }
702 }
703 }
704 else
705 {
706 if((cur->uu>>(r&63))&1) {
707 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 }
715 }
716 }
717
718 // Ok, now we have to evict someone
719 // Pick a register we hopefully won't need soon
720 // TODO: we might want to follow unconditional jumps here
721 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 everywhere and is then updated by lsn()
722 u_char hsn[MAXREG+1];
723 memset(hsn,10,sizeof(hsn));
724 int j;
725 lsn(hsn,i,&preferred_reg);
726 // (debug printf of the hsn values removed)
727 if(i>0) {
728 // Don't evict the cycle count at entry points, otherwise the entry
729 // stub will have to write it.
730 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
731 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
732 for(j=10;j>=3;j--)
733 {
734 for(r=1;r<=MAXREG;r++)
735 {
736 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// HOST_CCREG is only eligible here while hsn[CCREG] is above 2
737 for(hr=0;hr<HOST_REGS;hr++) {
738 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
739 if(cur->regmap[hr]==r+64) {
740 cur->regmap[hr]=reg;
741 cur->dirty&=~(1<<hr);
742 cur->isconst&=~(1<<hr);
743 return;
744 }
745 }
746 }
747 for(hr=0;hr<HOST_REGS;hr++) {
748 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 }
760 }
// last resort: same search without the previous-instruction and HOST_CCREG
// restrictions, down to hsn value 0
761 for(j=10;j>=0;j--)
762 {
763 for(r=1;r<=MAXREG;r++)
764 {
765 if(hsn[r]==j) {
766 for(hr=0;hr<HOST_REGS;hr++) {
767 if(cur->regmap[hr]==r+64) {
768 cur->regmap[hr]=reg;
769 cur->dirty&=~(1<<hr);
770 cur->isconst&=~(1<<hr);
771 return;
772 }
773 }
774 for(hr=0;hr<HOST_REGS;hr++) {
775 if(cur->regmap[hr]==r) {
776 cur->regmap[hr]=reg;
777 cur->dirty&=~(1<<hr);
778 cur->isconst&=~(1<<hr);
779 return;
780 }
781 }
782 }
783 }
784 }
785 SysPrintf("This shouldn't happen");exit(1);
786}
787
788// Allocate a specific ARM register.
789static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
790{
791 int n;
792 int dirty=0;
793
794 // see if it's already allocated (and dealloc it)
795 for(n=0;n<HOST_REGS;n++)
796 {
797 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
798 dirty=(cur->dirty>>n)&1;
799 cur->regmap[n]=-1;
800 }
801 }
802
803 cur->regmap[hr]=reg;
804 cur->dirty&=~(1<<hr);
805 cur->dirty|=dirty<<hr;
806 cur->isconst&=~(1<<hr);
807}
808
809// Alloc cycle count into dedicated register
810static void alloc_cc(struct regstat *cur,int i)
811{
812 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
813}
814
815/* Special alloc */
816
817
818/* Assembler */
819
820static unused char regname[16][4] = {
821 "r0",
822 "r1",
823 "r2",
824 "r3",
825 "r4",
826 "r5",
827 "r6",
828 "r7",
829 "r8",
830 "r9",
831 "r10",
832 "fp",
833 "r12",
834 "sp",
835 "lr",
836 "pc"};
837
838static void output_w32(u_int word)
839{
840 *((u_int *)out)=word;
841 out+=4;
842}
843
844static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
845{
846 assert(rd<16);
847 assert(rn<16);
848 assert(rm<16);
849 return((rn<<16)|(rd<<12)|rm);
850}
851
852static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
853{
854 assert(rd<16);
855 assert(rn<16);
856 assert(imm<256);
857 assert((shift&1)==0);
858 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
859}
860
/*
 * Try to express imm as an ARM "8-bit value rotated right by an even amount"
 * operand.
 *
 * On success returns 1 and stores (rotate field << 7) | imm8 in *encoded.
 * On failure returns 0 and *encoded is left at 0.
 */
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded=0;
  if(imm==0) return 1;

  // Rotate the value right two bits at a time until it fits in 8 bits.
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
876
877static void genimm_checked(u_int imm,u_int *encoded)
878{
879 u_int ret=genimm(imm,encoded);
880 assert(ret);
881 (void)ret;
882}
883
884static u_int genjmp(u_int addr)
885{
886 int offset=addr-(int)out-8;
887 if(offset<-33554432||offset>=33554432) {
888 if (addr>2) {
889 SysPrintf("genjmp: out of range: %08x\n", offset);
890 exit(1);
891 }
892 return 0;
893 }
894 return ((u_int)offset>>2)&0xffffff;
895}
896
897static void emit_mov(int rs,int rt)
898{
899 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
900 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
901}
902
903static void emit_movs(int rs,int rt)
904{
905 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
906 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
907}
908
909static void emit_add(int rs1,int rs2,int rt)
910{
911 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
913}
914
915static void emit_adds(int rs1,int rs2,int rt)
916{
917 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
919}
920
921static void emit_adcs(int rs1,int rs2,int rt)
922{
923 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
924 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
925}
926
927static void emit_sbc(int rs1,int rs2,int rt)
928{
929 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
930 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
931}
932
933static void emit_sbcs(int rs1,int rs2,int rt)
934{
935 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
937}
938
939static void emit_neg(int rs, int rt)
940{
941 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
942 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
943}
944
945static void emit_negs(int rs, int rt)
946{
947 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
948 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
949}
950
951static void emit_sub(int rs1,int rs2,int rt)
952{
953 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
954 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
955}
956
957static void emit_subs(int rs1,int rs2,int rt)
958{
959 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
960 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
961}
962
963static void emit_zeroreg(int rt)
964{
965 assem_debug("mov %s,#0\n",regname[rt]);
966 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
967}
968
969static void emit_loadlp(u_int imm,u_int rt)
970{
971 add_literal((int)out,imm);
972 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
973 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
974}
975
976static void emit_movw(u_int imm,u_int rt)
977{
978 assert(imm<65536);
979 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
980 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
981}
982
983static void emit_movt(u_int imm,u_int rt)
984{
985 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
986 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
987}
988
989static void emit_movimm(u_int imm,u_int rt)
990{
991 u_int armval;
992 if(genimm(imm,&armval)) {
993 assem_debug("mov %s,#%d\n",regname[rt],imm);
994 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
995 }else if(genimm(~imm,&armval)) {
996 assem_debug("mvn %s,#%d\n",regname[rt],imm);
997 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
998 }else if(imm<65536) {
999 #ifndef HAVE_ARMV7
1000 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1001 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1002 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1003 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1004 #else
1005 emit_movw(imm,rt);
1006 #endif
1007 }else{
1008 #ifndef HAVE_ARMV7
1009 emit_loadlp(imm,rt);
1010 #else
1011 emit_movw(imm&0x0000FFFF,rt);
1012 emit_movt(imm&0xFFFF0000,rt);
1013 #endif
1014 }
1015}
1016
1017static void emit_pcreladdr(u_int rt)
1018{
1019 assem_debug("add %s,pc,#?\n",regname[rt]);
1020 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1021}
1022
1023static void emit_loadreg(int r, int hr)
1024{
1025 if(r&64) {
1026 SysPrintf("64bit load in 32bit mode!\n");
1027 assert(0);
1028 return;
1029 }
1030 if((r&63)==0)
1031 emit_zeroreg(hr);
1032 else {
1033 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1034 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1035 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1036 if(r==CCREG) addr=(int)&cycle_count;
1037 if(r==CSREG) addr=(int)&Status;
1038 if(r==FSREG) addr=(int)&FCR31;
1039 if(r==INVCP) addr=(int)&invc_ptr;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1044 }
1045}
1046
1047static void emit_storereg(int r, int hr)
1048{
1049 if(r&64) {
1050 SysPrintf("64bit store in 32bit mode!\n");
1051 assert(0);
1052 return;
1053 }
1054 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1055 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1056 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1057 if(r==CCREG) addr=(int)&cycle_count;
1058 if(r==FSREG) addr=(int)&FCR31;
1059 u_int offset = addr-(u_int)&dynarec_local;
1060 assert(offset<4096);
1061 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1062 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1063}
1064
1065static void emit_test(int rs, int rt)
1066{
1067 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1068 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1069}
1070
1071static void emit_testimm(int rs,int imm)
1072{
1073 u_int armval;
1074 assem_debug("tst %s,#%d\n",regname[rs],imm);
1075 genimm_checked(imm,&armval);
1076 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1077}
1078
1079static void emit_testeqimm(int rs,int imm)
1080{
1081 u_int armval;
1082 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1083 genimm_checked(imm,&armval);
1084 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1085}
1086
1087static void emit_not(int rs,int rt)
1088{
1089 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1090 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1091}
1092
1093static void emit_mvnmi(int rs,int rt)
1094{
1095 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1096 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1097}
1098
1099static void emit_and(u_int rs1,u_int rs2,u_int rt)
1100{
1101 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1102 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1103}
1104
1105static void emit_or(u_int rs1,u_int rs2,u_int rt)
1106{
1107 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1109}
1110
1111static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1112{
1113 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1114 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1115}
1116
1117static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1118{
1119 assert(rs<16);
1120 assert(rt<16);
1121 assert(imm<32);
1122 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1123 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1124}
1125
1126static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1127{
1128 assert(rs<16);
1129 assert(rt<16);
1130 assert(imm<32);
1131 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1132 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1133}
1134
1135static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1136{
1137 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1138 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1139}
1140
1141static void emit_addimm(u_int rs,int imm,u_int rt)
1142{
1143 assert(rs<16);
1144 assert(rt<16);
1145 if(imm!=0) {
1146 u_int armval;
1147 if(genimm(imm,&armval)) {
1148 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1149 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1150 }else if(genimm(-imm,&armval)) {
1151 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1152 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1153 #ifdef HAVE_ARMV7
1154 }else if(rt!=rs&&(u_int)imm<65536) {
1155 emit_movw(imm&0x0000ffff,rt);
1156 emit_add(rs,rt,rt);
1157 }else if(rt!=rs&&(u_int)-imm<65536) {
1158 emit_movw(-imm&0x0000ffff,rt);
1159 emit_sub(rs,rt,rt);
1160 #endif
1161 }else if((u_int)-imm<65536) {
1162 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1163 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1164 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1165 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1166 }else {
1167 do {
1168 int shift = (ffs(imm) - 1) & ~1;
1169 int imm8 = imm & (0xff << shift);
1170 genimm_checked(imm8,&armval);
1171 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1172 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1173 rs = rt;
1174 imm &= ~imm8;
1175 }
1176 while (imm != 0);
1177 }
1178 }
1179 else if(rs!=rt) emit_mov(rs,rt);
1180}
1181
1182static void emit_addimm_and_set_flags(int imm,int rt)
1183{
1184 assert(imm>-65536&&imm<65536);
1185 u_int armval;
1186 if(genimm(imm,&armval)) {
1187 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1188 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1189 }else if(genimm(-imm,&armval)) {
1190 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1191 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1192 }else if(imm<0) {
1193 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1194 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1195 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1196 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1197 }else{
1198 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1199 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1200 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1201 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1202 }
1203}
1204
1205static void emit_addimm_no_flags(u_int imm,u_int rt)
1206{
1207 emit_addimm(rt,imm,rt);
1208}
1209
1210static void emit_addnop(u_int r)
1211{
1212 assert(r<16);
1213 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1214 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1215}
1216
1217static void emit_adcimm(u_int rs,int imm,u_int rt)
1218{
1219 u_int armval;
1220 genimm_checked(imm,&armval);
1221 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1222 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1223}
1224
1225static void emit_rscimm(int rs,int imm,u_int rt)
1226{
1227 assert(0);
1228 u_int armval;
1229 genimm_checked(imm,&armval);
1230 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1232}
1233
1234static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1235{
1236 // TODO: if(genimm(imm,&armval)) ...
1237 // else
1238 emit_movimm(imm,HOST_TEMPREG);
1239 emit_adds(HOST_TEMPREG,rsl,rtl);
1240 emit_adcimm(rsh,0,rth);
1241}
1242
1243static void emit_andimm(int rs,int imm,int rt)
1244{
1245 u_int armval;
1246 if(imm==0) {
1247 emit_zeroreg(rt);
1248 }else if(genimm(imm,&armval)) {
1249 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1250 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1251 }else if(genimm(~imm,&armval)) {
1252 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1254 }else if(imm==65535) {
1255 #ifndef HAVE_ARMV6
1256 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1257 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1258 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1260 #else
1261 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1262 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1263 #endif
1264 }else{
1265 assert(imm>0&&imm<65535);
1266 #ifndef HAVE_ARMV7
1267 assem_debug("mov r14,#%d\n",imm&0xFF00);
1268 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1269 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1270 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1271 #else
1272 emit_movw(imm,HOST_TEMPREG);
1273 #endif
1274 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1275 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1276 }
1277}
1278
1279static void emit_orimm(int rs,int imm,int rt)
1280{
1281 u_int armval;
1282 if(imm==0) {
1283 if(rs!=rt) emit_mov(rs,rt);
1284 }else if(genimm(imm,&armval)) {
1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1287 }else{
1288 assert(imm>0&&imm<65536);
1289 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1291 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1292 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1293 }
1294}
1295
1296static void emit_xorimm(int rs,int imm,int rt)
1297{
1298 u_int armval;
1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
1305 assert(imm>0&&imm<65536);
1306 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
1313static void emit_shlimm(int rs,u_int imm,int rt)
1314{
1315 assert(imm>0);
1316 assert(imm<32);
1317 //if(imm==1) ...
1318 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1319 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1320}
1321
1322static void emit_lsls_imm(int rs,int imm,int rt)
1323{
1324 assert(imm>0);
1325 assert(imm<32);
1326 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1327 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1328}
1329
1330static unused void emit_lslpls_imm(int rs,int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1335 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1336}
1337
1338static void emit_shrimm(int rs,u_int imm,int rt)
1339{
1340 assert(imm>0);
1341 assert(imm<32);
1342 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1343 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1344}
1345
1346static void emit_sarimm(int rs,u_int imm,int rt)
1347{
1348 assert(imm>0);
1349 assert(imm<32);
1350 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1351 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1352}
1353
1354static void emit_rorimm(int rs,u_int imm,int rt)
1355{
1356 assert(imm>0);
1357 assert(imm<32);
1358 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1359 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1360}
1361
1362static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1363{
1364 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1365 assert(imm>0);
1366 assert(imm<32);
1367 //if(imm==1) ...
1368 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1369 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1370 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1371 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1372}
1373
1374static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1375{
1376 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1377 assert(imm>0);
1378 assert(imm<32);
1379 //if(imm==1) ...
1380 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1381 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1382 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1383 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1384}
1385
/* Sign-extend the low 16 bits of rs into rt: `sxth` when HAVE_ARMV6 is
 * defined, otherwise a shift-left/arithmetic-shift-right pair. */
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1396
/* Sign-extend the low 8 bits of rs into rt: `sxtb` when HAVE_ARMV6 is
 * defined, otherwise a shift-left/arithmetic-shift-right pair. */
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1407
1408static void emit_shl(u_int rs,u_int shift,u_int rt)
1409{
1410 assert(rs<16);
1411 assert(rt<16);
1412 assert(shift<16);
1413 //if(imm==1) ...
1414 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1415 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1416}
1417
1418static void emit_shr(u_int rs,u_int shift,u_int rt)
1419{
1420 assert(rs<16);
1421 assert(rt<16);
1422 assert(shift<16);
1423 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1424 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1425}
1426
1427static void emit_sar(u_int rs,u_int shift,u_int rt)
1428{
1429 assert(rs<16);
1430 assert(rt<16);
1431 assert(shift<16);
1432 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1434}
1435
1436static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1437{
1438 assert(rs<16);
1439 assert(rt<16);
1440 assert(shift<16);
1441 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1442 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1443}
1444
1445static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1446{
1447 assert(rs<16);
1448 assert(rt<16);
1449 assert(shift<16);
1450 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1451 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1452}
1453
1454static void emit_cmpimm(int rs,int imm)
1455{
1456 u_int armval;
1457 if(genimm(imm,&armval)) {
1458 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1459 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1460 }else if(genimm(-imm,&armval)) {
1461 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1462 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1463 }else if(imm>0) {
1464 assert(imm<65536);
1465 emit_movimm(imm,HOST_TEMPREG);
1466 assem_debug("cmp %s,r14\n",regname[rs]);
1467 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1468 }else{
1469 assert(imm>-65536);
1470 emit_movimm(-imm,HOST_TEMPREG);
1471 assem_debug("cmn %s,r14\n",regname[rs]);
1472 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1473 }
1474}
1475
1476static void emit_cmovne_imm(int imm,int rt)
1477{
1478 assem_debug("movne %s,#%d\n",regname[rt],imm);
1479 u_int armval;
1480 genimm_checked(imm,&armval);
1481 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1482}
1483
1484static void emit_cmovl_imm(int imm,int rt)
1485{
1486 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1487 u_int armval;
1488 genimm_checked(imm,&armval);
1489 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1490}
1491
1492static void emit_cmovb_imm(int imm,int rt)
1493{
1494 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1495 u_int armval;
1496 genimm_checked(imm,&armval);
1497 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1498}
1499
1500static void emit_cmovs_imm(int imm,int rt)
1501{
1502 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1503 u_int armval;
1504 genimm_checked(imm,&armval);
1505 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1506}
1507
1508static void emit_cmove_reg(int rs,int rt)
1509{
1510 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1511 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1512}
1513
1514static void emit_cmovne_reg(int rs,int rt)
1515{
1516 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1517 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1518}
1519
1520static void emit_cmovl_reg(int rs,int rt)
1521{
1522 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1523 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1524}
1525
1526static void emit_cmovs_reg(int rs,int rt)
1527{
1528 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1529 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1530}
1531
/* rt = (rs < imm) using a signed compare.  When rt aliases rs the zeroing
 * of rt is deferred until after the compare so the source is not lost. */
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1539
/* rt = (rs < imm) using an unsigned compare (movcc).  When rt aliases rs
 * the zeroing of rt is deferred until after the compare. */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1547
/* Signed set-less-than of the 64-bit pair rsh:rsl against imm, result in
 * rt.  The low-word result is computed first and then corrected from the
 * high word; rsh must not alias rt. */
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    /* Compare the high word against -1. */
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  /* Non-negative imm: inspect the high word itself. */
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
1565
/* Unsigned set-less-than of the 64-bit pair rsh:rsl against imm, result in
 * rt.  The low-word result is computed first and then overridden when the
 * high word differs from the expected one; rsh must not alias rt. */
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    /* Compare the high word against -1. */
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1581
1582static void emit_cmp(int rs,int rt)
1583{
1584 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1585 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1586}
1587
/* rt = 1 unless rs < 1 (signed), in which case rt = 0.  The compare is
 * emitted before rt is written. */
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
1595
/* Sets flags from rs (via movs into rt, or a test when rs == rt) and then
 * conditionally loads 1 into rt on NE. */
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
1603
/* 64-bit variant of emit_set_gz32: computes the low-word result into rt,
 * then overrides it from the high word (1 if non-zero, 0 if negative). */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  /* High-word correction. */
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
1612
/* ORs the two halves into rt with flags set, then loads 1 into rt on NE. */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  const int one=1;
  emit_cmovne_imm(one,rt);
}
1619
/* rt = (rs1 < rs2), signed.  If rt aliases either source, rt is cleared
 * only after the compare has been emitted. */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1628
/* rt = (rs1 < rs2), unsigned (movcc).  If rt aliases either source, rt is
 * cleared only after the compare has been emitted. */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1637
1638static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1639{
1640 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1641 assert(u1!=rt);
1642 assert(u2!=rt);
1643 emit_cmp(l1,l2);
1644 emit_movimm(0,rt);
1645 emit_sbcs(u1,u2,HOST_TEMPREG);
1646 emit_cmovl_imm(1,rt);
1647}
1648
1649static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1650{
1651 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1652 assert(u1!=rt);
1653 assert(u2!=rt);
1654 emit_cmp(l1,l2);
1655 emit_movimm(0,rt);
1656 emit_sbcs(u1,u2,HOST_TEMPREG);
1657 emit_cmovb_imm(1,rt);
1658}
1659
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
/* Table entry: function address paired with its name (prefixed by a space
 * so it can be appended directly to a debug line). */
#define FUNCNAME(f) { (intptr_t)f, " " #f }
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

/* Look up the name registered for address a; returns "" when a is not in
 * the table. */
static const char *func_name(intptr_t a)
{
  const unsigned int entries = sizeof(function_names)/sizeof(function_names[0]);
  unsigned int idx = 0;
  while (idx < entries) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
/* Without DRC_DBG no names are available. */
#define func_name(x) ""
#endif
1700
1701static void emit_call(int a)
1702{
1703 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1704 u_int offset=genjmp(a);
1705 output_w32(0xeb000000|offset);
1706}
1707
1708static void emit_jmp(int a)
1709{
1710 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1711 u_int offset=genjmp(a);
1712 output_w32(0xea000000|offset);
1713}
1714
1715static void emit_jne(int a)
1716{
1717 assem_debug("bne %x\n",a);
1718 u_int offset=genjmp(a);
1719 output_w32(0x1a000000|offset);
1720}
1721
1722static void emit_jeq(int a)
1723{
1724 assem_debug("beq %x\n",a);
1725 u_int offset=genjmp(a);
1726 output_w32(0x0a000000|offset);
1727}
1728
1729static void emit_js(int a)
1730{
1731 assem_debug("bmi %x\n",a);
1732 u_int offset=genjmp(a);
1733 output_w32(0x4a000000|offset);
1734}
1735
1736static void emit_jns(int a)
1737{
1738 assem_debug("bpl %x\n",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0x5a000000|offset);
1741}
1742
1743static void emit_jl(int a)
1744{
1745 assem_debug("blt %x\n",a);
1746 u_int offset=genjmp(a);
1747 output_w32(0xba000000|offset);
1748}
1749
1750static void emit_jge(int a)
1751{
1752 assem_debug("bge %x\n",a);
1753 u_int offset=genjmp(a);
1754 output_w32(0xaa000000|offset);
1755}
1756
1757static void emit_jno(int a)
1758{
1759 assem_debug("bvc %x\n",a);
1760 u_int offset=genjmp(a);
1761 output_w32(0x7a000000|offset);
1762}
1763
1764static void emit_jc(int a)
1765{
1766 assem_debug("bcs %x\n",a);
1767 u_int offset=genjmp(a);
1768 output_w32(0x2a000000|offset);
1769}
1770
1771static void emit_jcc(int a)
1772{
1773 assem_debug("bcc %x\n",a);
1774 u_int offset=genjmp(a);
1775 output_w32(0x3a000000|offset);
1776}
1777
1778static void emit_callreg(u_int r)
1779{
1780 assert(r<15);
1781 assem_debug("blx %s\n",regname[r]);
1782 output_w32(0xe12fff30|r);
1783}
1784
1785static void emit_jmpreg(u_int r)
1786{
1787 assem_debug("mov pc,%s\n",regname[r]);
1788 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1789}
1790
1791static void emit_readword_indexed(int offset, int rs, int rt)
1792{
1793 assert(offset>-4096&&offset<4096);
1794 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1797 }else{
1798 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1799 }
1800}
1801
1802static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1803{
1804 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1805 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1806}
1807
1808static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1809{
1810 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1811 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1812}
1813
1814static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1818}
1819
1820static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1824}
1825
1826static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1827{
1828 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1829 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1830}
1831
1832static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1833{
1834 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1835 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1836}
1837
/* Word load that optionally goes through a map register: with map < 0 it
 * is a plain offset load; otherwise addr must be 0 and the load is indexed
 * by map scaled by 4. */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1846
/* Load a 64-bit value as two words: rh from addr (skipped when rh < 0) and
 * rl from the following word.  With a map register, map is incremented
 * between the two loads and rh must not alias rs. */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1859
1860static void emit_movsbl_indexed(int offset, int rs, int rt)
1861{
1862 assert(offset>-256&&offset<256);
1863 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1864 if(offset>=0) {
1865 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1866 }else{
1867 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1868 }
1869}
1870
1871static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1872{
1873 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1874 else {
1875 if(addr==0) {
1876 emit_shlimm(map,2,map);
1877 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1878 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1879 }else{
1880 assert(addr>-256&&addr<256);
1881 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1882 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1883 emit_movsbl_indexed(addr, rt, rt);
1884 }
1885 }
1886}
1887
1888static void emit_movswl_indexed(int offset, int rs, int rt)
1889{
1890 assert(offset>-256&&offset<256);
1891 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1892 if(offset>=0) {
1893 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1894 }else{
1895 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1896 }
1897}
1898
1899static void emit_movzbl_indexed(int offset, int rs, int rt)
1900{
1901 assert(offset>-4096&&offset<4096);
1902 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1903 if(offset>=0) {
1904 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1905 }else{
1906 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1907 }
1908}
1909
1910static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1911{
1912 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1914}
1915
/* Unsigned byte load, optionally through a map register.  With a map and a
 * non-zero addr, rs+addr is first formed in rt and used as the base. */
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1928
1929static void emit_movzwl_indexed(int offset, int rs, int rt)
1930{
1931 assert(offset>-256&&offset<256);
1932 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1933 if(offset>=0) {
1934 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1935 }else{
1936 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1937 }
1938}
1939
1940static void emit_ldrd(int offset, int rs, int rt)
1941{
1942 assert(offset>-256&&offset<256);
1943 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1944 if(offset>=0) {
1945 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1946 }else{
1947 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1948 }
1949}
1950
1951static void emit_readword(int addr, int rt)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<4096);
1955 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1957}
1958
1959static unused void emit_movsbl(int addr, int rt)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
1966
1967static unused void emit_movswl(int addr, int rt)
1968{
1969 u_int offset = addr-(u_int)&dynarec_local;
1970 assert(offset<256);
1971 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1972 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1973}
1974
1975static unused void emit_movzbl(int addr, int rt)
1976{
1977 u_int offset = addr-(u_int)&dynarec_local;
1978 assert(offset<4096);
1979 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1980 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1981}
1982
1983static unused void emit_movzwl(int addr, int rt)
1984{
1985 u_int offset = addr-(u_int)&dynarec_local;
1986 assert(offset<256);
1987 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1988 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1989}
1990
1991static void emit_writeword_indexed(int rt, int offset, int rs)
1992{
1993 assert(offset>-4096&&offset<4096);
1994 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1995 if(offset>=0) {
1996 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1997 }else{
1998 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1999 }
2000}
2001
2002static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2003{
2004 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2005 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2006}
2007
/* Word store that optionally goes through a map register: with map < 0 it
 * is a plain offset store; otherwise addr must be 0 and the store is
 * indexed by map scaled by 4.  temp is not used here. */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
2016
/* Store a 64-bit value as two words (rh first, then rl).
 * Without a map (map < 0) two offset stores are used and rh may be
 * negative to skip the high word.  With a map, rh is required; the second
 * word is addressed either via temp = map+1 (when temp != rs) or by
 * advancing rs by 4. */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_free=(temp!=rs);
  if(temp_free) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_free) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2033
2034static void emit_writehword_indexed(int rt, int offset, int rs)
2035{
2036 assert(offset>-256&&offset<256);
2037 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2038 if(offset>=0) {
2039 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2040 }else{
2041 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2042 }
2043}
2044
2045static void emit_writebyte_indexed(int rt, int offset, int rs)
2046{
2047 assert(offset>-4096&&offset<4096);
2048 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2049 if(offset>=0) {
2050 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2051 }else{
2052 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2053 }
2054}
2055
2056static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2057{
2058 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2059 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2060}
2061
/* Byte store, optionally through a map register.  With a map and a
 * non-zero addr, rs+addr is first formed in temp and used as the base. */
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
2074
2075static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2076{
2077 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2079}
2080
2081static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2082{
2083 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2084 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2085}
2086
2087static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2088{
2089 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2090 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2091}
2092
2093static void emit_writeword(int rt, int addr)
2094{
2095 u_int offset = addr-(u_int)&dynarec_local;
2096 assert(offset<4096);
2097 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2098 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2099}
2100
2101static unused void emit_writehword(int rt, int addr)
2102{
2103 u_int offset = addr-(u_int)&dynarec_local;
2104 assert(offset<256);
2105 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2106 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2107}
2108
2109static unused void emit_writebyte(int rt, int addr)
2110{
2111 u_int offset = addr-(u_int)&dynarec_local;
2112 assert(offset<4096);
2113 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2114 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2115}
2116
2117static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2118{
2119 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2120 assert(rs1<16);
2121 assert(rs2<16);
2122 assert(hi<16);
2123 assert(lo<16);
2124 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2125}
2126
2127static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2128{
2129 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2130 assert(rs1<16);
2131 assert(rs2<16);
2132 assert(hi<16);
2133 assert(lo<16);
2134 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2135}
2136
2137static void emit_clz(int rs,int rt)
2138{
2139 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2140 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2141}
2142
2143static void emit_subcs(int rs1,int rs2,int rt)
2144{
2145 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2146 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2147}
2148
2149static void emit_shrcc_imm(int rs,u_int imm,int rt)
2150{
2151 assert(imm>0);
2152 assert(imm<32);
2153 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2154 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2155}
2156
2157static void emit_shrne_imm(int rs,u_int imm,int rt)
2158{
2159 assert(imm>0);
2160 assert(imm<32);
2161 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2162 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2163}
2164
2165static void emit_negmi(int rs, int rt)
2166{
2167 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2168 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2169}
2170
2171static void emit_negsmi(int rs, int rt)
2172{
2173 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2174 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2175}
2176
2177static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2178{
2179 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2180 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2181}
2182
2183static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2184{
2185 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2186 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2187}
2188
2189static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2190{
2191 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2192 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2193}
2194
2195static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2196{
2197 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2198 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2199}
2200
2201static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2202{
2203 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2204 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2205}
2206
2207static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2208{
2209 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2210 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2211}
2212
2213static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2214{
2215 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2216 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2217}
2218
2219static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2220{
2221 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2223}
2224
2225static void emit_teq(int rs, int rt)
2226{
2227 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2228 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2229}
2230
2231static void emit_rsbimm(int rs, int imm, int rt)
2232{
2233 u_int armval;
2234 genimm_checked(imm,&armval);
2235 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2236 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2237}
2238
2239// Load 2 immediates optimizing for small code size
2240static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2241{
2242 emit_movimm(imm1,rt1);
2243 u_int armval;
2244 if(genimm(imm2-imm1,&armval)) {
2245 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2246 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2247 }else if(genimm(imm1-imm2,&armval)) {
2248 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2249 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2250 }
2251 else emit_movimm(imm2,rt2);
2252}
2253
2254// Conditionally select one of two immediates, optimizing for small code size
2255// This will only be called if HAVE_CMOV_IMM is defined
2256static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2257{
2258 u_int armval;
2259 if(genimm(imm2-imm1,&armval)) {
2260 emit_movimm(imm1,rt);
2261 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2262 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2263 }else if(genimm(imm1-imm2,&armval)) {
2264 emit_movimm(imm1,rt);
2265 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2266 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2267 }
2268 else {
2269 #ifndef HAVE_ARMV7
2270 emit_movimm(imm1,rt);
2271 add_literal((int)out,imm2);
2272 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2273 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2274 #else
2275 emit_movw(imm1&0x0000FFFF,rt);
2276 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2277 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2278 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2279 }
2280 emit_movt(imm1&0xFFFF0000,rt);
2281 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2282 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2283 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2284 }
2285 #endif
2286 }
2287}
2288
2289// special case for checking invalid_code
2290static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2291{
2292 assert(imm<128&&imm>=0);
2293 assert(r>=0&&r<16);
2294 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2295 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2296 emit_cmpimm(HOST_TEMPREG,imm);
2297}
2298
2299static void emit_callne(int a)
2300{
2301 assem_debug("blne %x\n",a);
2302 u_int offset=genjmp(a);
2303 output_w32(0x1b000000|offset);
2304}
2305
2306// Used to preload hash table entries
2307static unused void emit_prefetchreg(int r)
2308{
2309 assem_debug("pld %s\n",regname[r]);
2310 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2311}
2312
2313// Special case for mini_ht
2314static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2315{
2316 assert(offset<4096);
2317 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2318 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2319}
2320
2321static unused void emit_bicne_imm(int rs,int imm,int rt)
2322{
2323 u_int armval;
2324 genimm_checked(imm,&armval);
2325 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2326 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2327}
2328
2329static unused void emit_biccs_imm(int rs,int imm,int rt)
2330{
2331 u_int armval;
2332 genimm_checked(imm,&armval);
2333 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2334 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2335}
2336
2337static unused void emit_bicvc_imm(int rs,int imm,int rt)
2338{
2339 u_int armval;
2340 genimm_checked(imm,&armval);
2341 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2342 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2343}
2344
2345static unused void emit_bichi_imm(int rs,int imm,int rt)
2346{
2347 u_int armval;
2348 genimm_checked(imm,&armval);
2349 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2350 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2351}
2352
2353static unused void emit_orrvs_imm(int rs,int imm,int rt)
2354{
2355 u_int armval;
2356 genimm_checked(imm,&armval);
2357 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2358 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2359}
2360
2361static void emit_orrne_imm(int rs,int imm,int rt)
2362{
2363 u_int armval;
2364 genimm_checked(imm,&armval);
2365 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2366 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2367}
2368
2369static void emit_andne_imm(int rs,int imm,int rt)
2370{
2371 u_int armval;
2372 genimm_checked(imm,&armval);
2373 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2374 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2375}
2376
2377static unused void emit_addpl_imm(int rs,int imm,int rt)
2378{
2379 u_int armval;
2380 genimm_checked(imm,&armval);
2381 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2382 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2383}
2384
2385static void emit_jno_unlikely(int a)
2386{
2387 //emit_jno(a);
2388 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2389 output_w32(0x72800000|rd_rn_rm(15,15,0));
2390}
2391
2392static void save_regs_all(u_int reglist)
2393{
2394 int i;
2395 if(!reglist) return;
2396 assem_debug("stmia fp,{");
2397 for(i=0;i<16;i++)
2398 if(reglist&(1<<i))
2399 assem_debug("r%d,",i);
2400 assem_debug("}\n");
2401 output_w32(0xe88b0000|reglist);
2402}
2403
2404static void restore_regs_all(u_int reglist)
2405{
2406 int i;
2407 if(!reglist) return;
2408 assem_debug("ldmia fp,{");
2409 for(i=0;i<16;i++)
2410 if(reglist&(1<<i))
2411 assem_debug("r%d,",i);
2412 assem_debug("}\n");
2413 output_w32(0xe89b0000|reglist);
2414}
2415
2416// Save registers before function call
2417static void save_regs(u_int reglist)
2418{
2419 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2420 save_regs_all(reglist);
2421}
2422
2423// Restore registers after function call
2424static void restore_regs(u_int reglist)
2425{
2426 reglist&=CALLER_SAVE_REGS;
2427 restore_regs_all(reglist);
2428}
2429
2430/* Stubs/epilogue */
2431
2432static void literal_pool(int n)
2433{
2434 if(!literalcount) return;
2435 if(n) {
2436 if((int)out-literals[0][0]<4096-n) return;
2437 }
2438 u_int *ptr;
2439 int i;
2440 for(i=0;i<literalcount;i++)
2441 {
2442 u_int l_addr=(u_int)out;
2443 int j;
2444 for(j=0;j<i;j++) {
2445 if(literals[j][1]==literals[i][1]) {
2446 //printf("dup %08x\n",literals[i][1]);
2447 l_addr=literals[j][0];
2448 break;
2449 }
2450 }
2451 ptr=(u_int *)literals[i][0];
2452 u_int offset=l_addr-(u_int)ptr-8;
2453 assert(offset<4096);
2454 assert(!(offset&3));
2455 *ptr|=offset;
2456 if(l_addr==(u_int)out) {
2457 literals[i][0]=l_addr; // remember for dupes
2458 output_w32(literals[i][1]);
2459 }
2460 }
2461 literalcount=0;
2462}
2463
2464static void literal_pool_jumpover(int n)
2465{
2466 if(!literalcount) return;
2467 if(n) {
2468 if((int)out-literals[0][0]<4096-n) return;
2469 }
2470 int jaddr=(int)out;
2471 emit_jmp(0);
2472 literal_pool(0);
2473 set_jump_target(jaddr,(int)out);
2474}
2475
2476static void emit_extjump2(u_int addr, int target, int linker)
2477{
2478 u_char *ptr=(u_char *)addr;
2479 assert((ptr[3]&0x0e)==0xa);
2480 (void)ptr;
2481
2482 emit_loadlp(target,0);
2483 emit_loadlp(addr,1);
2484 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2485 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2486//DEBUG >
2487#ifdef DEBUG_CYCLE_COUNT
2488 emit_readword((int)&last_count,ECX);
2489 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2490 emit_readword((int)&next_interupt,ECX);
2491 emit_writeword(HOST_CCREG,(int)&Count);
2492 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2493 emit_writeword(ECX,(int)&last_count);
2494#endif
2495//DEBUG <
2496 emit_jmp(linker);
2497}
2498
2499static void emit_extjump(int addr, int target)
2500{
2501 emit_extjump2(addr, target, (int)dyna_linker);
2502}
2503
2504static void emit_extjump_ds(int addr, int target)
2505{
2506 emit_extjump2(addr, target, (int)dyna_linker_ds);
2507}
2508
2509// put rt_val into rt, potentially making use of rs with value rs_val
2510static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2511{
2512 u_int armval;
2513 int diff;
2514 if(genimm(rt_val,&armval)) {
2515 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2516 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2517 return;
2518 }
2519 if(genimm(~rt_val,&armval)) {
2520 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2521 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2522 return;
2523 }
2524 diff=rt_val-rs_val;
2525 if(genimm(diff,&armval)) {
2526 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2527 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2528 return;
2529 }else if(genimm(-diff,&armval)) {
2530 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2531 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2532 return;
2533 }
2534 emit_movimm(rt_val,rt);
2535}
2536
// Return 1 if emit_movimm_from can do its job cheaply for these two values,
// 0 otherwise.
// Two values count as similar when they are equal, or when v2-v1 or v1-v2,
// after dropping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;

  if(v1==v2)
    return 1;

  delta=v2-v1;

  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100?1:0;
}
2552
// Move host registers a0 and a1 into r0 and r1 respectively.
// A negative register number means "nothing to pass" for that slot, except
// that a1==0 is always moved. When the two are exactly crossed (a0 in r1,
// a1 in r0) they are swapped through r2, which gets trashed.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 is about to be overwritten, so move a1 out of it first
      emit_mov(a1,1);
      if(a0>=0)
        emit_mov(a0,0);
      return;
    }
  }
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2569
2570static void mov_loadtype_adj(int type,int rs,int rt)
2571{
2572 switch(type) {
2573 case LOADB_STUB: emit_signextend8(rs,rt); break;
2574 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2575 case LOADH_STUB: emit_signextend16(rs,rt); break;
2576 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2577 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2578 default: assert(0);
2579 }
2580}
2581
2582#include "pcsxmem.h"
2583#include "pcsxmem_inline.c"
2584
// Emit the out-of-line slow path for load stub n.
// Fields of stubs[n] as read below: [0] stub type (LOADx_STUB), [1] location
// of the branch that must be retargeted to this stub, [2] return address,
// [3] instruction index, [4] host register holding the address, [5] pointer
// to the struct regstat of the instruction, [6] cycle adjustment, [7] mask of
// live host registers.
// The emitted code looks the address up in mem_rtab (indexed by address>>12),
// shifts the entry left by one with a flag-setting shift and, on the "cc"
// condition, performs the load directly and returns. Otherwise it calls
// jump_handler_read8/16/32 and moves the result from r0 into the target
// register.
// NOTE(review): get_direct_memhandler in this file treats bit 31 of a table
// entry as "this is a handler"; presumably the lsls moves that bit into the
// carry flag -- confirm.
2585static void do_readstub(int n)
2586{
2587 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2588 literal_pool(256);
2589 set_jump_target(stubs[n][1],(int)out);
2590 int type=stubs[n][0];
2591 int i=stubs[n][3];
2592 int rs=stubs[n][4];
2593 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2594 u_int reglist=stubs[n][7];
2595 signed char *i_regmap=i_regs->regmap;
2596 int rt;
// coprocessor loads and LWL/LWR-style loads go through FTEMP rather than
// directly into the guest target register
2597 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2598 rt=get_reg(i_regmap,FTEMP);
2599 }else{
2600 rt=get_reg(i_regmap,rt1[i]);
2601 }
2602 assert(rs>=0);
2603 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
// look for a free scratch register among r0-r9 and r12 (mask 0x13ff) that is
// neither live nor the address register
2604 reglist|=(1<<rs);
2605 for(r=0;r<=12;r++) {
2606 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2607 temp=r; break;
2608 }
2609 }
// the load target is about to be overwritten, so it need not be saved
2610 if(rt>=0&&rt1[i]!=0)
2611 reglist&=~(1<<rt);
// no free register: save the live ones up front and take r0 (or r2 when the
// address is in r0)
2612 if(temp==-1) {
2613 save_regs(reglist);
2614 regs_saved=1;
2615 temp=(rs==0)?2:0;
2616 }
// prefer r1 over HOST_TEMPREG for the table entry when r1 is available
2617 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2618 temp2=1;
// temp2 = mem_rtab[rs>>12] << 1, setting flags
2619 emit_readword((int)&mem_rtab,temp);
2620 emit_shrimm(rs,12,temp2);
2621 emit_readword_dualindexedx4(temp,temp2,temp2);
2622 emit_lsls_imm(temp2,1,temp2);
// conditional direct load from [temp2 + rs], only when there is a register
// to load into
2623 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2624 switch(type) {
2625 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2626 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2627 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2628 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2629 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2630 }
2631 }
// direct access done: leave the stub, via the register restore at the end
// if registers were already saved
2632 if(regs_saved) {
2633 restore_jump=(int)out;
2634 emit_jcc(0); // jump to reg restore
2635 }
2636 else
2637 emit_jcc(stubs[n][2]); // return address
2638
// handler path
2639 if(!regs_saved)
2640 save_regs(reglist);
2641 int handler=0;
2642 if(type==LOADB_STUB||type==LOADBU_STUB)
2643 handler=(int)jump_handler_read8;
2644 if(type==LOADH_STUB||type==LOADHU_STUB)
2645 handler=(int)jump_handler_read16;
2646 if(type==LOADW_STUB)
2647 handler=(int)jump_handler_read32;
2648 assert(handler!=0);
// r0 = address, r1 = shifted table entry, r2 = cycle count plus adjustment
2649 pass_args(rs,temp2);
2650 int cc=get_reg(i_regmap,CCREG);
2651 if(cc<0)
2652 emit_loadreg(CCREG,2);
2653 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2654 emit_call(handler);
// the value is taken from r0 and extended/masked according to the load type
2655 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2656 mov_loadtype_adj(type,0,rt);
2657 }
2658 if(restore_jump)
2659 set_jump_target(restore_jump,(int)out);
2660 restore_regs(reglist);
2661 emit_jmp(stubs[n][2]); // return address
2662}
2663
2664// return memhandler, or get directly accessable address and return 0
2665static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2666{
2667 u_int l1,l2=0;
2668 l1=((u_int *)table)[addr>>12];
2669 if((l1&(1<<31))==0) {
2670 u_int v=l1<<1;
2671 *addr_host=v+addr;
2672 return 0;
2673 }
2674 else {
2675 l1<<=1;
2676 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2677 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2678 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2679 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2680 else
2681 l2=((u_int *)l1)[(addr&0xfff)/4];
2682 if((l2&(1<<31))==0) {
2683 u_int v=l2<<1;
2684 *addr_host=v+(addr&0xfff);
2685 return 0;
2686 }
2687 return l2<<1;
2688 }
2689}
2690
// Emit code for a load from the constant address addr.
// type is the LOADx_STUB kind, i the instruction index, regmap the host
// register mapping, target the guest register being loaded, adj the cycle
// adjustment and reglist the mask of live host registers.
// Three strategies, tried in order:
//   1. pcsx_direct_read() handles it completely,
//   2. the address is directly accessible (no handler in mem_rtab): load
//      straight from the host address,
//   3. call the memory handler.
2691static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2692{
2693 int rs=get_reg(regmap,target);
2694 int rt=get_reg(regmap,target);
// no host register for the target: use the register mapped to -1 for the
// address instead
2695 if(rs<0) rs=get_reg(regmap,-1);
2696 assert(rs>=0);
2697 u_int handler,host_addr=0,is_dynamic,far_call=0;
2698 int cc=get_reg(regmap,CCREG);
2699 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2700 return;
2701 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2702 if (handler==0) {
// directly accessible memory; nothing to emit if the result is unused
2703 if(rt<0||rt1[i]==0)
2704 return;
// rs is expected to hold addr; turn it into the host address
2705 if(addr!=host_addr)
2706 emit_movimm_from(addr,rs,host_addr,rs);
2707 switch(type) {
2708 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2709 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2710 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2711 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2712 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2713 default: assert(0);
2714 }
2715 return;
2716 }
// handlers that pcsxmem reports as dynamic are not called directly; the
// generic jump_handler_read* entry points are used instead
2717 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2718 if(is_dynamic) {
2719 if(type==LOADB_STUB||type==LOADBU_STUB)
2720 handler=(int)jump_handler_read8;
2721 if(type==LOADH_STUB||type==LOADHU_STUB)
2722 handler=(int)jump_handler_read16;
2723 if(type==LOADW_STUB)
2724 handler=(int)jump_handler_read32;
2725 }
2726
2727 // call a memhandler
// the load target gets overwritten, so it is not saved/restored
2728 if(rt>=0&&rt1[i]!=0)
2729 reglist&=~(1<<rt);
2730 save_regs(reglist);
// r0 = address
2731 if(target==0)
2732 emit_movimm(addr,0);
2733 else if(rs!=0)
2734 emit_mov(rs,0);
// the handler must be within +/-32MB to be called with a direct branch
2735 int offset=(int)handler-(int)out-8;
2736 if(offset<-33554432||offset>=33554432) {
2737 // unreachable memhandler, a plugin func perhaps
2738 emit_movimm(handler,12);
2739 far_call=1;
2740 }
2741 if(cc<0)
2742 emit_loadreg(CCREG,2);
2743 if(is_dynamic) {
// jump_handler_read*: r1 = shifted table entry, r2 = adjusted cycle count
2744 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2745 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2746 }
2747 else {
// direct handler call: Count = adjusted cycle count + last_count is written
// before the call
2748 emit_readword((int)&last_count,3);
2749 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2750 emit_add(2,3,2);
2751 emit_writeword(2,(int)&Count);
2752 }
2753
2754 if(far_call)
2755 emit_callreg(12);
2756 else
2757 emit_call(handler);
2758
// the value is taken from r0 and extended/masked according to the load type
2759 if(rt>=0&&rt1[i]!=0) {
2760 switch(type) {
2761 case LOADB_STUB: emit_signextend8(0,rt); break;
2762 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2763 case LOADH_STUB: emit_signextend16(0,rt); break;
2764 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2765 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2766 default: assert(0);
2767 }
2768 }
2769 restore_regs(reglist);
2770}
2771
// Emit the out-of-line slow path for store stub n.
// Fields of stubs[n] as read below: [0] stub type (STOREx_STUB), [1] location
// of the branch that must be retargeted to this stub, [2] return address,
// [3] instruction index, [4] host register holding the address, [5] pointer
// to the struct regstat of the instruction, [6] cycle adjustment, [7] mask of
// live host registers.
// The emitted code looks the address up in mem_wtab (indexed by address>>12),
// shifts the entry left by one with a flag-setting shift and, on the "cc"
// condition, performs the store directly and returns. Otherwise it calls
// jump_handler_write8/16/32, which returns the new cycle count in r0.
2772static void do_writestub(int n)
2773{
2774 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2775 literal_pool(256);
2776 set_jump_target(stubs[n][1],(int)out);
2777 int type=stubs[n][0];
2778 int i=stubs[n][3];
2779 int rs=stubs[n][4];
2780 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2781 u_int reglist=stubs[n][7];
2782 signed char *i_regmap=i_regs->regmap;
2783 int rt,r;
// coprocessor stores take their data from FTEMP, everything else from rs2
2784 if(itype[i]==C1LS||itype[i]==C2LS) {
2785 rt=get_reg(i_regmap,r=FTEMP);
2786 }else{
2787 rt=get_reg(i_regmap,r=rs2[i]);
2788 }
2789 assert(rs>=0);
2790 assert(rt>=0);
2791 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
// look for a free scratch register among r0-r9 and r12 (mask 0x13ff) that is
// neither live nor one of the address/data registers
2792 int reglist2=reglist|(1<<rs)|(1<<rt);
2793 for(rtmp=0;rtmp<=12;rtmp++) {
2794 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2795 temp=rtmp; break;
2796 }
2797 }
// none free: save the live registers up front and take the first of r0-r3
// that is not the address or data register
2798 if(temp==-1) {
2799 save_regs(reglist);
2800 regs_saved=1;
2801 for(rtmp=0;rtmp<=3;rtmp++)
2802 if(rtmp!=rs&&rtmp!=rt)
2803 {temp=rtmp;break;}
2804 }
// prefer r3 over HOST_TEMPREG for the table entry when r3 is available
2805 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2806 temp2=3;
// temp2 = mem_wtab[rs>>12] << 1, setting flags
2807 emit_readword((int)&mem_wtab,temp);
2808 emit_shrimm(rs,12,temp2);
2809 emit_readword_dualindexedx4(temp,temp2,temp2);
2810 emit_lsls_imm(temp2,1,temp2);
// conditional direct store to [temp2 + rs]
2811 switch(type) {
2812 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2813 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2814 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2815 default: assert(0);
2816 }
2817 if(regs_saved) {
2818 restore_jump=(int)out;
2819 emit_jcc(0); // jump to reg restore
2820 }
2821 else
2822 emit_jcc(stubs[n][2]); // return address (invcode check)
2823
// handler path
2824 if(!regs_saved)
2825 save_regs(reglist);
2826 int handler=0;
2827 switch(type) {
2828 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2829 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2830 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2831 }
2832 assert(handler!=0);
// r0 = address, r1 = data, r2 = adjusted cycle count, r3 = shifted table entry
2833 pass_args(rs,rt);
2834 if(temp2!=3)
2835 emit_mov(temp2,3);
2836 int cc=get_reg(i_regmap,CCREG);
2837 if(cc<0)
2838 emit_loadreg(CCREG,2);
2839 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2840 // returns new cycle_count
2841 emit_call(handler);
// undo the adjustment on the returned count and put it back in the cycle
// count register (or write it back when CCREG has no host register)
2842 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
2843 if(cc<0)
2844 emit_storereg(CCREG,2);
2845 if(restore_jump)
2846 set_jump_target(restore_jump,(int)out);
2847 restore_regs(reglist);
2848 ra=stubs[n][2];
2849 emit_jmp(ra);
2850}
2851
2852static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2853{
2854 int rs=get_reg(regmap,-1);
2855 int rt=get_reg(regmap,target);
2856 assert(rs>=0);
2857 assert(rt>=0);
2858 u_int handler,host_addr=0;
2859 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2860 if (handler==0) {
2861 if(addr!=host_addr)
2862 emit_movimm_from(addr,rs,host_addr,rs);
2863 switch(type) {
2864 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2865 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2866 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2867 default: assert(0);
2868 }
2869 return;
2870 }
2871
2872 // call a memhandler
2873 save_regs(reglist);
2874 pass_args(rs,rt);
2875 int cc=get_reg(regmap,CCREG);
2876 if(cc<0)
2877 emit_loadreg(CCREG,2);
2878 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2879 emit_movimm(handler,3);
2880 // returns new cycle_count
2881 emit_call((int)jump_handler_write_h);
2882 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2883 if(cc<0)
2884 emit_storereg(CCREG,2);
2885 restore_regs(reglist);
2886}
2887
// Emit the out-of-line stub n for an unaligned store (SWL/SWR only, see the
// opcode assert).
// Fields of stubs[n] as read below: [1] location of the branch to retarget
// here, [2] return address, [3] instruction index, [4] pointer to the struct
// regstat, [5] host register holding the address, [6] cycle adjustment,
// [7] mask of live host registers.
// Only the "#if 1" half is compiled: it simply calls jump_handle_swl or
// jump_handle_swr. The "#else" half is an older read-modify-write
// implementation that is currently disabled.
2888static void do_unalignedwritestub(int n)
2889{
2890 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2891 literal_pool(256);
2892 set_jump_target(stubs[n][1],(int)out);
2893
2894 int i=stubs[n][3];
2895 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2896 int addr=stubs[n][5];
2897 u_int reglist=stubs[n][7];
2898 signed char *i_regmap=i_regs->regmap;
2899 int temp2=get_reg(i_regmap,FTEMP);
2900 int rt;
2901 rt=get_reg(i_regmap,rs2[i]);
2902 assert(rt>=0);
2903 assert(addr>=0);
2904 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// the address register must survive the call; the FTEMP register need not
// NOTE(review): temp2 is not checked for >=0 before being used as a shift
// count here -- presumably FTEMP is always allocated for these instructions
2905 reglist|=(1<<addr);
2906 reglist&=~(1<<temp2);
2907
2908#if 1
2909 // don't bother with it and call write handler
2910 save_regs(reglist);
// r0 = address, r1 = data, r2 = adjusted cycle count
2911 pass_args(addr,rt);
2912 int cc=get_reg(i_regmap,CCREG);
2913 if(cc<0)
2914 emit_loadreg(CCREG,2);
2915 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2916 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
// r0 is taken as the new cycle count; undo the adjustment and put it back
2917 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
2918 if(cc<0)
2919 emit_storereg(CCREG,2);
2920 restore_regs(reglist);
2921 emit_jmp(stubs[n][2]); // return address
2922#else
// disabled: read the aligned word, merge the register into it and write it
// back through the readmem/writemem tables
2923 emit_andimm(addr,0xfffffffc,temp2);
2924 emit_writeword(temp2,(int)&address);
2925
2926 save_regs(reglist);
2927 emit_shrimm(addr,16,1);
2928 int cc=get_reg(i_regmap,CCREG);
2929 if(cc<0) {
2930 emit_loadreg(CCREG,2);
2931 }
2932 emit_movimm((u_int)readmem,0);
2933 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2934 emit_call((int)&indirect_jump_indexed);
2935 restore_regs(reglist);
2936
2937 emit_readword((int)&readmem_dword,temp2);
2938 int temp=addr; //hmh
2939 emit_shlimm(addr,3,temp);
2940 emit_andimm(temp,24,temp);
2941#ifdef BIG_ENDIAN_MIPS
2942 if (opcode[i]==0x2e) // SWR
2943#else
2944 if (opcode[i]==0x2a) // SWL
2945#endif
2946 emit_xorimm(temp,24,temp);
2947 emit_movimm(-1,HOST_TEMPREG);
2948 if (opcode[i]==0x2a) { // SWL
2949 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2950 emit_orrshr(rt,temp,temp2);
2951 }else{
2952 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshl(rt,temp,temp2);
2954 }
2955 emit_readword((int)&address,addr);
2956 emit_writeword(temp2,(int)&word);
2957 //save_regs(reglist); // don't need to, no state changes
2958 emit_shrimm(addr,16,1);
2959 emit_movimm((u_int)writemem,0);
2960 //emit_call((int)&indirect_jump_indexed);
2961 emit_mov(15,14);
2962 emit_readword_dualindexedx4(0,1,15);
2963 emit_readword((int)&Count,HOST_TEMPREG);
2964 emit_readword((int)&next_interupt,2);
2965 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2966 emit_writeword(2,(int)&last_count);
2967 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2968 if(cc<0) {
2969 emit_storereg(CCREG,HOST_TEMPREG);
2970 }
2971 restore_regs(reglist);
2972 emit_jmp(stubs[n][2]); // return address
2973#endif
2974}
2975
2976static void do_invstub(int n)
2977{
2978 literal_pool(20);
2979 u_int reglist=stubs[n][3];
2980 set_jump_target(stubs[n][1],(int)out);
2981 save_regs(reglist);
2982 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2983 emit_call((int)&invalidate_addr);
2984 restore_regs(reglist);
2985 emit_jmp(stubs[n][2]); // return address
2986}
2987
2988int do_dirty_stub(int i)
2989{
2990 assem_debug("do_dirty_stub %x\n",start+i*4);
2991 u_int addr=(u_int)source;
2992 // Careful about the code output here, verify_dirty needs to parse it.
2993 #ifndef HAVE_ARMV7
2994 emit_loadlp(addr,1);
2995 emit_loadlp((int)copy,2);
2996 emit_loadlp(slen*4,3);
2997 #else
2998 emit_movw(addr&0x0000FFFF,1);
2999 emit_movw(((u_int)copy)&0x0000FFFF,2);
3000 emit_movt(addr&0xFFFF0000,1);
3001 emit_movt(((u_int)copy)&0xFFFF0000,2);
3002 emit_movw(slen*4,3);
3003 #endif
3004 emit_movimm(start+i*4,0);
3005 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3006 int entry=(int)out;
3007 load_regs_entry(i);
3008 if(entry==(int)out) entry=instr_addr[i];
3009 emit_jmp(instr_addr[i]);
3010 return entry;
3011}
3012
3013static void do_dirty_stub_ds()
3014{
3015 // Careful about the code output here, verify_dirty needs to parse it.
3016 #ifndef HAVE_ARMV7
3017 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3018 emit_loadlp((int)copy,2);
3019 emit_loadlp(slen*4,3);
3020 #else
3021 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3022 emit_movw(((u_int)copy)&0x0000FFFF,2);
3023 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3024 emit_movt(((u_int)copy)&0xFFFF0000,2);
3025 emit_movw(slen*4,3);
3026 #endif
3027 emit_movimm(start+1,0);
3028 emit_call((int)&verify_code_ds);
3029}
3030
3031static void do_cop1stub(int n)
3032{
3033 literal_pool(256);
3034 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3035 set_jump_target(stubs[n][1],(int)out);
3036 int i=stubs[n][3];
3037// int rs=stubs[n][4];
3038 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3039 int ds=stubs[n][6];
3040 if(!ds) {
3041 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3042 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3043 }
3044 //else {printf("fp exception in delay slot\n");}
3045 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3046 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3047 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3048 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3049 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3050}
3051
3052/* Special assem */
3053
// Assemble a shift-by-register instruction (instruction index i, register
// state i_regs). Nothing is emitted when the destination is guest register 0
// or has no host register.
//  - opcode2 <= 0x07: 32-bit SLLV/SRLV/SRAV. The shift amount is masked to
//    5 bits in HOST_TEMPREG first.
//  - otherwise: 64-bit DSLLV/DSRLV/DSRAV on register pairs; the mapping for
//    (reg|64) is used for the second half of each pair (presumably the upper
//    32 bits -- the names th/sh suggest so). The 64-bit shift is built from
//    two 32-bit shifts plus a conditional fixup when bit 5 of the shift
//    amount is set.
3054static void shift_assemble_arm(int i,struct regstat *i_regs)
3055{
3056 if(rt1[i]) {
3057 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3058 {
3059 signed char s,t,shift;
3060 t=get_reg(i_regs->regmap,rt1[i]);
3061 s=get_reg(i_regs->regmap,rs1[i]);
3062 shift=get_reg(i_regs->regmap,rs2[i]);
3063 if(t>=0){
// shifting guest register 0 always gives 0
3064 if(rs1[i]==0)
3065 {
3066 emit_zeroreg(t);
3067 }
// shift amount comes from guest register 0: plain move
3068 else if(rs2[i]==0)
3069 {
3070 assert(s>=0);
3071 if(s!=t) emit_mov(s,t);
3072 }
3073 else
3074 {
3075 emit_andimm(shift,31,HOST_TEMPREG);
3076 if(opcode2[i]==4) // SLLV
3077 {
3078 emit_shl(s,HOST_TEMPREG,t);
3079 }
3080 if(opcode2[i]==6) // SRLV
3081 {
3082 emit_shr(s,HOST_TEMPREG,t);
3083 }
3084 if(opcode2[i]==7) // SRAV
3085 {
3086 emit_sar(s,HOST_TEMPREG,t);
3087 }
3088 }
3089 }
3090 } else { // DSLLV/DSRLV/DSRAV
3091 signed char sh,sl,th,tl,shift;
3092 th=get_reg(i_regs->regmap,rt1[i]|64);
3093 tl=get_reg(i_regs->regmap,rt1[i]);
3094 sh=get_reg(i_regs->regmap,rs1[i]|64);
3095 sl=get_reg(i_regs->regmap,rs1[i]);
3096 shift=get_reg(i_regs->regmap,rs2[i]);
3097 if(tl>=0){
3098 if(rs1[i]==0)
3099 {
3100 emit_zeroreg(tl);
3101 if(th>=0) emit_zeroreg(th);
3102 }
3103 else if(rs2[i]==0)
3104 {
3105 assert(sl>=0);
3106 if(sl!=tl) emit_mov(sl,tl);
3107 if(th>=0&&sh!=th) emit_mov(sh,th);
3108 }
3109 else
3110 {
3111 // FIXME: What if shift==tl ?
3112 assert(shift!=tl);
3113 int temp=get_reg(i_regs->regmap,-1);
// real_th remembers whether the second half of the result is actually wanted
3114 int real_th=th;
3115 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3116 assert(sl>=0);
3117 assert(sh>=0);
3118 emit_andimm(shift,31,HOST_TEMPREG);
3119 if(opcode2[i]==0x14) // DSLLV
3120 {
// NOTE(review): only the emit_shl below is guarded by th>=0; the following
// emit_orrshr still uses th as destination -- confirm th cannot be negative
// on this path
3121 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3122 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3123 emit_orrshr(sl,HOST_TEMPREG,th);
3124 emit_andimm(shift,31,HOST_TEMPREG);
// bit 5 of the shift amount set: the tl result moves to th and tl becomes 0
3125 emit_testimm(shift,32);
3126 emit_shl(sl,HOST_TEMPREG,tl);
3127 if(th>=0) emit_cmovne_reg(tl,th);
3128 emit_cmovne_imm(0,tl);
3129 }
3130 if(opcode2[i]==0x16) // DSRLV
3131 {
3132 assert(th>=0);
3133 emit_shr(sl,HOST_TEMPREG,tl);
3134 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3135 emit_orrshl(sh,HOST_TEMPREG,tl);
3136 emit_andimm(shift,31,HOST_TEMPREG);
// bit 5 of the shift amount set: the th result moves to tl and th becomes 0
3137 emit_testimm(shift,32);
3138 emit_shr(sh,HOST_TEMPREG,th);
3139 emit_cmovne_reg(th,tl);
3140 if(real_th>=0) emit_cmovne_imm(0,th);
3141 }
3142 if(opcode2[i]==0x17) // DSRAV
3143 {
3144 assert(th>=0);
3145 emit_shr(sl,HOST_TEMPREG,tl);
3146 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// temp = th shifted arithmetically right by 31, used below as the th value
// when bit 5 of the shift amount is set
3147 if(real_th>=0) {
3148 assert(temp>=0);
3149 emit_sarimm(th,31,temp);
3150 }
3151 emit_orrshl(sh,HOST_TEMPREG,tl);
3152 emit_andimm(shift,31,HOST_TEMPREG);
3153 emit_testimm(shift,32);
3154 emit_sar(sh,HOST_TEMPREG,th);
3155 emit_cmovne_reg(th,tl);
3156 if(real_th>=0) emit_cmovne_reg(temp,th);
3157 }
3158 }
3159 }
3160 }
3161 }
3162}
3163
3164static void speculate_mov(int rs,int rt)
3165{
3166 if(rt!=0) {
3167 smrv_strong_next|=1<<rt;
3168 smrv[rt]=smrv[rs];
3169 }
3170}
3171
3172static void speculate_mov_weak(int rs,int rt)
3173{
3174 if(rt!=0) {
3175 smrv_weak_next|=1<<rt;
3176 smrv[rt]=smrv[rs];
3177 }
3178}
3179
// Update the speculated guest register values (smrv[]) for instruction i.
// smrv_strong / smrv_weak are bitmasks of registers whose speculated value is
// considered valid at instruction i, with two levels of confidence; the
// *_next masks are the ones being built for the following instruction.
// At i==0 everything is seeded from the current psxRegs contents.
3180static void speculate_register_values(int i)
3181{
3182 if(i==0) {
3183 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3184 // gp,sp are likely to stay the same throughout the block
3185 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3186 smrv_weak_next=~smrv_strong_next;
3187 //printf(" llr %08x\n", smrv[4]);
3188 }
3189 smrv_strong=smrv_strong_next;
3190 smrv_weak=smrv_weak_next;
3191 switch(itype[i]) {
3192 case ALU:
// the result inherits the speculation of the first source that has one
// (strong sources preferred), otherwise it becomes unknown
3193 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3194 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3195 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3196 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3197 else {
3198 smrv_strong_next&=~(1<<rt1[i]);
3199 smrv_weak_next&=~(1<<rt1[i]);
3200 }
3201 break;
3202 case SHIFTIMM:
3203 smrv_strong_next&=~(1<<rt1[i]);
3204 smrv_weak_next&=~(1<<rt1[i]);
3205 // fallthrough
3206 case IMM16:
// a destination known to be constant gets that constant as a strong value;
// otherwise the speculation of rs1 is propagated
3207 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3208 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3209 if(hr>=0) {
3210 if(get_final_value(hr,i,&value))
3211 smrv[rt1[i]]=value;
3212 else smrv[rt1[i]]=constmap[i][hr];
3213 smrv_strong_next|=1<<rt1[i];
3214 }
3215 }
3216 else {
3217 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3218 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3219 }
3220 break;
3221 case LOAD:
3222 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3223 // special case for BIOS
3224 smrv[rt1[i]]=0xa0000000;
3225 smrv_strong_next|=1<<rt1[i];
3226 break;
3227 }
3228 // fallthrough
3229 case SHIFT:
3230 case LOADLR:
3231 case MOV:
// the destination value cannot be predicted any more
3232 smrv_strong_next&=~(1<<rt1[i]);
3233 smrv_weak_next&=~(1<<rt1[i]);
3234 break;
3235 case COP0:
3236 case COP2:
3237 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3238 smrv_strong_next&=~(1<<rt1[i]);
3239 smrv_weak_next&=~(1<<rt1[i]);
3240 }
3241 break;
3242 case C2LS:
3243 if (opcode[i]==0x32) { // LWC2
3244 smrv_strong_next&=~(1<<rt1[i]);
3245 smrv_weak_next&=~(1<<rt1[i]);
3246 }
3247 break;
3248 }
3249#if 0
3250 int r=4;
3251 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3252 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3253#endif
3254}
3255
// Memory region classes produced by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class (also returned for everything unmatched)
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3263
3264static int get_ptr_mem_type(u_int a)
3265{
3266 if(a < 0x00200000) {
3267 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3268 // return wrong, must use memhandler for BIOS self-test to pass
3269 // 007 does similar stuff from a00 mirror, weird stuff
3270 return MTYPE_8000;
3271 return MTYPE_0000;
3272 }
3273 if(0x1f800000 <= a && a < 0x1f801000)
3274 return MTYPE_1F80;
3275 if(0x80200000 <= a && a < 0x80800000)
3276 return MTYPE_8020;
3277 if(0xa0000000 <= a && a < 0xa0200000)
3278 return MTYPE_A000;
3279 return MTYPE_8000;
3280}
3281
3282static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3283{
3284 int jaddr=0,type=0;
3285 int mr=rs1[i];
3286 if(((smrv_strong|smrv_weak)>>mr)&1) {
3287 type=get_ptr_mem_type(smrv[mr]);
3288 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3289 }
3290 else {
3291 // use the mirror we are running on
3292 type=get_ptr_mem_type(start);
3293 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3294 }
3295
3296 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3297 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3298 addr=*addr_reg_override=HOST_TEMPREG;
3299 type=0;
3300 }
3301 else if(type==MTYPE_0000) { // RAM 0 mirror
3302 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3303 addr=*addr_reg_override=HOST_TEMPREG;
3304 type=0;
3305 }
3306 else if(type==MTYPE_A000) { // RAM A mirror
3307 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3308 addr=*addr_reg_override=HOST_TEMPREG;
3309 type=0;
3310 }
3311 else if(type==MTYPE_1F80) { // scratchpad
3312 if (psxH == (void *)0x1f800000) {
3313 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3314 emit_cmpimm(HOST_TEMPREG,0x1000);
3315 jaddr=(int)out;
3316 emit_jc(0);
3317 }
3318 else {
3319 // do usual RAM check, jump will go to the right handler
3320 type=0;
3321 }
3322 }
3323
3324 if(type==0)
3325 {
3326 emit_cmpimm(addr,RAM_SIZE);
3327 jaddr=(int)out;
3328 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3329 // Hint to branch predictor that the branch is unlikely to be taken
3330 if(rs1[i]>=28)
3331 emit_jno_unlikely(0);
3332 else
3333 #endif
3334 emit_jno(0);
3335 if(ram_offset!=0) {
3336 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3337 addr=*addr_reg_override=HOST_TEMPREG;
3338 }
3339 }
3340
3341 return jaddr;
3342}
3343
3344#define shift_assemble shift_assemble_arm
3345
3346static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3347{
3348 int s,th,tl,temp,temp2,addr,map=-1;
3349 int offset;
3350 int jaddr=0;
3351 int memtarget=0,c=0;
3352 int fastload_reg_override=0;
3353 u_int hr,reglist=0;
3354 th=get_reg(i_regs->regmap,rt1[i]|64);
3355 tl=get_reg(i_regs->regmap,rt1[i]);
3356 s=get_reg(i_regs->regmap,rs1[i]);
3357 temp=get_reg(i_regs->regmap,-1);
3358 temp2=get_reg(i_regs->regmap,FTEMP);
3359 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3360 assert(addr<0);
3361 offset=imm[i];
3362 for(hr=0;hr<HOST_REGS;hr++) {
3363 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3364 }
3365 reglist|=1<<temp;
3366 if(offset||s<0||c) addr=temp2;
3367 else addr=s;
3368 if(s>=0) {
3369 c=(i_regs->wasconst>>s)&1;
3370 if(c) {
3371 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3372 }
3373 }
3374 if(!c) {
3375 #ifdef RAM_OFFSET
3376 map=get_reg(i_regs->regmap,ROREG);
3377 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3378 #endif
3379 emit_shlimm(addr,3,temp);
3380 if (opcode[i]==0x22||opcode[i]==0x26) {
3381 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3382 }else{
3383 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3384 }
3385 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3386 }
3387 else {
3388 if(ram_offset&&memtarget) {
3389 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3390 fastload_reg_override=HOST_TEMPREG;
3391 }
3392 if (opcode[i]==0x22||opcode[i]==0x26) {
3393 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3394 }else{
3395 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3396 }
3397 }
3398 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3399 if(!c||memtarget) {
3400 int a=temp2;
3401 if(fastload_reg_override) a=fastload_reg_override;
3402 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3403 emit_readword_indexed_tlb(0,a,map,temp2);
3404 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3405 }
3406 else
3407 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3408 if(rt1[i]) {
3409 assert(tl>=0);
3410 emit_andimm(temp,24,temp);
3411#ifdef BIG_ENDIAN_MIPS
3412 if (opcode[i]==0x26) // LWR
3413#else
3414 if (opcode[i]==0x22) // LWL
3415#endif
3416 emit_xorimm(temp,24,temp);
3417 emit_movimm(-1,HOST_TEMPREG);
3418 if (opcode[i]==0x26) {
3419 emit_shr(temp2,temp,temp2);
3420 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3421 }else{
3422 emit_shl(temp2,temp,temp2);
3423 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3424 }
3425 emit_or(temp2,tl,tl);
3426 }
3427 //emit_storereg(rt1[i],tl); // DEBUG
3428 }
3429 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3430 // FIXME: little endian, fastload_reg_override
3431 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3432 if(!c||memtarget) {
3433 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3434 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3435 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3436 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3437 }
3438 else
3439 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3440 if(rt1[i]) {
3441 assert(th>=0);
3442 assert(tl>=0);
3443 emit_testimm(temp,32);
3444 emit_andimm(temp,24,temp);
3445 if (opcode[i]==0x1A) { // LDL
3446 emit_rsbimm(temp,32,HOST_TEMPREG);
3447 emit_shl(temp2h,temp,temp2h);
3448 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3449 emit_movimm(-1,HOST_TEMPREG);
3450 emit_shl(temp2,temp,temp2);
3451 emit_cmove_reg(temp2h,th);
3452 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3453 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3454 emit_orreq(temp2,tl,tl);
3455 emit_orrne(temp2,th,th);
3456 }
3457 if (opcode[i]==0x1B) { // LDR
3458 emit_xorimm(temp,24,temp);
3459 emit_rsbimm(temp,32,HOST_TEMPREG);
3460 emit_shr(temp2,temp,temp2);
3461 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3462 emit_movimm(-1,HOST_TEMPREG);
3463 emit_shr(temp2h,temp,temp2h);
3464 emit_cmovne_reg(temp2,tl);
3465 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3466 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3467 emit_orrne(temp2h,th,th);
3468 emit_orreq(temp2h,tl,tl);
3469 }
3470 }
3471 }
3472}
3473#define loadlr_assemble loadlr_assemble_arm
3474
3475static void cop0_assemble(int i,struct regstat *i_regs)
3476{
3477 if(opcode2[i]==0) // MFC0
3478 {
3479 signed char t=get_reg(i_regs->regmap,rt1[i]);
3480 char copr=(source[i]>>11)&0x1f;
3481 //assert(t>=0); // Why does this happen? OOT is weird
3482 if(t>=0&&rt1[i]!=0) {
3483 emit_readword((int)&reg_cop0+copr*4,t);
3484 }
3485 }
3486 else if(opcode2[i]==4) // MTC0
3487 {
3488 signed char s=get_reg(i_regs->regmap,rs1[i]);
3489 char copr=(source[i]>>11)&0x1f;
3490 assert(s>=0);
3491 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3492 if(copr==9||copr==11||copr==12||copr==13) {
3493 emit_readword((int)&last_count,HOST_TEMPREG);
3494 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3495 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3496 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3497 emit_writeword(HOST_CCREG,(int)&Count);
3498 }
3499 // What a mess. The status register (12) can enable interrupts,
3500 // so needs a special case to handle a pending interrupt.
3501 // The interrupt must be taken immediately, because a subsequent
3502 // instruction might disable interrupts again.
3503 if(copr==12||copr==13) {
3504 if (is_delayslot) {
3505 // burn cycles to cause cc_interrupt, which will
3506 // reschedule next_interupt. Relies on CCREG from above.
3507 assem_debug("MTC0 DS %d\n", copr);
3508 emit_writeword(HOST_CCREG,(int)&last_count);
3509 emit_movimm(0,HOST_CCREG);
3510 emit_storereg(CCREG,HOST_CCREG);
3511 emit_loadreg(rs1[i],1);
3512 emit_movimm(copr,0);
3513 emit_call((int)pcsx_mtc0_ds);
3514 emit_loadreg(rs1[i],s);
3515 return;
3516 }
3517 emit_movimm(start+i*4+4,HOST_TEMPREG);
3518 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3519 emit_movimm(0,HOST_TEMPREG);
3520 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
3521 }
3522 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3523 //else
3524 if(s==HOST_CCREG)
3525 emit_loadreg(rs1[i],1);
3526 else if(s!=1)
3527 emit_mov(s,1);
3528 emit_movimm(copr,0);
3529 emit_call((int)pcsx_mtc0);
3530 if(copr==9||copr==11||copr==12||copr==13) {
3531 emit_readword((int)&Count,HOST_CCREG);
3532 emit_readword((int)&next_interupt,HOST_TEMPREG);
3533 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3534 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3535 emit_writeword(HOST_TEMPREG,(int)&last_count);
3536 emit_storereg(CCREG,HOST_CCREG);
3537 }
3538 if(copr==12||copr==13) {
3539 assert(!is_delayslot);
3540 emit_readword((int)&pending_exception,14);
3541 emit_test(14,14);
3542 emit_jne((int)&do_interrupt);
3543 }
3544 emit_loadreg(rs1[i],s);
3545 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3546 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3547 cop1_usable=0;
3548 }
3549 else
3550 {
3551 assert(opcode2[i]==0x10);
3552 if((source[i]&0x3f)==0x10) // RFE
3553 {
3554 emit_readword((int)&Status,0);
3555 emit_andimm(0,0x3c,1);
3556 emit_andimm(0,~0xf,0);
3557 emit_orrshr_imm(1,2,0);
3558 emit_writeword(0,(int)&Status);
3559 }
3560 }
3561}
3562
3563static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3564{
3565 switch (copr) {
3566 case 1:
3567 case 3:
3568 case 5:
3569 case 8:
3570 case 9:
3571 case 10:
3572 case 11:
3573 emit_readword((int)&reg_cop2d[copr],tl);
3574 emit_signextend16(tl,tl);
3575 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3576 break;
3577 case 7:
3578 case 16:
3579 case 17:
3580 case 18:
3581 case 19:
3582 emit_readword((int)&reg_cop2d[copr],tl);
3583 emit_andimm(tl,0xffff,tl);
3584 emit_writeword(tl,(int)&reg_cop2d[copr]);
3585 break;
3586 case 15:
3587 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3588 emit_writeword(tl,(int)&reg_cop2d[copr]);
3589 break;
3590 case 28:
3591 case 29:
3592 emit_readword((int)&reg_cop2d[9],temp);
3593 emit_testimm(temp,0x8000); // do we need this?
3594 emit_andimm(temp,0xf80,temp);
3595 emit_andne_imm(temp,0,temp);
3596 emit_shrimm(temp,7,tl);
3597 emit_readword((int)&reg_cop2d[10],temp);
3598 emit_testimm(temp,0x8000);
3599 emit_andimm(temp,0xf80,temp);
3600 emit_andne_imm(temp,0,temp);
3601 emit_orrshr_imm(temp,2,tl);
3602 emit_readword((int)&reg_cop2d[11],temp);
3603 emit_testimm(temp,0x8000);
3604 emit_andimm(temp,0xf80,temp);
3605 emit_andne_imm(temp,0,temp);
3606 emit_orrshl_imm(temp,3,tl);
3607 emit_writeword(tl,(int)&reg_cop2d[copr]);
3608 break;
3609 default:
3610 emit_readword((int)&reg_cop2d[copr],tl);
3611 break;
3612 }
3613}
3614
3615static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3616{
3617 switch (copr) {
3618 case 15:
3619 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3620 emit_writeword(sl,(int)&reg_cop2d[copr]);
3621 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3622 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3623 emit_writeword(sl,(int)&reg_cop2d[14]);
3624 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3625 break;
3626 case 28:
3627 emit_andimm(sl,0x001f,temp);
3628 emit_shlimm(temp,7,temp);
3629 emit_writeword(temp,(int)&reg_cop2d[9]);
3630 emit_andimm(sl,0x03e0,temp);
3631 emit_shlimm(temp,2,temp);
3632 emit_writeword(temp,(int)&reg_cop2d[10]);
3633 emit_andimm(sl,0x7c00,temp);
3634 emit_shrimm(temp,3,temp);
3635 emit_writeword(temp,(int)&reg_cop2d[11]);
3636 emit_writeword(sl,(int)&reg_cop2d[28]);
3637 break;
3638 case 30:
3639 emit_movs(sl,temp);
3640 emit_mvnmi(temp,temp);
3641#ifdef HAVE_ARMV5
3642 emit_clz(temp,temp);
3643#else
3644 emit_movs(temp,HOST_TEMPREG);
3645 emit_movimm(0,temp);
3646 emit_jeq((int)out+4*4);
3647 emit_addpl_imm(temp,1,temp);
3648 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3649 emit_jns((int)out-2*4);
3650#endif
3651 emit_writeword(sl,(int)&reg_cop2d[30]);
3652 emit_writeword(temp,(int)&reg_cop2d[31]);
3653 break;
3654 case 31:
3655 break;
3656 default:
3657 emit_writeword(sl,(int)&reg_cop2d[copr]);
3658 break;
3659 }
3660}
3661
3662static void cop2_assemble(int i,struct regstat *i_regs)
3663{
3664 u_int copr=(source[i]>>11)&0x1f;
3665 signed char temp=get_reg(i_regs->regmap,-1);
3666 if (opcode2[i]==0) { // MFC2
3667 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3668 if(tl>=0&&rt1[i]!=0)
3669 cop2_get_dreg(copr,tl,temp);
3670 }
3671 else if (opcode2[i]==4) { // MTC2
3672 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3673 cop2_put_dreg(copr,sl,temp);
3674 }
3675 else if (opcode2[i]==2) // CFC2
3676 {
3677 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3678 if(tl>=0&&rt1[i]!=0)
3679 emit_readword((int)&reg_cop2c[copr],tl);
3680 }
3681 else if (opcode2[i]==6) // CTC2
3682 {
3683 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3684 switch(copr) {
3685 case 4:
3686 case 12:
3687 case 20:
3688 case 26:
3689 case 27:
3690 case 29:
3691 case 30:
3692 emit_signextend16(sl,temp);
3693 break;
3694 case 31:
3695 //value = value & 0x7ffff000;
3696 //if (value & 0x7f87e000) value |= 0x80000000;
3697 emit_shrimm(sl,12,temp);
3698 emit_shlimm(temp,12,temp);
3699 emit_testimm(temp,0x7f000000);
3700 emit_testeqimm(temp,0x00870000);
3701 emit_testeqimm(temp,0x0000e000);
3702 emit_orrne_imm(temp,0x80000000,temp);
3703 break;
3704 default:
3705 temp=sl;
3706 break;
3707 }
3708 emit_writeword(temp,(int)&reg_cop2c[copr]);
3709 assert(sl>=0);
3710 }
3711}
3712
3713static void c2op_prologue(u_int op,u_int reglist)
3714{
3715 save_regs_all(reglist);
3716#ifdef PCNT
3717 emit_movimm(op,0);
3718 emit_call((int)pcnt_gte_start);
3719#endif
3720 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3721}
3722
3723static void c2op_epilogue(u_int op,u_int reglist)
3724{
3725#ifdef PCNT
3726 emit_movimm(op,0);
3727 emit_call((int)pcnt_gte_end);
3728#endif
3729 restore_regs_all(reglist);
3730}
3731
3732static void c2op_call_MACtoIR(int lm,int need_flags)
3733{
3734 if(need_flags)
3735 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3736 else
3737 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3738}
3739
3740static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3741{
3742 emit_call((int)func);
3743 // func is C code and trashes r0
3744 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3745 if(need_flags||need_ir)
3746 c2op_call_MACtoIR(lm,need_flags);
3747 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3748}
3749
3750static void c2op_assemble(int i,struct regstat *i_regs)
3751{
3752 u_int c2op=source[i]&0x3f;
3753 u_int hr,reglist_full=0,reglist;
3754 int need_flags,need_ir;
3755 for(hr=0;hr<HOST_REGS;hr++) {
3756 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3757 }
3758 reglist=reglist_full&CALLER_SAVE_REGS;
3759
3760 if (gte_handlers[c2op]!=NULL) {
3761 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
3762 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3763 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3764 source[i],gte_unneeded[i+1],need_flags,need_ir);
3765 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3766 need_flags=0;
3767 int shift = (source[i] >> 19) & 1;
3768 int lm = (source[i] >> 10) & 1;
3769 switch(c2op) {
3770#ifndef DRC_DBG
3771 case GTE_MVMVA: {
3772#ifdef HAVE_ARMV5
3773 int v = (source[i] >> 15) & 3;
3774 int cv = (source[i] >> 13) & 3;
3775 int mx = (source[i] >> 17) & 3;
3776 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3777 c2op_prologue(c2op,reglist);
3778 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3779 if(v<3)
3780 emit_ldrd(v*8,0,4);
3781 else {
3782 emit_movzwl_indexed(9*4,0,4); // gteIR
3783 emit_movzwl_indexed(10*4,0,6);
3784 emit_movzwl_indexed(11*4,0,5);
3785 emit_orrshl_imm(6,16,4);
3786 }
3787 if(mx<3)
3788 emit_addimm(0,32*4+mx*8*4,6);
3789 else
3790 emit_readword((int)&zeromem_ptr,6);
3791 if(cv<3)
3792 emit_addimm(0,32*4+(cv*8+5)*4,7);
3793 else
3794 emit_readword((int)&zeromem_ptr,7);
3795#ifdef __ARM_NEON__
3796 emit_movimm(source[i],1); // opcode
3797 emit_call((int)gteMVMVA_part_neon);
3798 if(need_flags) {
3799 emit_movimm(lm,1);
3800 emit_call((int)gteMACtoIR_flags_neon);
3801 }
3802#else
3803 if(cv==3&&shift)
3804 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3805 else {
3806 emit_movimm(shift,1);
3807 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3808 }
3809 if(need_flags||need_ir)
3810 c2op_call_MACtoIR(lm,need_flags);
3811#endif
3812#else /* if not HAVE_ARMV5 */
3813 c2op_prologue(c2op,reglist);
3814 emit_movimm(source[i],1); // opcode
3815 emit_writeword(1,(int)&psxRegs.code);
3816 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3817#endif
3818 break;
3819 }
3820 case GTE_OP:
3821 c2op_prologue(c2op,reglist);
3822 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3823 if(need_flags||need_ir) {
3824 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3825 c2op_call_MACtoIR(lm,need_flags);
3826 }
3827 break;
3828 case GTE_DPCS:
3829 c2op_prologue(c2op,reglist);
3830 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3831 break;
3832 case GTE_INTPL:
3833 c2op_prologue(c2op,reglist);
3834 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3835 break;
3836 case GTE_SQR:
3837 c2op_prologue(c2op,reglist);
3838 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3839 if(need_flags||need_ir) {
3840 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3841 c2op_call_MACtoIR(lm,need_flags);
3842 }
3843 break;
3844 case GTE_DCPL:
3845 c2op_prologue(c2op,reglist);
3846 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3847 break;
3848 case GTE_GPF:
3849 c2op_prologue(c2op,reglist);
3850 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3851 break;
3852 case GTE_GPL:
3853 c2op_prologue(c2op,reglist);
3854 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3855 break;
3856#endif
3857 default:
3858 c2op_prologue(c2op,reglist);
3859#ifdef DRC_DBG
3860 emit_movimm(source[i],1); // opcode
3861 emit_writeword(1,(int)&psxRegs.code);
3862#endif
3863 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3864 break;
3865 }
3866 c2op_epilogue(c2op,reglist);
3867 }
3868}
3869
3870static void cop1_unusable(int i,struct regstat *i_regs)
3871{
3872 // XXX: should just just do the exception instead
3873 if(!cop1_usable) {
3874 int jaddr=(int)out;
3875 emit_jmp(0);
3876 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3877 cop1_usable=1;
3878 }
3879}
3880
// COP1 register moves are not implemented by this backend; the
// instruction is handed to the "coprocessor unusable" path.
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3885
// Floating point conversions are not implemented by this backend; the
// instruction is handed to the "coprocessor unusable" path.
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3891
// Floating point compares are not implemented by this backend; the
// instruction is handed to the "coprocessor unusable" path.
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3896
// Floating point arithmetic is not implemented by this backend; the
// instruction is handed to the "coprocessor unusable" path.
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3901
3902static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3903{
3904 // case 0x18: MULT
3905 // case 0x19: MULTU
3906 // case 0x1A: DIV
3907 // case 0x1B: DIVU
3908 // case 0x1C: DMULT
3909 // case 0x1D: DMULTU
3910 // case 0x1E: DDIV
3911 // case 0x1F: DDIVU
3912 if(rs1[i]&&rs2[i])
3913 {
3914 if((opcode2[i]&4)==0) // 32-bit
3915 {
3916 if(opcode2[i]==0x18) // MULT
3917 {
3918 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3919 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3920 signed char hi=get_reg(i_regs->regmap,HIREG);
3921 signed char lo=get_reg(i_regs->regmap,LOREG);
3922 assert(m1>=0);
3923 assert(m2>=0);
3924 assert(hi>=0);
3925 assert(lo>=0);
3926 emit_smull(m1,m2,hi,lo);
3927 }
3928 if(opcode2[i]==0x19) // MULTU
3929 {
3930 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3931 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3932 signed char hi=get_reg(i_regs->regmap,HIREG);
3933 signed char lo=get_reg(i_regs->regmap,LOREG);
3934 assert(m1>=0);
3935 assert(m2>=0);
3936 assert(hi>=0);
3937 assert(lo>=0);
3938 emit_umull(m1,m2,hi,lo);
3939 }
3940 if(opcode2[i]==0x1A) // DIV
3941 {
3942 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3943 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3944 assert(d1>=0);
3945 assert(d2>=0);
3946 signed char quotient=get_reg(i_regs->regmap,LOREG);
3947 signed char remainder=get_reg(i_regs->regmap,HIREG);
3948 assert(quotient>=0);
3949 assert(remainder>=0);
3950 emit_movs(d1,remainder);
3951 emit_movimm(0xffffffff,quotient);
3952 emit_negmi(quotient,quotient); // .. quotient and ..
3953 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3954 emit_movs(d2,HOST_TEMPREG);
3955 emit_jeq((int)out+52); // Division by zero
3956 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3957#ifdef HAVE_ARMV5
3958 emit_clz(HOST_TEMPREG,quotient);
3959 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3960#else
3961 emit_movimm(0,quotient);
3962 emit_addpl_imm(quotient,1,quotient);
3963 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3964 emit_jns((int)out-2*4);
3965#endif
3966 emit_orimm(quotient,1<<31,quotient);
3967 emit_shr(quotient,quotient,quotient);
3968 emit_cmp(remainder,HOST_TEMPREG);
3969 emit_subcs(remainder,HOST_TEMPREG,remainder);
3970 emit_adcs(quotient,quotient,quotient);
3971 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3972 emit_jcc((int)out-16); // -4
3973 emit_teq(d1,d2);
3974 emit_negmi(quotient,quotient);
3975 emit_test(d1,d1);
3976 emit_negmi(remainder,remainder);
3977 }
3978 if(opcode2[i]==0x1B) // DIVU
3979 {
3980 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3981 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3982 assert(d1>=0);
3983 assert(d2>=0);
3984 signed char quotient=get_reg(i_regs->regmap,LOREG);
3985 signed char remainder=get_reg(i_regs->regmap,HIREG);
3986 assert(quotient>=0);
3987 assert(remainder>=0);
3988 emit_mov(d1,remainder);
3989 emit_movimm(0xffffffff,quotient); // div0 case
3990 emit_test(d2,d2);
3991 emit_jeq((int)out+40); // Division by zero
3992#ifdef HAVE_ARMV5
3993 emit_clz(d2,HOST_TEMPREG);
3994 emit_movimm(1<<31,quotient);
3995 emit_shl(d2,HOST_TEMPREG,d2);
3996#else
3997 emit_movimm(0,HOST_TEMPREG);
3998 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3999 emit_lslpls_imm(d2,1,d2);
4000 emit_jns((int)out-2*4);
4001 emit_movimm(1<<31,quotient);
4002#endif
4003 emit_shr(quotient,HOST_TEMPREG,quotient);
4004 emit_cmp(remainder,d2);
4005 emit_subcs(remainder,d2,remainder);
4006 emit_adcs(quotient,quotient,quotient);
4007 emit_shrcc_imm(d2,1,d2);
4008 emit_jcc((int)out-16); // -4
4009 }
4010 }
4011 else // 64-bit
4012 assert(0);
4013 }
4014 else
4015 {
4016 // Multiply by zero is zero.
4017 // MIPS does not have a divide by zero exception.
4018 // The result is undefined, we return zero.
4019 signed char hr=get_reg(i_regs->regmap,HIREG);
4020 signed char lr=get_reg(i_regs->regmap,LOREG);
4021 if(hr>=0) emit_zeroreg(hr);
4022 if(lr>=0) emit_zeroreg(lr);
4023 }
4024}
4025#define multdiv_assemble multdiv_assemble_arm
4026
// Intentionally empty on ARM.
// On x86 this step puts the value 0xf8 into the register; on ARM the
// hash is produced by a single instruction in do_rhash() instead.
static void do_preload_rhash(int r)
{
  (void)r;
}
4031
4032static void do_preload_rhtbl(int ht) {
4033 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4034}
4035
// Emit the hash computation for the mini hash table: rh = rs & 0xf8.
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
4039
4040static void do_miniht_load(int ht,int rh) {
4041 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4042 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4043}
4044
4045static void do_miniht_jump(int rs,int rh,int ht) {
4046 emit_cmp(rh,rs);
4047 emit_ldreq_indexed(ht,4,15);
4048 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4049 emit_mov(rs,7);
4050 emit_jmp(jump_vaddr_reg[7]);
4051 #else
4052 emit_jmp(jump_vaddr_reg[rs]);
4053 #endif
4054}
4055
4056static void do_miniht_insert(u_int return_address,int rt,int temp) {
4057 #ifndef HAVE_ARMV7
4058 emit_movimm(return_address,rt); // PC into link register
4059 add_to_linker((int)out,return_address,1);
4060 emit_pcreladdr(temp);
4061 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4062 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4063 #else
4064 emit_movw(return_address&0x0000FFFF,rt);
4065 add_to_linker((int)out,return_address,1);
4066 emit_pcreladdr(temp);
4067 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4068 emit_movt(return_address&0xFFFF0000,rt);
4069 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4070 #endif
4071}
4072
4073static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4074{
4075 //if(dirty_pre==dirty) return;
4076 int hr,reg;
4077 for(hr=0;hr<HOST_REGS;hr++) {
4078 if(hr!=EXCLUDE_REG) {
4079 reg=pre[hr];
4080 if(((~u)>>(reg&63))&1) {
4081 if(reg>0) {
4082 if(((dirty_pre&~dirty)>>hr)&1) {
4083 if(reg>0&&reg<34) {
4084 emit_storereg(reg,hr);
4085 if( ((is32_pre&~uu)>>reg)&1 ) {
4086 emit_sarimm(hr,31,HOST_TEMPREG);
4087 emit_storereg(reg|64,HOST_TEMPREG);
4088 }
4089 }
4090 else if(reg>=64) {
4091 emit_storereg(reg,hr);
4092 }
4093 }
4094 }
4095 }
4096 }
4097 }
4098}
4099
4100
4101/* using strd could possibly help but you'd have to allocate registers in pairs
4102static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4103{
4104 int hr;
4105 int wrote=-1;
4106 for(hr=HOST_REGS-1;hr>=0;hr--) {
4107 if(hr!=EXCLUDE_REG) {
4108 if(pre[hr]!=entry[hr]) {
4109 if(pre[hr]>=0) {
4110 if((dirty>>hr)&1) {
4111 if(get_reg(entry,pre[hr])<0) {
4112 if(pre[hr]<64) {
4113 if(!((u>>pre[hr])&1)) {
4114 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4115 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4116 emit_sarimm(hr,31,hr+1);
4117 emit_strdreg(pre[hr],hr);
4118 }
4119 else
4120 emit_storereg(pre[hr],hr);
4121 }else{
4122 emit_storereg(pre[hr],hr);
4123 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4124 emit_sarimm(hr,31,hr);
4125 emit_storereg(pre[hr]|64,hr);
4126 }
4127 }
4128 }
4129 }else{
4130 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4131 emit_storereg(pre[hr],hr);
4132 }
4133 }
4134 wrote=hr;
4135 }
4136 }
4137 }
4138 }
4139 }
4140 }
4141 for(hr=0;hr<HOST_REGS;hr++) {
4142 if(hr!=EXCLUDE_REG) {
4143 if(pre[hr]!=entry[hr]) {
4144 if(pre[hr]>=0) {
4145 int nr;
4146 if((nr=get_reg(entry,pre[hr]))>=0) {
4147 emit_mov(hr,nr);
4148 }
4149 }
4150 }
4151 }
4152 }
4153}
4154#define wb_invalidate wb_invalidate_arm
4155*/
4156
4157static void mark_clear_cache(void *target)
4158{
4159 u_long offset = (char *)target - (char *)BASE_ADDR;
4160 u_int mask = 1u << ((offset >> 12) & 31);
4161 if (!(needs_clear_cache[offset >> 17] & mask)) {
4162 char *start = (char *)((u_long)target & ~4095ul);
4163 start_tcache_write(start, start + 4096);
4164 needs_clear_cache[offset >> 17] |= mask;
4165 }
4166}
4167
4168// Clearing the cache is rather slow on ARM Linux, so mark the areas
4169// that need to be cleared, and then only clear these areas once.
4170static void do_clear_cache()
4171{
4172 int i,j;
4173 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4174 {
4175 u_int bitmap=needs_clear_cache[i];
4176 if(bitmap) {
4177 u_int start,end;
4178 for(j=0;j<32;j++)
4179 {
4180 if(bitmap&(1<<j)) {
4181 start=(u_int)BASE_ADDR+i*131072+j*4096;
4182 end=start+4095;
4183 j++;
4184 while(j<32) {
4185 if(bitmap&(1<<j)) {
4186 end+=4096;
4187 j++;
4188 }else{
4189 end_tcache_write((void *)start,(void *)end);
4190 break;
4191 }
4192 }
4193 }
4194 }
4195 needs_clear_cache[i]=0;
4196 }
4197 }
4198}
4199
// CPU-architecture-specific initialization.
// Nothing needs to be set up for this backend.
static void arch_init()
{
}
4203
4204// vim:shiftwidth=2:expandtab