drc: rework for 64bit, part 1
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
/*
 * Bit mask named after the caller-saved registers; Mach-O targets
 * (__MACH__) use a value with one extra bit (0x200) set.
 * NOTE(review): presumably one bit per ARM register number - confirm
 * against the code that consumes this mask.
 */
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

/* Marks a definition that may legitimately end up unreferenced. */
#define unused __attribute__((unused))

/* With DRC_DBG defined, the "unused ..." family of warnings is silenced. */
#if defined(DRC_DBG)
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
61void indirect_jump_indexed();
62void indirect_jump();
63void do_interrupt();
64void jump_vaddr_r0();
65void jump_vaddr_r1();
66void jump_vaddr_r2();
67void jump_vaddr_r3();
68void jump_vaddr_r4();
69void jump_vaddr_r5();
70void jump_vaddr_r6();
71void jump_vaddr_r7();
72void jump_vaddr_r8();
73void jump_vaddr_r9();
74void jump_vaddr_r10();
75void jump_vaddr_r12();
76
77const u_int jump_vaddr_reg[16] = {
78 (int)jump_vaddr_r0,
79 (int)jump_vaddr_r1,
80 (int)jump_vaddr_r2,
81 (int)jump_vaddr_r3,
82 (int)jump_vaddr_r4,
83 (int)jump_vaddr_r5,
84 (int)jump_vaddr_r6,
85 (int)jump_vaddr_r7,
86 (int)jump_vaddr_r8,
87 (int)jump_vaddr_r9,
88 (int)jump_vaddr_r10,
89 0,
90 (int)jump_vaddr_r12,
91 0,
92 0,
93 0};
94
95void invalidate_addr_r0();
96void invalidate_addr_r1();
97void invalidate_addr_r2();
98void invalidate_addr_r3();
99void invalidate_addr_r4();
100void invalidate_addr_r5();
101void invalidate_addr_r6();
102void invalidate_addr_r7();
103void invalidate_addr_r8();
104void invalidate_addr_r9();
105void invalidate_addr_r10();
106void invalidate_addr_r12();
107
108const u_int invalidate_addr_reg[16] = {
109 (int)invalidate_addr_r0,
110 (int)invalidate_addr_r1,
111 (int)invalidate_addr_r2,
112 (int)invalidate_addr_r3,
113 (int)invalidate_addr_r4,
114 (int)invalidate_addr_r5,
115 (int)invalidate_addr_r6,
116 (int)invalidate_addr_r7,
117 (int)invalidate_addr_r8,
118 (int)invalidate_addr_r9,
119 (int)invalidate_addr_r10,
120 0,
121 (int)invalidate_addr_r12,
122 0,
123 0,
124 0};
125
126static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
127
128/* Linker */
129
/*
 * Patch the instruction word at `addr` so that it refers to `target_`.
 *
 * The form of the patch is chosen from byte 3 of the existing word (its
 * top byte when the word is stored little-endian):
 *  - 0xe2: the low 12 bits become (disp >> 2) | 0xF00; disp must be < 1024
 *          and both addresses must be word aligned;
 *  - 0x72: (generated by emit_jno_unlikely) same 12-bit patch when
 *          disp < 1024, a (disp >> 4) | 0xE00 patch when disp < 4096 and is
 *          a multiple of 16, otherwise the whole word is replaced by
 *          0x7A000000 | 24-bit branch field;
 *  - anything else must look like a branch ((byte3 & 0x0e) == 0xa) and
 *    gets its low 24 bits replaced by the branch field.
 *
 * disp is target - addr - 8, computed in 32-bit unsigned arithmetic.
 */
static void set_jump_target(void *addr, void *target_)
{
  u_int target = (u_int)target_;
  u_int *insn = (u_int *)addr;
  u_char top = ((u_char *)addr)[3];
  u_int disp = target - (u_int)insn - 8;
  u_int branch_field = (disp << 6) >> 8; // disp / 4, truncated to 24 bits

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      *insn = 0x7A000000 | branch_field;
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | branch_field;
}
161
// Disabled variant of set_jump_target.  When `copy` is set it moves the
// instruction found at the branch target into the word just before the
// branch and retargets the branch one instruction further.  It works, but
// the speed difference is usually insignificant, hence the #if 0.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  u_char top = ((u_char *)addr)[3];

  // Copying is only allowed over this specific placeholder word
  assert(!copy || insn[-1] == 0xe28dd000);

  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0             // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000)) {
    copy = 0;
  }
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
196
197/* Literal pool */
198static void add_literal(int addr,int val)
199{
200 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
201 literals[literalcount][0]=addr;
202 literals[literalcount][1]=val;
203 literalcount++;
204}
205
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the pointer stored in the literal that the stub's second
// instruction loads.  Per the stub layout this is the address of the
// jumping instruction.
static void *find_extjump_insn(void *stub)
{
  char *second = (char *)stub + 4;
  int *ldr = (int *)second;
  assert((*ldr & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int pool_offset = *ldr & 0xfff;
  // the literal lives at instruction address + 8 + ofs
  void **literal = (void **)(second + pool_offset + 8);
  return *literal;
}
216
// Find where an external branch is linked to, using the address of its stub:
// take the address that the second stub instruction loads (dyna_linker arg1),
// treat it as a pointer to a branch instruction,
// and return the address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000);
  // low 24 bits, sign-extended and scaled by 4
  int displacement = (*branch << 8) >> 6;
  return (int)branch + displacement + 8;
}
228
// Map a "dirty" entry point to its "clean" one by stepping over the
// preamble that ends in the call to verify_code.  If the word after the
// call is an unconditional branch (0xea......), its destination is returned
// instead of the address of that word.
static void *get_clean_addr(void *addr)
{
  #ifndef HAVE_ARMV7
  const int preamble_words = 4;
  #else
  const int preamble_words = 6;
  #endif
  signed int *insn = (signed int *)addr + preamble_words;

  // the call may sit one word later
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
247
248static int verify_dirty(u_int *ptr)
249{
250 #ifndef HAVE_ARMV7
251 u_int offset;
252 // get from literal pool
253 assert((*ptr&0xFFFF0000)==0xe59f0000);
254 offset=*ptr&0xfff;
255 u_int source=*(u_int*)((void *)ptr+offset+8);
256 ptr++;
257 assert((*ptr&0xFFFF0000)==0xe59f0000);
258 offset=*ptr&0xfff;
259 u_int copy=*(u_int*)((void *)ptr+offset+8);
260 ptr++;
261 assert((*ptr&0xFFFF0000)==0xe59f0000);
262 offset=*ptr&0xfff;
263 u_int len=*(u_int*)((void *)ptr+offset+8);
264 ptr++;
265 ptr++;
266 #else
267 // ARMv7 movw/movt
268 assert((*ptr&0xFFF00000)==0xe3000000);
269 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
270 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
271 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
272 ptr+=6;
273 #endif
274 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
275 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
276 //printf("verify_dirty: %x %x %x\n",source,copy,len);
277 return !memcmp((void *)source,(void *)copy,len);
278}
279
280// This doesn't necessarily find all clean entry points, just
281// guarantees that it's not dirty
282static int isclean(void *addr)
283{
284 #ifndef HAVE_ARMV7
285 u_int *ptr=((u_int *)addr)+4;
286 #else
287 u_int *ptr=((u_int *)addr)+6;
288 #endif
289 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
290 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
294 return 1;
295}
296
297// get source that block at addr was compiled from (host pointers)
298static void get_bounds(int addr,u_int *start,u_int *end)
299{
300 u_int *ptr=(u_int *)addr;
301 #ifndef HAVE_ARMV7
302 u_int offset;
303 // get from literal pool
304 assert((*ptr&0xFFFF0000)==0xe59f0000);
305 offset=*ptr&0xfff;
306 u_int source=*(u_int*)((void *)ptr+offset+8);
307 ptr++;
308 //assert((*ptr&0xFFFF0000)==0xe59f0000);
309 //offset=*ptr&0xfff;
310 //u_int copy=*(u_int*)((void *)ptr+offset+8);
311 ptr++;
312 assert((*ptr&0xFFFF0000)==0xe59f0000);
313 offset=*ptr&0xfff;
314 u_int len=*(u_int*)((void *)ptr+offset+8);
315 ptr++;
316 ptr++;
317 #else
318 // ARMv7 movw/movt
319 assert((*ptr&0xFFF00000)==0xe3000000);
320 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
321 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
322 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
323 ptr+=6;
324 #endif
325 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
326 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
327 *start=source;
328 *end=source+len;
329}
330
331/* Register allocation */
332
333// Note: registers are allocated clean (unmodified state)
334// if you intend to modify the register, you must call dirty_reg().
335static void alloc_reg(struct regstat *cur,int i,signed char reg)
336{
337 int r,hr;
338 int preferred_reg = (reg&7);
339 if(reg==CCREG) preferred_reg=HOST_CCREG;
340 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
341
342 // Don't allocate unused registers
343 if((cur->u>>reg)&1) return;
344
345 // see if it's already allocated
346 for(hr=0;hr<HOST_REGS;hr++)
347 {
348 if(cur->regmap[hr]==reg) return;
349 }
350
351 // Keep the same mapping if the register was already allocated in a loop
352 preferred_reg = loop_reg(i,reg,preferred_reg);
353
354 // Try to allocate the preferred register
355 if(cur->regmap[preferred_reg]==-1) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 r=cur->regmap[preferred_reg];
362 if(r<64&&((cur->u>>r)&1)) {
363 cur->regmap[preferred_reg]=reg;
364 cur->dirty&=~(1<<preferred_reg);
365 cur->isconst&=~(1<<preferred_reg);
366 return;
367 }
368 if(r>=64&&((cur->uu>>(r&63))&1)) {
369 cur->regmap[preferred_reg]=reg;
370 cur->dirty&=~(1<<preferred_reg);
371 cur->isconst&=~(1<<preferred_reg);
372 return;
373 }
374
375 // Clear any unneeded registers
376 // We try to keep the mapping consistent, if possible, because it
377 // makes branches easier (especially loops). So we try to allocate
378 // first (see above) before removing old mappings. If this is not
379 // possible then go ahead and clear out the registers that are no
380 // longer needed.
381 for(hr=0;hr<HOST_REGS;hr++)
382 {
383 r=cur->regmap[hr];
384 if(r>=0) {
385 if(r<64) {
386 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
387 }
388 else
389 {
390 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
391 }
392 }
393 }
394 // Try to allocate any available register, but prefer
395 // registers that have not been used recently.
396 if(i>0) {
397 for(hr=0;hr<HOST_REGS;hr++) {
398 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
399 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
400 cur->regmap[hr]=reg;
401 cur->dirty&=~(1<<hr);
402 cur->isconst&=~(1<<hr);
403 return;
404 }
405 }
406 }
407 }
408 // Try to allocate any available register
409 for(hr=0;hr<HOST_REGS;hr++) {
410 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
411 cur->regmap[hr]=reg;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 return;
415 }
416 }
417
418 // Ok, now we have to evict someone
419 // Pick a register we hopefully won't need soon
420 u_char hsn[MAXREG+1];
421 memset(hsn,10,sizeof(hsn));
422 int j;
423 lsn(hsn,i,&preferred_reg);
424 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
425 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
426 if(i>0) {
427 // Don't evict the cycle count at entry points, otherwise the entry
428 // stub will have to write it.
429 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
430 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
431 for(j=10;j>=3;j--)
432 {
433 // Alloc preferred register if available
434 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
435 for(hr=0;hr<HOST_REGS;hr++) {
436 // Evict both parts of a 64-bit register
437 if((cur->regmap[hr]&63)==r) {
438 cur->regmap[hr]=-1;
439 cur->dirty&=~(1<<hr);
440 cur->isconst&=~(1<<hr);
441 }
442 }
443 cur->regmap[preferred_reg]=reg;
444 return;
445 }
446 for(r=1;r<=MAXREG;r++)
447 {
448 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
449 for(hr=0;hr<HOST_REGS;hr++) {
450 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
451 if(cur->regmap[hr]==r+64) {
452 cur->regmap[hr]=reg;
453 cur->dirty&=~(1<<hr);
454 cur->isconst&=~(1<<hr);
455 return;
456 }
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
461 if(cur->regmap[hr]==r) {
462 cur->regmap[hr]=reg;
463 cur->dirty&=~(1<<hr);
464 cur->isconst&=~(1<<hr);
465 return;
466 }
467 }
468 }
469 }
470 }
471 }
472 }
473 for(j=10;j>=0;j--)
474 {
475 for(r=1;r<=MAXREG;r++)
476 {
477 if(hsn[r]==j) {
478 for(hr=0;hr<HOST_REGS;hr++) {
479 if(cur->regmap[hr]==r+64) {
480 cur->regmap[hr]=reg;
481 cur->dirty&=~(1<<hr);
482 cur->isconst&=~(1<<hr);
483 return;
484 }
485 }
486 for(hr=0;hr<HOST_REGS;hr++) {
487 if(cur->regmap[hr]==r) {
488 cur->regmap[hr]=reg;
489 cur->dirty&=~(1<<hr);
490 cur->isconst&=~(1<<hr);
491 return;
492 }
493 }
494 }
495 }
496 }
497 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
498}
499
500static void alloc_reg64(struct regstat *cur,int i,signed char reg)
501{
502 int preferred_reg = 8+(reg&1);
503 int r,hr;
504
505 // allocate the lower 32 bits
506 alloc_reg(cur,i,reg);
507
508 // Don't allocate unused registers
509 if((cur->uu>>reg)&1) return;
510
511 // see if the upper half is already allocated
512 for(hr=0;hr<HOST_REGS;hr++)
513 {
514 if(cur->regmap[hr]==reg+64) return;
515 }
516
517 // Keep the same mapping if the register was already allocated in a loop
518 preferred_reg = loop_reg(i,reg,preferred_reg);
519
520 // Try to allocate the preferred register
521 if(cur->regmap[preferred_reg]==-1) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 r=cur->regmap[preferred_reg];
528 if(r<64&&((cur->u>>r)&1)) {
529 cur->regmap[preferred_reg]=reg|64;
530 cur->dirty&=~(1<<preferred_reg);
531 cur->isconst&=~(1<<preferred_reg);
532 return;
533 }
534 if(r>=64&&((cur->uu>>(r&63))&1)) {
535 cur->regmap[preferred_reg]=reg|64;
536 cur->dirty&=~(1<<preferred_reg);
537 cur->isconst&=~(1<<preferred_reg);
538 return;
539 }
540
541 // Clear any unneeded registers
542 // We try to keep the mapping consistent, if possible, because it
543 // makes branches easier (especially loops). So we try to allocate
544 // first (see above) before removing old mappings. If this is not
545 // possible then go ahead and clear out the registers that are no
546 // longer needed.
547 for(hr=HOST_REGS-1;hr>=0;hr--)
548 {
549 r=cur->regmap[hr];
550 if(r>=0) {
551 if(r<64) {
552 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
553 }
554 else
555 {
556 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
557 }
558 }
559 }
560 // Try to allocate any available register, but prefer
561 // registers that have not been used recently.
562 if(i>0) {
563 for(hr=0;hr<HOST_REGS;hr++) {
564 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
565 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
566 cur->regmap[hr]=reg|64;
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
569 return;
570 }
571 }
572 }
573 }
574 // Try to allocate any available register
575 for(hr=0;hr<HOST_REGS;hr++) {
576 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
577 cur->regmap[hr]=reg|64;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 return;
581 }
582 }
583
584 // Ok, now we have to evict someone
585 // Pick a register we hopefully won't need soon
586 u_char hsn[MAXREG+1];
587 memset(hsn,10,sizeof(hsn));
588 int j;
589 lsn(hsn,i,&preferred_reg);
590 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
591 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
592 if(i>0) {
593 // Don't evict the cycle count at entry points, otherwise the entry
594 // stub will have to write it.
595 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
596 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
597 for(j=10;j>=3;j--)
598 {
599 // Alloc preferred register if available
600 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
601 for(hr=0;hr<HOST_REGS;hr++) {
602 // Evict both parts of a 64-bit register
603 if((cur->regmap[hr]&63)==r) {
604 cur->regmap[hr]=-1;
605 cur->dirty&=~(1<<hr);
606 cur->isconst&=~(1<<hr);
607 }
608 }
609 cur->regmap[preferred_reg]=reg|64;
610 return;
611 }
612 for(r=1;r<=MAXREG;r++)
613 {
614 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
615 for(hr=0;hr<HOST_REGS;hr++) {
616 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
617 if(cur->regmap[hr]==r+64) {
618 cur->regmap[hr]=reg|64;
619 cur->dirty&=~(1<<hr);
620 cur->isconst&=~(1<<hr);
621 return;
622 }
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
627 if(cur->regmap[hr]==r) {
628 cur->regmap[hr]=reg|64;
629 cur->dirty&=~(1<<hr);
630 cur->isconst&=~(1<<hr);
631 return;
632 }
633 }
634 }
635 }
636 }
637 }
638 }
639 for(j=10;j>=0;j--)
640 {
641 for(r=1;r<=MAXREG;r++)
642 {
643 if(hsn[r]==j) {
644 for(hr=0;hr<HOST_REGS;hr++) {
645 if(cur->regmap[hr]==r+64) {
646 cur->regmap[hr]=reg|64;
647 cur->dirty&=~(1<<hr);
648 cur->isconst&=~(1<<hr);
649 return;
650 }
651 }
652 for(hr=0;hr<HOST_REGS;hr++) {
653 if(cur->regmap[hr]==r) {
654 cur->regmap[hr]=reg|64;
655 cur->dirty&=~(1<<hr);
656 cur->isconst&=~(1<<hr);
657 return;
658 }
659 }
660 }
661 }
662 }
663 SysPrintf("This shouldn't happen");exit(1);
664}
665
666// Allocate a temporary register. This is done without regard to
667// dirty status or whether the register we request is on the unneeded list
668// Note: This will only allocate one register, even if called multiple times
669static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
670{
671 int r,hr;
672 int preferred_reg = -1;
673
674 // see if it's already allocated
675 for(hr=0;hr<HOST_REGS;hr++)
676 {
677 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
678 }
679
680 // Try to allocate any available register
681 for(hr=HOST_REGS-1;hr>=0;hr--) {
682 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
683 cur->regmap[hr]=reg;
684 cur->dirty&=~(1<<hr);
685 cur->isconst&=~(1<<hr);
686 return;
687 }
688 }
689
690 // Find an unneeded register
691 for(hr=HOST_REGS-1;hr>=0;hr--)
692 {
693 r=cur->regmap[hr];
694 if(r>=0) {
695 if(r<64) {
696 if((cur->u>>r)&1) {
697 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
698 cur->regmap[hr]=reg;
699 cur->dirty&=~(1<<hr);
700 cur->isconst&=~(1<<hr);
701 return;
702 }
703 }
704 }
705 else
706 {
707 if((cur->uu>>(r&63))&1) {
708 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 }
716 }
717 }
718
719 // Ok, now we have to evict someone
720 // Pick a register we hopefully won't need soon
721 // TODO: we might want to follow unconditional jumps here
722 // TODO: get rid of dupe code and make this into a function
723 u_char hsn[MAXREG+1];
724 memset(hsn,10,sizeof(hsn));
725 int j;
726 lsn(hsn,i,&preferred_reg);
727 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
728 if(i>0) {
729 // Don't evict the cycle count at entry points, otherwise the entry
730 // stub will have to write it.
731 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
732 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
733 for(j=10;j>=3;j--)
734 {
735 for(r=1;r<=MAXREG;r++)
736 {
737 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
738 for(hr=0;hr<HOST_REGS;hr++) {
739 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
740 if(cur->regmap[hr]==r+64) {
741 cur->regmap[hr]=reg;
742 cur->dirty&=~(1<<hr);
743 cur->isconst&=~(1<<hr);
744 return;
745 }
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
750 if(cur->regmap[hr]==r) {
751 cur->regmap[hr]=reg;
752 cur->dirty&=~(1<<hr);
753 cur->isconst&=~(1<<hr);
754 return;
755 }
756 }
757 }
758 }
759 }
760 }
761 }
762 for(j=10;j>=0;j--)
763 {
764 for(r=1;r<=MAXREG;r++)
765 {
766 if(hsn[r]==j) {
767 for(hr=0;hr<HOST_REGS;hr++) {
768 if(cur->regmap[hr]==r+64) {
769 cur->regmap[hr]=reg;
770 cur->dirty&=~(1<<hr);
771 cur->isconst&=~(1<<hr);
772 return;
773 }
774 }
775 for(hr=0;hr<HOST_REGS;hr++) {
776 if(cur->regmap[hr]==r) {
777 cur->regmap[hr]=reg;
778 cur->dirty&=~(1<<hr);
779 cur->isconst&=~(1<<hr);
780 return;
781 }
782 }
783 }
784 }
785 }
786 SysPrintf("This shouldn't happen");exit(1);
787}
788
789// Allocate a specific ARM register.
790static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
791{
792 int n;
793 int dirty=0;
794
795 // see if it's already allocated (and dealloc it)
796 for(n=0;n<HOST_REGS;n++)
797 {
798 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
799 dirty=(cur->dirty>>n)&1;
800 cur->regmap[n]=-1;
801 }
802 }
803
804 cur->regmap[hr]=reg;
805 cur->dirty&=~(1<<hr);
806 cur->dirty|=dirty<<hr;
807 cur->isconst&=~(1<<hr);
808}
809
810// Alloc cycle count into dedicated register
811static void alloc_cc(struct regstat *cur,int i)
812{
813 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
814}
815
816/* Special alloc */
817
818
819/* Assembler */
820
821static unused char regname[16][4] = {
822 "r0",
823 "r1",
824 "r2",
825 "r3",
826 "r4",
827 "r5",
828 "r6",
829 "r7",
830 "r8",
831 "r9",
832 "r10",
833 "fp",
834 "r12",
835 "sp",
836 "lr",
837 "pc"};
838
839static void output_w32(u_int word)
840{
841 *((u_int *)out)=word;
842 out+=4;
843}
844
/*
 * Pack three register numbers into their instruction bit fields:
 * rn at bit 16, rd at bit 12, rm at bit 0.  Each must be below 16.
 */
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
852
/*
 * Pack rd (bit 12), rn (bit 16), an 8-bit immediate (bits 0-7) and a
 * rotation derived from an even left-shift amount: ((32 - shift) & 30)
 * is placed at bit 7.
 */
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate = (32 - shift) & 30;
  u_int regs = (rn << 16) | (rd << 12);
  return regs | (rotate << 7) | imm;
}
861
/*
 * Try to express `imm` as an 8-bit value rotated by an even amount.
 *
 * On success returns 1 and stores the 12-bit field (rotation in bits 8-11,
 * value in bits 0-7) in *encoded.  On failure returns 0 and leaves
 * *encoded at 0.
 */
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
877
878static void genimm_checked(u_int imm,u_int *encoded)
879{
880 u_int ret=genimm(imm,encoded);
881 assert(ret);
882 (void)ret;
883}
884
885static u_int genjmp(u_int addr)
886{
887 int offset=addr-(int)out-8;
888 if(offset<-33554432||offset>=33554432) {
889 if (addr>2) {
890 SysPrintf("genjmp: out of range: %08x\n", offset);
891 exit(1);
892 }
893 return 0;
894 }
895 return ((u_int)offset>>2)&0xffffff;
896}
897
898static void emit_mov(int rs,int rt)
899{
900 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
901 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
902}
903
904static void emit_movs(int rs,int rt)
905{
906 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
907 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
908}
909
910static void emit_add(int rs1,int rs2,int rt)
911{
912 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
913 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
914}
915
916static void emit_adds(int rs1,int rs2,int rt)
917{
918 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
919 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
920}
921
922static void emit_adcs(int rs1,int rs2,int rt)
923{
924 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
926}
927
928static void emit_sbc(int rs1,int rs2,int rt)
929{
930 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
932}
933
934static void emit_sbcs(int rs1,int rs2,int rt)
935{
936 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
938}
939
940static void emit_neg(int rs, int rt)
941{
942 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
943 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
944}
945
946static void emit_negs(int rs, int rt)
947{
948 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
949 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
950}
951
952static void emit_sub(int rs1,int rs2,int rt)
953{
954 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
955 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
956}
957
958static void emit_subs(int rs1,int rs2,int rt)
959{
960 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
961 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
962}
963
964static void emit_zeroreg(int rt)
965{
966 assem_debug("mov %s,#0\n",regname[rt]);
967 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
968}
969
970static void emit_loadlp(u_int imm,u_int rt)
971{
972 add_literal((int)out,imm);
973 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
974 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
975}
976
977static void emit_movw(u_int imm,u_int rt)
978{
979 assert(imm<65536);
980 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
981 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
982}
983
984static void emit_movt(u_int imm,u_int rt)
985{
986 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
987 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
988}
989
990static void emit_movimm(u_int imm,u_int rt)
991{
992 u_int armval;
993 if(genimm(imm,&armval)) {
994 assem_debug("mov %s,#%d\n",regname[rt],imm);
995 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
996 }else if(genimm(~imm,&armval)) {
997 assem_debug("mvn %s,#%d\n",regname[rt],imm);
998 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
999 }else if(imm<65536) {
1000 #ifndef HAVE_ARMV7
1001 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1002 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1003 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1004 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1005 #else
1006 emit_movw(imm,rt);
1007 #endif
1008 }else{
1009 #ifndef HAVE_ARMV7
1010 emit_loadlp(imm,rt);
1011 #else
1012 emit_movw(imm&0x0000FFFF,rt);
1013 emit_movt(imm&0xFFFF0000,rt);
1014 #endif
1015 }
1016}
1017
1018static void emit_pcreladdr(u_int rt)
1019{
1020 assem_debug("add %s,pc,#?\n",regname[rt]);
1021 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1022}
1023
1024static void emit_loadreg(int r, int hr)
1025{
1026 if(r&64) {
1027 SysPrintf("64bit load in 32bit mode!\n");
1028 assert(0);
1029 return;
1030 }
1031 if((r&63)==0)
1032 emit_zeroreg(hr);
1033 else {
1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==CSREG) addr=(int)&Status;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 if(r==INVCP) addr=(int)&invc_ptr;
1041 u_int offset = addr-(u_int)&dynarec_local;
1042 assert(offset<4096);
1043 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1044 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1045 }
1046}
1047
1048static void emit_storereg(int r, int hr)
1049{
1050 if(r&64) {
1051 SysPrintf("64bit store in 32bit mode!\n");
1052 assert(0);
1053 return;
1054 }
1055 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1056 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1057 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1058 if(r==CCREG) addr=(int)&cycle_count;
1059 if(r==FSREG) addr=(int)&FCR31;
1060 u_int offset = addr-(u_int)&dynarec_local;
1061 assert(offset<4096);
1062 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1063 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1064}
1065
1066static void emit_test(int rs, int rt)
1067{
1068 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1069 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1070}
1071
1072static void emit_testimm(int rs,int imm)
1073{
1074 u_int armval;
1075 assem_debug("tst %s,#%d\n",regname[rs],imm);
1076 genimm_checked(imm,&armval);
1077 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1078}
1079
1080static void emit_testeqimm(int rs,int imm)
1081{
1082 u_int armval;
1083 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1084 genimm_checked(imm,&armval);
1085 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1086}
1087
1088static void emit_not(int rs,int rt)
1089{
1090 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1091 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1092}
1093
1094static void emit_mvnmi(int rs,int rt)
1095{
1096 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1097 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1098}
1099
1100static void emit_and(u_int rs1,u_int rs2,u_int rt)
1101{
1102 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1103 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1104}
1105
1106static void emit_or(u_int rs1,u_int rs2,u_int rt)
1107{
1108 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1109 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1110}
1111
1112static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1113{
1114 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
1118static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 assert(imm<32);
1123 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1124 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1125}
1126
1127static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1128{
1129 assert(rs<16);
1130 assert(rt<16);
1131 assert(imm<32);
1132 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1133 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1134}
1135
1136static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1137{
1138 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1139 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1140}
1141
1142static void emit_addimm(u_int rs,int imm,u_int rt)
1143{
1144 assert(rs<16);
1145 assert(rt<16);
1146 if(imm!=0) {
1147 u_int armval;
1148 if(genimm(imm,&armval)) {
1149 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1151 }else if(genimm(-imm,&armval)) {
1152 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1153 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1154 #ifdef HAVE_ARMV7
1155 }else if(rt!=rs&&(u_int)imm<65536) {
1156 emit_movw(imm&0x0000ffff,rt);
1157 emit_add(rs,rt,rt);
1158 }else if(rt!=rs&&(u_int)-imm<65536) {
1159 emit_movw(-imm&0x0000ffff,rt);
1160 emit_sub(rs,rt,rt);
1161 #endif
1162 }else if((u_int)-imm<65536) {
1163 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1165 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1167 }else {
1168 do {
1169 int shift = (ffs(imm) - 1) & ~1;
1170 int imm8 = imm & (0xff << shift);
1171 genimm_checked(imm8,&armval);
1172 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1173 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1174 rs = rt;
1175 imm &= ~imm8;
1176 }
1177 while (imm != 0);
1178 }
1179 }
1180 else if(rs!=rt) emit_mov(rs,rt);
1181}
1182
1183static void emit_addimm_and_set_flags(int imm,int rt)
1184{
1185 assert(imm>-65536&&imm<65536);
1186 u_int armval;
1187 if(genimm(imm,&armval)) {
1188 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1189 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1190 }else if(genimm(-imm,&armval)) {
1191 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1192 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1193 }else if(imm<0) {
1194 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1195 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1196 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1197 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1198 }else{
1199 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1200 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1201 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1202 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1203 }
1204}
1205
1206static void emit_addimm_no_flags(u_int imm,u_int rt)
1207{
1208 emit_addimm(rt,imm,rt);
1209}
1210
1211static void emit_addnop(u_int r)
1212{
1213 assert(r<16);
1214 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1215 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1216}
1217
1218static void emit_adcimm(u_int rs,int imm,u_int rt)
1219{
1220 u_int armval;
1221 genimm_checked(imm,&armval);
1222 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1223 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1224}
1225
1226static void emit_rscimm(int rs,int imm,u_int rt)
1227{
1228 assert(0);
1229 u_int armval;
1230 genimm_checked(imm,&armval);
1231 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1232 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1233}
1234
1235static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1236{
1237 // TODO: if(genimm(imm,&armval)) ...
1238 // else
1239 emit_movimm(imm,HOST_TEMPREG);
1240 emit_adds(HOST_TEMPREG,rsl,rtl);
1241 emit_adcimm(rsh,0,rth);
1242}
1243
1244static void emit_andimm(int rs,int imm,int rt)
1245{
1246 u_int armval;
1247 if(imm==0) {
1248 emit_zeroreg(rt);
1249 }else if(genimm(imm,&armval)) {
1250 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1251 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1252 }else if(genimm(~imm,&armval)) {
1253 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1254 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1255 }else if(imm==65535) {
1256 #ifndef HAVE_ARMV6
1257 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1258 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1259 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1260 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1261 #else
1262 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1263 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1264 #endif
1265 }else{
1266 assert(imm>0&&imm<65535);
1267 #ifndef HAVE_ARMV7
1268 assem_debug("mov r14,#%d\n",imm&0xFF00);
1269 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1270 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1271 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1272 #else
1273 emit_movw(imm,HOST_TEMPREG);
1274 #endif
1275 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1276 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1277 }
1278}
1279
1280static void emit_orimm(int rs,int imm,int rt)
1281{
1282 u_int armval;
1283 if(imm==0) {
1284 if(rs!=rt) emit_mov(rs,rt);
1285 }else if(genimm(imm,&armval)) {
1286 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1287 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1288 }else{
1289 assert(imm>0&&imm<65536);
1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1292 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1294 }
1295}
1296
1297static void emit_xorimm(int rs,int imm,int rt)
1298{
1299 u_int armval;
1300 if(imm==0) {
1301 if(rs!=rt) emit_mov(rs,rt);
1302 }else if(genimm(imm,&armval)) {
1303 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1305 }else{
1306 assert(imm>0&&imm<65536);
1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1309 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1311 }
1312}
1313
1314static void emit_shlimm(int rs,u_int imm,int rt)
1315{
1316 assert(imm>0);
1317 assert(imm<32);
1318 //if(imm==1) ...
1319 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1321}
1322
1323static void emit_lsls_imm(int rs,int imm,int rt)
1324{
1325 assert(imm>0);
1326 assert(imm<32);
1327 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1328 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1329}
1330
1331static unused void emit_lslpls_imm(int rs,int imm,int rt)
1332{
1333 assert(imm>0);
1334 assert(imm<32);
1335 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1337}
1338
1339static void emit_shrimm(int rs,u_int imm,int rt)
1340{
1341 assert(imm>0);
1342 assert(imm<32);
1343 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1345}
1346
1347static void emit_sarimm(int rs,u_int imm,int rt)
1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1353}
1354
1355static void emit_rorimm(int rs,u_int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1361}
1362
1363static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1364{
1365 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1366 assert(imm>0);
1367 assert(imm<32);
1368 //if(imm==1) ...
1369 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1370 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1371 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1372 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1373}
1374
1375static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1376{
1377 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1378 assert(imm>0);
1379 assert(imm<32);
1380 //if(imm==1) ...
1381 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1382 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1383 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1384 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1385}
1386
// Sign-extend the low 16 bits of rs into rt: a single sxth when
// HAVE_ARMV6 is defined, otherwise shift left then arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  const u_int insn=0xe6bf0070|rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1397
// Sign-extend the low 8 bits of rs into rt: a single sxtb when
// HAVE_ARMV6 is defined, otherwise shift left then arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  const u_int insn=0xe6af0070|rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1408
1409static void emit_shl(u_int rs,u_int shift,u_int rt)
1410{
1411 assert(rs<16);
1412 assert(rt<16);
1413 assert(shift<16);
1414 //if(imm==1) ...
1415 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1416 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1417}
1418
1419static void emit_shr(u_int rs,u_int shift,u_int rt)
1420{
1421 assert(rs<16);
1422 assert(rt<16);
1423 assert(shift<16);
1424 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1425 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1426}
1427
1428static void emit_sar(u_int rs,u_int shift,u_int rt)
1429{
1430 assert(rs<16);
1431 assert(rt<16);
1432 assert(shift<16);
1433 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1434 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1435}
1436
1437static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1438{
1439 assert(rs<16);
1440 assert(rt<16);
1441 assert(shift<16);
1442 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1443 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1444}
1445
1446static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1447{
1448 assert(rs<16);
1449 assert(rt<16);
1450 assert(shift<16);
1451 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1452 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1453}
1454
1455static void emit_cmpimm(int rs,int imm)
1456{
1457 u_int armval;
1458 if(genimm(imm,&armval)) {
1459 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1460 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1461 }else if(genimm(-imm,&armval)) {
1462 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1463 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1464 }else if(imm>0) {
1465 assert(imm<65536);
1466 emit_movimm(imm,HOST_TEMPREG);
1467 assem_debug("cmp %s,r14\n",regname[rs]);
1468 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1469 }else{
1470 assert(imm>-65536);
1471 emit_movimm(-imm,HOST_TEMPREG);
1472 assem_debug("cmn %s,r14\n",regname[rs]);
1473 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1474 }
1475}
1476
1477static void emit_cmovne_imm(int imm,int rt)
1478{
1479 assem_debug("movne %s,#%d\n",regname[rt],imm);
1480 u_int armval;
1481 genimm_checked(imm,&armval);
1482 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1483}
1484
1485static void emit_cmovl_imm(int imm,int rt)
1486{
1487 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1488 u_int armval;
1489 genimm_checked(imm,&armval);
1490 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1491}
1492
1493static void emit_cmovb_imm(int imm,int rt)
1494{
1495 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1496 u_int armval;
1497 genimm_checked(imm,&armval);
1498 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1499}
1500
1501static void emit_cmovs_imm(int imm,int rt)
1502{
1503 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1504 u_int armval;
1505 genimm_checked(imm,&armval);
1506 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1507}
1508
1509static void emit_cmove_reg(int rs,int rt)
1510{
1511 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1512 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1513}
1514
1515static void emit_cmovne_reg(int rs,int rt)
1516{
1517 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1518 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1519}
1520
1521static void emit_cmovl_reg(int rs,int rt)
1522{
1523 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1524 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1525}
1526
1527static void emit_cmovs_reg(int rs,int rt)
1528{
1529 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1531}
1532
// rt = 1 if the comparison of rs against imm yields "lt", else 0.
// When rt aliases rs the zeroing has to happen after the compare, so it
// is done with emit_movimm() instead of emit_zeroreg().
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1540
// rt = 1 if the comparison of rs against imm yields "cc", else 0.
// When rt aliases rs the zeroing has to happen after the compare, so it
// is done with emit_movimm() instead of emit_zeroreg().
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1548
// Set-less-than of the 64-bit pair (rsh:rsl) against imm, result in rt.
// The low word is compared first via emit_slti32(); the high word then
// overrides that result. rsh must not be the same register as rt.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // imm >= 0: high word is tested against zero
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
1566
// Unsigned-style set-less-than of the 64-bit pair (rsh:rsl) against imm,
// result in rt. The low word is compared first via emit_sltiu32(); the
// high word then overrides that result. rsh must not be the same register as rt.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // any high word other than -1 forces the result to 1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // imm >= 0: any non-zero high word forces the result to 0
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1582
1583static void emit_cmp(int rs,int rt)
1584{
1585 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1586 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1587}
1588
// rt = (rs > 0): compare rs with 1, preset rt to 1, then clear it with
// a "movlt" when rs compared less than 1.
static void emit_set_gz32(int rs, int rt)
{
  const int one=1;
  emit_cmpimm(rs,one);
  emit_movimm(one,rt);
  emit_cmovl_imm(0,rt);
}
1596
// rt = (rs != 0): either copy rs to rt with emit_movs() or test rs in
// place, then overwrite rt with 1 via "movne".
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
1604
// 64-bit "greater than zero" for the pair (rsh:rsl), result in rt.
// Starts from the 32-bit answer for the low word, then lets the high
// word override it: 1 if rsh is non-zero, 0 again if rsh is negative.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
1613
// 64-bit "not zero" for the pair (rsh:rsl), result in rt:
// rt = rsh | rsl with flags set, then rt = 1 via "movne".
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
1620
// rt = 1 if "cmp rs1,rs2" yields "lt", else 0.
// If rt aliases either source it is only cleared after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1629
// rt = 1 if "cmp rs1,rs2" yields "cc", else 0.
// If rt aliases either source it is only cleared after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1638
1639static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1640{
1641 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1642 assert(u1!=rt);
1643 assert(u2!=rt);
1644 emit_cmp(l1,l2);
1645 emit_movimm(0,rt);
1646 emit_sbcs(u1,u2,HOST_TEMPREG);
1647 emit_cmovl_imm(1,rt);
1648}
1649
1650static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1651{
1652 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1653 assert(u1!=rt);
1654 assert(u2!=rt);
1655 emit_cmp(l1,l2);
1656 emit_movimm(0,rt);
1657 emit_sbcs(u1,u2,HOST_TEMPREG);
1658 emit_cmovb_imm(1,rt);
1659}
1660
1661#ifdef DRC_DBG
// Declarations for two of the functions listed in the table below.
extern void gen_interupt();
extern void do_insn_cmp();
// Builds one table entry: the function's address as intptr_t and its name
// as a string literal with a leading space.
#define FUNCNAME(f) { (intptr_t)f, " " #f }
// Address-to-name table searched by func_name() to annotate branch
// targets in debug output (DRC_DBG builds only).
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};
1689
1690static const char *func_name(intptr_t a)
1691{
1692 int i;
1693 for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++)
1694 if (function_names[i].addr == a)
1695 return function_names[i].name;
1696 return "";
1697}
1698#else
// Without DRC_DBG there is no name table; func_name() expands to an empty string.
#define func_name(x) ""
1700#endif
1701
1702static void emit_call(int a)
1703{
1704 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1705 u_int offset=genjmp(a);
1706 output_w32(0xeb000000|offset);
1707}
1708
1709static void emit_jmp(int a)
1710{
1711 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1712 u_int offset=genjmp(a);
1713 output_w32(0xea000000|offset);
1714}
1715
1716static void emit_jne(int a)
1717{
1718 assem_debug("bne %x\n",a);
1719 u_int offset=genjmp(a);
1720 output_w32(0x1a000000|offset);
1721}
1722
1723static void emit_jeq(int a)
1724{
1725 assem_debug("beq %x\n",a);
1726 u_int offset=genjmp(a);
1727 output_w32(0x0a000000|offset);
1728}
1729
1730static void emit_js(int a)
1731{
1732 assem_debug("bmi %x\n",a);
1733 u_int offset=genjmp(a);
1734 output_w32(0x4a000000|offset);
1735}
1736
1737static void emit_jns(int a)
1738{
1739 assem_debug("bpl %x\n",a);
1740 u_int offset=genjmp(a);
1741 output_w32(0x5a000000|offset);
1742}
1743
1744static void emit_jl(int a)
1745{
1746 assem_debug("blt %x\n",a);
1747 u_int offset=genjmp(a);
1748 output_w32(0xba000000|offset);
1749}
1750
1751static void emit_jge(int a)
1752{
1753 assem_debug("bge %x\n",a);
1754 u_int offset=genjmp(a);
1755 output_w32(0xaa000000|offset);
1756}
1757
1758static void emit_jno(int a)
1759{
1760 assem_debug("bvc %x\n",a);
1761 u_int offset=genjmp(a);
1762 output_w32(0x7a000000|offset);
1763}
1764
1765static void emit_jc(int a)
1766{
1767 assem_debug("bcs %x\n",a);
1768 u_int offset=genjmp(a);
1769 output_w32(0x2a000000|offset);
1770}
1771
1772static void emit_jcc(int a)
1773{
1774 assem_debug("bcc %x\n",a);
1775 u_int offset=genjmp(a);
1776 output_w32(0x3a000000|offset);
1777}
1778
1779static void emit_callreg(u_int r)
1780{
1781 assert(r<15);
1782 assem_debug("blx %s\n",regname[r]);
1783 output_w32(0xe12fff30|r);
1784}
1785
1786static void emit_jmpreg(u_int r)
1787{
1788 assem_debug("mov pc,%s\n",regname[r]);
1789 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1790}
1791
1792static void emit_readword_indexed(int offset, int rs, int rt)
1793{
1794 assert(offset>-4096&&offset<4096);
1795 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1796 if(offset>=0) {
1797 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1798 }else{
1799 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1800 }
1801}
1802
1803static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1804{
1805 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1806 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1807}
1808
1809static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1810{
1811 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1812 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1813}
1814
1815static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1816{
1817 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1819}
1820
1821static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1822{
1823 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1824 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1825}
1826
1827static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1828{
1829 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1830 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1831}
1832
1833static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1834{
1835 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1836 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1837}
1838
// Word load: plain base+offset when map is negative, otherwise
// rs + map*4 (which requires addr==0).
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map>=0) {
    assert(addr==0);
    emit_readword_dualindexedx4(rs, map, rt);
    return;
  }
  emit_readword_indexed(addr, rs, rt);
}
1847
// Double-word load into rh (skipped when rh<0) and rl.
// Without a map register the two words come from addr and addr+4.
// With one, map is incremented between the two loads (map is modified)
// and rh must differ from rs.
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1860
1861static void emit_movsbl_indexed(int offset, int rs, int rt)
1862{
1863 assert(offset>-256&&offset<256);
1864 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1867 }else{
1868 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1869 }
1870}
1871
1872static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1873{
1874 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1875 else {
1876 if(addr==0) {
1877 emit_shlimm(map,2,map);
1878 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1879 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1880 }else{
1881 assert(addr>-256&&addr<256);
1882 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1883 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1884 emit_movsbl_indexed(addr, rt, rt);
1885 }
1886 }
1887}
1888
1889static void emit_movswl_indexed(int offset, int rs, int rt)
1890{
1891 assert(offset>-256&&offset<256);
1892 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1893 if(offset>=0) {
1894 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1895 }else{
1896 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1897 }
1898}
1899
1900static void emit_movzbl_indexed(int offset, int rs, int rt)
1901{
1902 assert(offset>-4096&&offset<4096);
1903 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1906 }else{
1907 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1908 }
1909}
1910
1911static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1912{
1913 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1914 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1915}
1916
// Unsigned byte load, optionally through a map register.
//  - map<0: plain base+offset load
//  - addr==0: load from rs + map*4
//  - otherwise: rt = rs + addr first, then load from rt + map*4
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1929
1930static void emit_movzwl_indexed(int offset, int rs, int rt)
1931{
1932 assert(offset>-256&&offset<256);
1933 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1934 if(offset>=0) {
1935 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1936 }else{
1937 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1938 }
1939}
1940
1941static void emit_ldrd(int offset, int rs, int rt)
1942{
1943 assert(offset>-256&&offset<256);
1944 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1945 if(offset>=0) {
1946 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1947 }else{
1948 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1949 }
1950}
1951
1952static void emit_readword(int addr, int rt)
1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1958}
1959
1960static unused void emit_movsbl(int addr, int rt)
1961{
1962 u_int offset = addr-(u_int)&dynarec_local;
1963 assert(offset<256);
1964 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1965 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1966}
1967
1968static unused void emit_movswl(int addr, int rt)
1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<256);
1972 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1973 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1974}
1975
1976static unused void emit_movzbl(int addr, int rt)
1977{
1978 u_int offset = addr-(u_int)&dynarec_local;
1979 assert(offset<4096);
1980 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1981 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1982}
1983
1984static unused void emit_movzwl(int addr, int rt)
1985{
1986 u_int offset = addr-(u_int)&dynarec_local;
1987 assert(offset<256);
1988 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1989 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1990}
1991
1992static void emit_writeword_indexed(int rt, int offset, int rs)
1993{
1994 assert(offset>-4096&&offset<4096);
1995 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1996 if(offset>=0) {
1997 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1998 }else{
1999 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2000 }
2001}
2002
2003static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2004{
2005 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2006 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2007}
2008
// Word store: plain base+offset when map is negative, otherwise
// rs + map*4 (which requires addr==0). temp is not used here.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map>=0) {
    assert(addr==0);
    emit_writeword_dualindexedx4(rt, rs, map);
    return;
  }
  emit_writeword_indexed(rt, addr, rs);
}
2017
// Double-word store of rh (high) and rl (low).
//  - map<0: plain stores at addr (skipped when rh<0) and addr+4
//  - map>=0, temp!=rs: temp = map+1 indexes the second word
//  - map>=0, temp==rs: rs itself is advanced by 4 between the stores
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2034
2035static void emit_writehword_indexed(int rt, int offset, int rs)
2036{
2037 assert(offset>-256&&offset<256);
2038 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2039 if(offset>=0) {
2040 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2041 }else{
2042 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2043 }
2044}
2045
2046static void emit_writebyte_indexed(int rt, int offset, int rs)
2047{
2048 assert(offset>-4096&&offset<4096);
2049 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2050 if(offset>=0) {
2051 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2052 }else{
2053 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2054 }
2055}
2056
2057static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2058{
2059 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2060 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2061}
2062
// Byte store, optionally through a map register.
//  - map<0: plain base+offset store
//  - addr==0: store to rs + map*4
//  - otherwise: temp = rs + addr first, then store to temp + map*4
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
2075
2076static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2077{
2078 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2079 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2080}
2081
2082static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2083{
2084 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2085 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2086}
2087
2088static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2089{
2090 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2091 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2092}
2093
2094static void emit_writeword(int rt, int addr)
2095{
2096 u_int offset = addr-(u_int)&dynarec_local;
2097 assert(offset<4096);
2098 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2099 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2100}
2101
2102static unused void emit_writehword(int rt, int addr)
2103{
2104 u_int offset = addr-(u_int)&dynarec_local;
2105 assert(offset<256);
2106 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2107 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2108}
2109
2110static unused void emit_writebyte(int rt, int addr)
2111{
2112 u_int offset = addr-(u_int)&dynarec_local;
2113 assert(offset<4096);
2114 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2115 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2116}
2117
2118static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2119{
2120 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2121 assert(rs1<16);
2122 assert(rs2<16);
2123 assert(hi<16);
2124 assert(lo<16);
2125 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2126}
2127
2128static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2129{
2130 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2131 assert(rs1<16);
2132 assert(rs2<16);
2133 assert(hi<16);
2134 assert(lo<16);
2135 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2136}
2137
2138static void emit_clz(int rs,int rt)
2139{
2140 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2141 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2142}
2143
2144static void emit_subcs(int rs1,int rs2,int rt)
2145{
2146 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2147 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2148}
2149
2150static void emit_shrcc_imm(int rs,u_int imm,int rt)
2151{
2152 assert(imm>0);
2153 assert(imm<32);
2154 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2155 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2156}
2157
2158static void emit_shrne_imm(int rs,u_int imm,int rt)
2159{
2160 assert(imm>0);
2161 assert(imm<32);
2162 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2163 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2164}
2165
2166static void emit_negmi(int rs, int rt)
2167{
2168 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2169 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2170}
2171
2172static void emit_negsmi(int rs, int rt)
2173{
2174 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2175 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2176}
2177
2178static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2179{
2180 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2181 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2182}
2183
2184static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2185{
2186 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2187 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2188}
2189
2190static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2191{
2192 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2193 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2194}
2195
2196static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2197{
2198 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2199 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2200}
2201
2202static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2203{
2204 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2205 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2206}
2207
2208static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2209{
2210 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2211 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2212}
2213
2214static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2215{
2216 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2217 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2218}
2219
2220static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2221{
2222 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2223 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2224}
2225
2226static void emit_teq(int rs, int rt)
2227{
2228 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2229 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2230}
2231
2232static void emit_rsbimm(int rs, int imm, int rt)
2233{
2234 u_int armval;
2235 genimm_checked(imm,&armval);
2236 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2237 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2238}
2239
2240// Load 2 immediates optimizing for small code size
2241static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2242{
2243 emit_movimm(imm1,rt1);
2244 u_int armval;
2245 if(genimm(imm2-imm1,&armval)) {
2246 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2247 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2248 }else if(genimm(imm1-imm2,&armval)) {
2249 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2250 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2251 }
2252 else emit_movimm(imm2,rt2);
2253}
2254
2255// Conditionally select one of two immediates, optimizing for small code size
2256// This will only be called if HAVE_CMOV_IMM is defined
2257static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2258{
2259 u_int armval;
2260 if(genimm(imm2-imm1,&armval)) {
2261 emit_movimm(imm1,rt);
2262 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2263 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2264 }else if(genimm(imm1-imm2,&armval)) {
2265 emit_movimm(imm1,rt);
2266 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2267 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2268 }
2269 else {
2270 #ifndef HAVE_ARMV7
2271 emit_movimm(imm1,rt);
2272 add_literal((int)out,imm2);
2273 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2274 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2275 #else
2276 emit_movw(imm1&0x0000FFFF,rt);
2277 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2278 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2279 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2280 }
2281 emit_movt(imm1&0xFFFF0000,rt);
2282 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2283 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2284 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2285 }
2286 #endif
2287 }
2288}
2289
2290// special case for checking invalid_code
2291static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2292{
2293 assert(imm<128&&imm>=0);
2294 assert(r>=0&&r<16);
2295 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2296 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2297 emit_cmpimm(HOST_TEMPREG,imm);
2298}
2299
2300static void emit_callne(int a)
2301{
2302 assem_debug("blne %x\n",a);
2303 u_int offset=genjmp(a);
2304 output_w32(0x1b000000|offset);
2305}
2306
2307// Used to preload hash table entries
2308static unused void emit_prefetchreg(int r)
2309{
2310 assem_debug("pld %s\n",regname[r]);
2311 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2312}
2313
2314// Special case for mini_ht
2315static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2316{
2317 assert(offset<4096);
2318 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2319 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2320}
2321
2322static unused void emit_bicne_imm(int rs,int imm,int rt)
2323{
2324 u_int armval;
2325 genimm_checked(imm,&armval);
2326 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2327 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2328}
2329
2330static unused void emit_biccs_imm(int rs,int imm,int rt)
2331{
2332 u_int armval;
2333 genimm_checked(imm,&armval);
2334 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2335 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2336}
2337
2338static unused void emit_bicvc_imm(int rs,int imm,int rt)
2339{
2340 u_int armval;
2341 genimm_checked(imm,&armval);
2342 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2343 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2344}
2345
2346static unused void emit_bichi_imm(int rs,int imm,int rt)
2347{
2348 u_int armval;
2349 genimm_checked(imm,&armval);
2350 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2351 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2352}
2353
2354static unused void emit_orrvs_imm(int rs,int imm,int rt)
2355{
2356 u_int armval;
2357 genimm_checked(imm,&armval);
2358 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2359 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2360}
2361
2362static void emit_orrne_imm(int rs,int imm,int rt)
2363{
2364 u_int armval;
2365 genimm_checked(imm,&armval);
2366 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2367 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2368}
2369
2370static void emit_andne_imm(int rs,int imm,int rt)
2371{
2372 u_int armval;
2373 genimm_checked(imm,&armval);
2374 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2375 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2376}
2377
2378static unused void emit_addpl_imm(int rs,int imm,int rt)
2379{
2380 u_int armval;
2381 genimm_checked(imm,&armval);
2382 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2383 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2384}
2385
2386static void emit_jno_unlikely(int a)
2387{
2388 //emit_jno(a);
2389 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2390 output_w32(0x72800000|rd_rn_rm(15,15,0));
2391}
2392
2393static void save_regs_all(u_int reglist)
2394{
2395 int i;
2396 if(!reglist) return;
2397 assem_debug("stmia fp,{");
2398 for(i=0;i<16;i++)
2399 if(reglist&(1<<i))
2400 assem_debug("r%d,",i);
2401 assem_debug("}\n");
2402 output_w32(0xe88b0000|reglist);
2403}
2404
2405static void restore_regs_all(u_int reglist)
2406{
2407 int i;
2408 if(!reglist) return;
2409 assem_debug("ldmia fp,{");
2410 for(i=0;i<16;i++)
2411 if(reglist&(1<<i))
2412 assem_debug("r%d,",i);
2413 assem_debug("}\n");
2414 output_w32(0xe89b0000|reglist);
2415}
2416
2417// Save registers before function call
2418static void save_regs(u_int reglist)
2419{
2420 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2421 save_regs_all(reglist);
2422}
2423
2424// Restore registers after function call
2425static void restore_regs(u_int reglist)
2426{
2427 reglist&=CALLER_SAVE_REGS;
2428 restore_regs_all(reglist);
2429}
2430
2431/* Stubs/epilogue */
2432
2433static void literal_pool(int n)
2434{
2435 if(!literalcount) return;
2436 if(n) {
2437 if((int)out-literals[0][0]<4096-n) return;
2438 }
2439 u_int *ptr;
2440 int i;
2441 for(i=0;i<literalcount;i++)
2442 {
2443 u_int l_addr=(u_int)out;
2444 int j;
2445 for(j=0;j<i;j++) {
2446 if(literals[j][1]==literals[i][1]) {
2447 //printf("dup %08x\n",literals[i][1]);
2448 l_addr=literals[j][0];
2449 break;
2450 }
2451 }
2452 ptr=(u_int *)literals[i][0];
2453 u_int offset=l_addr-(u_int)ptr-8;
2454 assert(offset<4096);
2455 assert(!(offset&3));
2456 *ptr|=offset;
2457 if(l_addr==(u_int)out) {
2458 literals[i][0]=l_addr; // remember for dupes
2459 output_w32(literals[i][1]);
2460 }
2461 }
2462 literalcount=0;
2463}
2464
2465static void literal_pool_jumpover(int n)
2466{
2467 if(!literalcount) return;
2468 if(n) {
2469 if((int)out-literals[0][0]<4096-n) return;
2470 }
2471 void *jaddr = out;
2472 emit_jmp(0);
2473 literal_pool(0);
2474 set_jump_target(jaddr, out);
2475}
2476
2477static void emit_extjump2(u_int addr, int target, int linker)
2478{
2479 u_char *ptr=(u_char *)addr;
2480 assert((ptr[3]&0x0e)==0xa);
2481 (void)ptr;
2482
2483 emit_loadlp(target,0);
2484 emit_loadlp(addr,1);
2485 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2486 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2487//DEBUG >
2488#ifdef DEBUG_CYCLE_COUNT
2489 emit_readword((int)&last_count,ECX);
2490 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2491 emit_readword((int)&next_interupt,ECX);
2492 emit_writeword(HOST_CCREG,(int)&Count);
2493 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2494 emit_writeword(ECX,(int)&last_count);
2495#endif
2496//DEBUG <
2497 emit_jmp(linker);
2498}
2499
2500static void emit_extjump(int addr, int target)
2501{
2502 emit_extjump2(addr, target, (int)dyna_linker);
2503}
2504
2505static void emit_extjump_ds(int addr, int target)
2506{
2507 emit_extjump2(addr, target, (int)dyna_linker_ds);
2508}
2509
2510// put rt_val into rt, potentially making use of rs with value rs_val
2511static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2512{
2513 u_int armval;
2514 int diff;
2515 if(genimm(rt_val,&armval)) {
2516 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2517 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2518 return;
2519 }
2520 if(genimm(~rt_val,&armval)) {
2521 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2522 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2523 return;
2524 }
2525 diff=rt_val-rs_val;
2526 if(genimm(diff,&armval)) {
2527 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2528 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2529 return;
2530 }else if(genimm(-diff,&armval)) {
2531 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2532 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2533 return;
2534 }
2535 emit_movimm(rt_val,rt);
2536}
2537
// Return 1 if emit_movimm_from can do its job cheaply, i.e. v2 equals v1
// or differs from it (in either direction) by a value that, once its
// trailing zero bit-pairs are stripped, fits in 8 bits. Returns 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;

  // v2 = v1 + small
  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // v2 = v1 - small
  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
2553
// Move host registers a0 and a1 into r0 and r1 (the first two call
// arguments). A negative a0 or a1 means "nothing to move" for that slot
// in the paths that check for it. Trashes r2 when the two must be swapped.
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // must swap r0 and r1; go through r2
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // a1 lives in r0: move it out of the way before r0 is overwritten
      emit_mov(a1, 1);
      if (a0 >= 0)
        emit_mov(a0, 0);
      return;
    }
  }

  // no ordering hazard: r0 first, then r1
  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2570
2571static void mov_loadtype_adj(int type,int rs,int rt)
2572{
2573 switch(type) {
2574 case LOADB_STUB: emit_signextend8(rs,rt); break;
2575 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2576 case LOADH_STUB: emit_signextend16(rs,rt); break;
2577 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2578 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2579 default: assert(0);
2580 }
2581}
2582
2583#include "pcsxmem.h"
2584#include "pcsxmem_inline.c"
2585
2586static void do_readstub(int n)
2587{
2588 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2589 literal_pool(256);
2590 set_jump_target(stubs[n][1], out);
2591 int type=stubs[n][0];
2592 int i=stubs[n][3];
2593 int rs=stubs[n][4];
2594 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2595 u_int reglist=stubs[n][7];
2596 signed char *i_regmap=i_regs->regmap;
2597 int rt;
2598 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2599 rt=get_reg(i_regmap,FTEMP);
2600 }else{
2601 rt=get_reg(i_regmap,rt1[i]);
2602 }
2603 assert(rs>=0);
2604 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2605 void *restore_jump = NULL;
2606 reglist|=(1<<rs);
2607 for(r=0;r<=12;r++) {
2608 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2609 temp=r; break;
2610 }
2611 }
2612 if(rt>=0&&rt1[i]!=0)
2613 reglist&=~(1<<rt);
2614 if(temp==-1) {
2615 save_regs(reglist);
2616 regs_saved=1;
2617 temp=(rs==0)?2:0;
2618 }
2619 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2620 temp2=1;
2621 emit_readword((int)&mem_rtab,temp);
2622 emit_shrimm(rs,12,temp2);
2623 emit_readword_dualindexedx4(temp,temp2,temp2);
2624 emit_lsls_imm(temp2,1,temp2);
2625 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2626 switch(type) {
2627 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2628 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2629 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2630 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2631 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2632 }
2633 }
2634 if(regs_saved) {
2635 restore_jump=out;
2636 emit_jcc(0); // jump to reg restore
2637 }
2638 else
2639 emit_jcc(stubs[n][2]); // return address
2640
2641 if(!regs_saved)
2642 save_regs(reglist);
2643 int handler=0;
2644 if(type==LOADB_STUB||type==LOADBU_STUB)
2645 handler=(int)jump_handler_read8;
2646 if(type==LOADH_STUB||type==LOADHU_STUB)
2647 handler=(int)jump_handler_read16;
2648 if(type==LOADW_STUB)
2649 handler=(int)jump_handler_read32;
2650 assert(handler!=0);
2651 pass_args(rs,temp2);
2652 int cc=get_reg(i_regmap,CCREG);
2653 if(cc<0)
2654 emit_loadreg(CCREG,2);
2655 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2656 emit_call(handler);
2657 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2658 mov_loadtype_adj(type,0,rt);
2659 }
2660 if(restore_jump)
2661 set_jump_target(restore_jump, out);
2662 restore_regs(reglist);
2663 emit_jmp(stubs[n][2]); // return address
2664}
2665
2666// return memhandler, or get directly accessable address and return 0
2667static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2668{
2669 u_int l1,l2=0;
2670 l1=((u_int *)table)[addr>>12];
2671 if((l1&(1<<31))==0) {
2672 u_int v=l1<<1;
2673 *addr_host=v+addr;
2674 return 0;
2675 }
2676 else {
2677 l1<<=1;
2678 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2679 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2680 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2681 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2682 else
2683 l2=((u_int *)l1)[(addr&0xfff)/4];
2684 if((l2&(1<<31))==0) {
2685 u_int v=l2<<1;
2686 *addr_host=v+(addr&0xfff);
2687 return 0;
2688 }
2689 return l2<<1;
2690 }
2691}
2692
2693static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2694{
2695 int rs=get_reg(regmap,target);
2696 int rt=get_reg(regmap,target);
2697 if(rs<0) rs=get_reg(regmap,-1);
2698 assert(rs>=0);
2699 u_int handler,host_addr=0,is_dynamic,far_call=0;
2700 int cc=get_reg(regmap,CCREG);
2701 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2702 return;
2703 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2704 if (handler==0) {
2705 if(rt<0||rt1[i]==0)
2706 return;
2707 if(addr!=host_addr)
2708 emit_movimm_from(addr,rs,host_addr,rs);
2709 switch(type) {
2710 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2711 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2712 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2713 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2714 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2715 default: assert(0);
2716 }
2717 return;
2718 }
2719 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2720 if(is_dynamic) {
2721 if(type==LOADB_STUB||type==LOADBU_STUB)
2722 handler=(int)jump_handler_read8;
2723 if(type==LOADH_STUB||type==LOADHU_STUB)
2724 handler=(int)jump_handler_read16;
2725 if(type==LOADW_STUB)
2726 handler=(int)jump_handler_read32;
2727 }
2728
2729 // call a memhandler
2730 if(rt>=0&&rt1[i]!=0)
2731 reglist&=~(1<<rt);
2732 save_regs(reglist);
2733 if(target==0)
2734 emit_movimm(addr,0);
2735 else if(rs!=0)
2736 emit_mov(rs,0);
2737 int offset=(int)handler-(int)out-8;
2738 if(offset<-33554432||offset>=33554432) {
2739 // unreachable memhandler, a plugin func perhaps
2740 emit_movimm(handler,12);
2741 far_call=1;
2742 }
2743 if(cc<0)
2744 emit_loadreg(CCREG,2);
2745 if(is_dynamic) {
2746 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2747 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2748 }
2749 else {
2750 emit_readword((int)&last_count,3);
2751 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2752 emit_add(2,3,2);
2753 emit_writeword(2,(int)&Count);
2754 }
2755
2756 if(far_call)
2757 emit_callreg(12);
2758 else
2759 emit_call(handler);
2760
2761 if(rt>=0&&rt1[i]!=0) {
2762 switch(type) {
2763 case LOADB_STUB: emit_signextend8(0,rt); break;
2764 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2765 case LOADH_STUB: emit_signextend16(0,rt); break;
2766 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2767 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2768 default: assert(0);
2769 }
2770 }
2771 restore_regs(reglist);
2772}
2773
2774static void do_writestub(int n)
2775{
2776 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2777 literal_pool(256);
2778 set_jump_target(stubs[n][1], out);
2779 int type=stubs[n][0];
2780 int i=stubs[n][3];
2781 int rs=stubs[n][4];
2782 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2783 u_int reglist=stubs[n][7];
2784 signed char *i_regmap=i_regs->regmap;
2785 int rt,r;
2786 if(itype[i]==C1LS||itype[i]==C2LS) {
2787 rt=get_reg(i_regmap,r=FTEMP);
2788 }else{
2789 rt=get_reg(i_regmap,r=rs2[i]);
2790 }
2791 assert(rs>=0);
2792 assert(rt>=0);
2793 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,ra;
2794 void *restore_jump = NULL;
2795 int reglist2=reglist|(1<<rs)|(1<<rt);
2796 for(rtmp=0;rtmp<=12;rtmp++) {
2797 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2798 temp=rtmp; break;
2799 }
2800 }
2801 if(temp==-1) {
2802 save_regs(reglist);
2803 regs_saved=1;
2804 for(rtmp=0;rtmp<=3;rtmp++)
2805 if(rtmp!=rs&&rtmp!=rt)
2806 {temp=rtmp;break;}
2807 }
2808 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2809 temp2=3;
2810 emit_readword((int)&mem_wtab,temp);
2811 emit_shrimm(rs,12,temp2);
2812 emit_readword_dualindexedx4(temp,temp2,temp2);
2813 emit_lsls_imm(temp2,1,temp2);
2814 switch(type) {
2815 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2816 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2817 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2818 default: assert(0);
2819 }
2820 if(regs_saved) {
2821 restore_jump=out;
2822 emit_jcc(0); // jump to reg restore
2823 }
2824 else
2825 emit_jcc(stubs[n][2]); // return address (invcode check)
2826
2827 if(!regs_saved)
2828 save_regs(reglist);
2829 int handler=0;
2830 switch(type) {
2831 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2832 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2833 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2834 }
2835 assert(handler!=0);
2836 pass_args(rs,rt);
2837 if(temp2!=3)
2838 emit_mov(temp2,3);
2839 int cc=get_reg(i_regmap,CCREG);
2840 if(cc<0)
2841 emit_loadreg(CCREG,2);
2842 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2843 // returns new cycle_count
2844 emit_call(handler);
2845 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
2846 if(cc<0)
2847 emit_storereg(CCREG,2);
2848 if(restore_jump)
2849 set_jump_target(restore_jump, out);
2850 restore_regs(reglist);
2851 ra=stubs[n][2];
2852 emit_jmp(ra);
2853}
2854
2855static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2856{
2857 int rs=get_reg(regmap,-1);
2858 int rt=get_reg(regmap,target);
2859 assert(rs>=0);
2860 assert(rt>=0);
2861 u_int handler,host_addr=0;
2862 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2863 if (handler==0) {
2864 if(addr!=host_addr)
2865 emit_movimm_from(addr,rs,host_addr,rs);
2866 switch(type) {
2867 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2868 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2869 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2870 default: assert(0);
2871 }
2872 return;
2873 }
2874
2875 // call a memhandler
2876 save_regs(reglist);
2877 pass_args(rs,rt);
2878 int cc=get_reg(regmap,CCREG);
2879 if(cc<0)
2880 emit_loadreg(CCREG,2);
2881 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2882 emit_movimm(handler,3);
2883 // returns new cycle_count
2884 emit_call((int)jump_handler_write_h);
2885 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2886 if(cc<0)
2887 emit_storereg(CCREG,2);
2888 restore_regs(reglist);
2889}
2890
2891static void do_unalignedwritestub(int n)
2892{
2893 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2894 literal_pool(256);
2895 set_jump_target(stubs[n][1], out);
2896
2897 int i=stubs[n][3];
2898 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2899 int addr=stubs[n][5];
2900 u_int reglist=stubs[n][7];
2901 signed char *i_regmap=i_regs->regmap;
2902 int temp2=get_reg(i_regmap,FTEMP);
2903 int rt;
2904 rt=get_reg(i_regmap,rs2[i]);
2905 assert(rt>=0);
2906 assert(addr>=0);
2907 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2908 reglist|=(1<<addr);
2909 reglist&=~(1<<temp2);
2910
2911#if 1
2912 // don't bother with it and call write handler
2913 save_regs(reglist);
2914 pass_args(addr,rt);
2915 int cc=get_reg(i_regmap,CCREG);
2916 if(cc<0)
2917 emit_loadreg(CCREG,2);
2918 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
2919 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2920 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
2921 if(cc<0)
2922 emit_storereg(CCREG,2);
2923 restore_regs(reglist);
2924 emit_jmp(stubs[n][2]); // return address
2925#else
2926 emit_andimm(addr,0xfffffffc,temp2);
2927 emit_writeword(temp2,(int)&address);
2928
2929 save_regs(reglist);
2930 emit_shrimm(addr,16,1);
2931 int cc=get_reg(i_regmap,CCREG);
2932 if(cc<0) {
2933 emit_loadreg(CCREG,2);
2934 }
2935 emit_movimm((u_int)readmem,0);
2936 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2937 emit_call((int)&indirect_jump_indexed);
2938 restore_regs(reglist);
2939
2940 emit_readword((int)&readmem_dword,temp2);
2941 int temp=addr; //hmh
2942 emit_shlimm(addr,3,temp);
2943 emit_andimm(temp,24,temp);
2944#ifdef BIG_ENDIAN_MIPS
2945 if (opcode[i]==0x2e) // SWR
2946#else
2947 if (opcode[i]==0x2a) // SWL
2948#endif
2949 emit_xorimm(temp,24,temp);
2950 emit_movimm(-1,HOST_TEMPREG);
2951 if (opcode[i]==0x2a) { // SWL
2952 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshr(rt,temp,temp2);
2954 }else{
2955 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2956 emit_orrshl(rt,temp,temp2);
2957 }
2958 emit_readword((int)&address,addr);
2959 emit_writeword(temp2,(int)&word);
2960 //save_regs(reglist); // don't need to, no state changes
2961 emit_shrimm(addr,16,1);
2962 emit_movimm((u_int)writemem,0);
2963 //emit_call((int)&indirect_jump_indexed);
2964 emit_mov(15,14);
2965 emit_readword_dualindexedx4(0,1,15);
2966 emit_readword((int)&Count,HOST_TEMPREG);
2967 emit_readword((int)&next_interupt,2);
2968 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2969 emit_writeword(2,(int)&last_count);
2970 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2971 if(cc<0) {
2972 emit_storereg(CCREG,HOST_TEMPREG);
2973 }
2974 restore_regs(reglist);
2975 emit_jmp(stubs[n][2]); // return address
2976#endif
2977}
2978
2979static void do_invstub(int n)
2980{
2981 literal_pool(20);
2982 u_int reglist=stubs[n][3];
2983 set_jump_target(stubs[n][1], out);
2984 save_regs(reglist);
2985 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2986 emit_call((int)&invalidate_addr);
2987 restore_regs(reglist);
2988 emit_jmp(stubs[n][2]); // return address
2989}
2990
2991void *do_dirty_stub(int i)
2992{
2993 assem_debug("do_dirty_stub %x\n",start+i*4);
2994 u_int addr=(u_int)source;
2995 // Careful about the code output here, verify_dirty needs to parse it.
2996 #ifndef HAVE_ARMV7
2997 emit_loadlp(addr,1);
2998 emit_loadlp((int)copy,2);
2999 emit_loadlp(slen*4,3);
3000 #else
3001 emit_movw(addr&0x0000FFFF,1);
3002 emit_movw(((u_int)copy)&0x0000FFFF,2);
3003 emit_movt(addr&0xFFFF0000,1);
3004 emit_movt(((u_int)copy)&0xFFFF0000,2);
3005 emit_movw(slen*4,3);
3006 #endif
3007 emit_movimm(start+i*4,0);
3008 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3009 void *entry = out;
3010 load_regs_entry(i);
3011 if (entry == out)
3012 entry = instr_addr[i];
3013 emit_jmp(instr_addr[i]);
3014 return entry;
3015}
3016
3017static void do_dirty_stub_ds()
3018{
3019 // Careful about the code output here, verify_dirty needs to parse it.
3020 #ifndef HAVE_ARMV7
3021 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3022 emit_loadlp((int)copy,2);
3023 emit_loadlp(slen*4,3);
3024 #else
3025 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3026 emit_movw(((u_int)copy)&0x0000FFFF,2);
3027 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3028 emit_movt(((u_int)copy)&0xFFFF0000,2);
3029 emit_movw(slen*4,3);
3030 #endif
3031 emit_movimm(start+1,0);
3032 emit_call((int)&verify_code_ds);
3033}
3034
3035static void do_cop1stub(int n)
3036{
3037 literal_pool(256);
3038 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3039 set_jump_target(stubs[n][1], out);
3040 int i=stubs[n][3];
3041// int rs=stubs[n][4];
3042 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3043 int ds=stubs[n][6];
3044 if(!ds) {
3045 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3046 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3047 }
3048 //else {printf("fp exception in delay slot\n");}
3049 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3050 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3051 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3052 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3053 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3054}
3055
3056/* Special assem */
3057
3058static void shift_assemble_arm(int i,struct regstat *i_regs)
3059{
3060 if(rt1[i]) {
3061 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3062 {
3063 signed char s,t,shift;
3064 t=get_reg(i_regs->regmap,rt1[i]);
3065 s=get_reg(i_regs->regmap,rs1[i]);
3066 shift=get_reg(i_regs->regmap,rs2[i]);
3067 if(t>=0){
3068 if(rs1[i]==0)
3069 {
3070 emit_zeroreg(t);
3071 }
3072 else if(rs2[i]==0)
3073 {
3074 assert(s>=0);
3075 if(s!=t) emit_mov(s,t);
3076 }
3077 else
3078 {
3079 emit_andimm(shift,31,HOST_TEMPREG);
3080 if(opcode2[i]==4) // SLLV
3081 {
3082 emit_shl(s,HOST_TEMPREG,t);
3083 }
3084 if(opcode2[i]==6) // SRLV
3085 {
3086 emit_shr(s,HOST_TEMPREG,t);
3087 }
3088 if(opcode2[i]==7) // SRAV
3089 {
3090 emit_sar(s,HOST_TEMPREG,t);
3091 }
3092 }
3093 }
3094 } else { // DSLLV/DSRLV/DSRAV
3095 signed char sh,sl,th,tl,shift;
3096 th=get_reg(i_regs->regmap,rt1[i]|64);
3097 tl=get_reg(i_regs->regmap,rt1[i]);
3098 sh=get_reg(i_regs->regmap,rs1[i]|64);
3099 sl=get_reg(i_regs->regmap,rs1[i]);
3100 shift=get_reg(i_regs->regmap,rs2[i]);
3101 if(tl>=0){
3102 if(rs1[i]==0)
3103 {
3104 emit_zeroreg(tl);
3105 if(th>=0) emit_zeroreg(th);
3106 }
3107 else if(rs2[i]==0)
3108 {
3109 assert(sl>=0);
3110 if(sl!=tl) emit_mov(sl,tl);
3111 if(th>=0&&sh!=th) emit_mov(sh,th);
3112 }
3113 else
3114 {
3115 // FIXME: What if shift==tl ?
3116 assert(shift!=tl);
3117 int temp=get_reg(i_regs->regmap,-1);
3118 int real_th=th;
3119 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3120 assert(sl>=0);
3121 assert(sh>=0);
3122 emit_andimm(shift,31,HOST_TEMPREG);
3123 if(opcode2[i]==0x14) // DSLLV
3124 {
3125 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3126 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3127 emit_orrshr(sl,HOST_TEMPREG,th);
3128 emit_andimm(shift,31,HOST_TEMPREG);
3129 emit_testimm(shift,32);
3130 emit_shl(sl,HOST_TEMPREG,tl);
3131 if(th>=0) emit_cmovne_reg(tl,th);
3132 emit_cmovne_imm(0,tl);
3133 }
3134 if(opcode2[i]==0x16) // DSRLV
3135 {
3136 assert(th>=0);
3137 emit_shr(sl,HOST_TEMPREG,tl);
3138 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3139 emit_orrshl(sh,HOST_TEMPREG,tl);
3140 emit_andimm(shift,31,HOST_TEMPREG);
3141 emit_testimm(shift,32);
3142 emit_shr(sh,HOST_TEMPREG,th);
3143 emit_cmovne_reg(th,tl);
3144 if(real_th>=0) emit_cmovne_imm(0,th);
3145 }
3146 if(opcode2[i]==0x17) // DSRAV
3147 {
3148 assert(th>=0);
3149 emit_shr(sl,HOST_TEMPREG,tl);
3150 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3151 if(real_th>=0) {
3152 assert(temp>=0);
3153 emit_sarimm(th,31,temp);
3154 }
3155 emit_orrshl(sh,HOST_TEMPREG,tl);
3156 emit_andimm(shift,31,HOST_TEMPREG);
3157 emit_testimm(shift,32);
3158 emit_sar(sh,HOST_TEMPREG,th);
3159 emit_cmovne_reg(th,tl);
3160 if(real_th>=0) emit_cmovne_reg(temp,th);
3161 }
3162 }
3163 }
3164 }
3165 }
3166}
3167
3168static void speculate_mov(int rs,int rt)
3169{
3170 if(rt!=0) {
3171 smrv_strong_next|=1<<rt;
3172 smrv[rt]=smrv[rs];
3173 }
3174}
3175
3176static void speculate_mov_weak(int rs,int rt)
3177{
3178 if(rt!=0) {
3179 smrv_weak_next|=1<<rt;
3180 smrv[rt]=smrv[rs];
3181 }
3182}
3183
// Update the speculated guest register values (smrv[]) and the strong/weak
// confidence bitmasks for instruction i of the block being compiled.
//   i - index of the instruction inside the current block
// At i==0 the table is seeded from the live guest registers; afterwards the
// "_next" masks computed for the previous instruction become current and
// new "_next" masks are derived from the type of instruction i.
3184static void speculate_register_values(int i)
3185{
3186 if(i==0) {
// seed speculation with the current guest GPR contents
3187 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3188 // gp,sp are likely to stay the same throughout the block
3189 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
// every other register starts out only weakly known
3190 smrv_weak_next=~smrv_strong_next;
3191 //printf(" llr %08x\n", smrv[4]);
3192 }
// what was predicted for "next" now describes the state before instruction i
3193 smrv_strong=smrv_strong_next;
3194 smrv_weak=smrv_weak_next;
3195 switch(itype[i]) {
3196 case ALU:
// destination inherits from whichever source is known (strong sources are
// preferred, rs1 before rs2); otherwise rt1 becomes unknown
3197 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3198 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3199 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3200 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3201 else {
3202 smrv_strong_next&=~(1<<rt1[i]);
3203 smrv_weak_next&=~(1<<rt1[i]);
3204 }
3205 break;
3206 case SHIFTIMM:
// forget rt1 first, then share the constant handling with IMM16
3207 smrv_strong_next&=~(1<<rt1[i]);
3208 smrv_weak_next&=~(1<<rt1[i]);
3209 // fallthrough
3210 case IMM16:
// result is a known constant: record it and mark rt1 strongly known
3211 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3212 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3213 if(hr>=0) {
3214 if(get_final_value(hr,i,&value))
3215 smrv[rt1[i]]=value;
3216 else smrv[rt1[i]]=constmap[i][hr];
3217 smrv_strong_next|=1<<rt1[i];
3218 }
3219 }
3220 else {
// not constant: inherit whatever is known about the source register
3221 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3222 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3223 }
3224 break;
3225 case LOAD:
3226 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3227 // special case for BIOS
3228 smrv[rt1[i]]=0xa0000000;
3229 smrv_strong_next|=1<<rt1[i];
3230 break;
3231 }
3232 // fallthrough
3233 case SHIFT:
3234 case LOADLR:
3235 case MOV:
// result cannot be predicted: rt1 becomes unknown
3236 smrv_strong_next&=~(1<<rt1[i]);
3237 smrv_weak_next&=~(1<<rt1[i]);
3238 break;
3239 case COP0:
3240 case COP2:
3241 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3242 smrv_strong_next&=~(1<<rt1[i]);
3243 smrv_weak_next&=~(1<<rt1[i]);
3244 }
3245 break;
3246 case C2LS:
3247 if (opcode[i]==0x32) { // LWC2
3248 smrv_strong_next&=~(1<<rt1[i]);
3249 smrv_weak_next&=~(1<<rt1[i]);
3250 }
3251 break;
3252 }
// disabled debug trace of the speculation state for one register
3253#if 0
3254 int r=4;
3255 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3256 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3257#endif
3258}
3259
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default class, used for everything not matched below */
  MTYPE_8020 = 1, /* 0x80200000 .. 0x807fffff */
  MTYPE_0000 = 2, /* below 0x00200000 */
  MTYPE_A000 = 3, /* 0xa0000000 .. 0xa01fffff */
  MTYPE_1F80 = 4, /* 0x1f800000 .. 0x1f800fff */
};
3267
3268static int get_ptr_mem_type(u_int a)
3269{
3270 if(a < 0x00200000) {
3271 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3272 // return wrong, must use memhandler for BIOS self-test to pass
3273 // 007 does similar stuff from a00 mirror, weird stuff
3274 return MTYPE_8000;
3275 return MTYPE_0000;
3276 }
3277 if(0x1f800000 <= a && a < 0x1f801000)
3278 return MTYPE_1F80;
3279 if(0x80200000 <= a && a < 0x80800000)
3280 return MTYPE_8020;
3281 if(0xa0000000 <= a && a < 0xa0200000)
3282 return MTYPE_A000;
3283 return MTYPE_8000;
3284}
3285
3286static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3287{
3288 int jaddr=0,type=0;
3289 int mr=rs1[i];
3290 if(((smrv_strong|smrv_weak)>>mr)&1) {
3291 type=get_ptr_mem_type(smrv[mr]);
3292 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3293 }
3294 else {
3295 // use the mirror we are running on
3296 type=get_ptr_mem_type(start);
3297 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3298 }
3299
3300 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3301 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3302 addr=*addr_reg_override=HOST_TEMPREG;
3303 type=0;
3304 }
3305 else if(type==MTYPE_0000) { // RAM 0 mirror
3306 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3307 addr=*addr_reg_override=HOST_TEMPREG;
3308 type=0;
3309 }
3310 else if(type==MTYPE_A000) { // RAM A mirror
3311 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3312 addr=*addr_reg_override=HOST_TEMPREG;
3313 type=0;
3314 }
3315 else if(type==MTYPE_1F80) { // scratchpad
3316 if (psxH == (void *)0x1f800000) {
3317 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3318 emit_cmpimm(HOST_TEMPREG,0x1000);
3319 jaddr=(int)out;
3320 emit_jc(0);
3321 }
3322 else {
3323 // do usual RAM check, jump will go to the right handler
3324 type=0;
3325 }
3326 }
3327
3328 if(type==0)
3329 {
3330 emit_cmpimm(addr,RAM_SIZE);
3331 jaddr=(int)out;
3332 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3333 // Hint to branch predictor that the branch is unlikely to be taken
3334 if(rs1[i]>=28)
3335 emit_jno_unlikely(0);
3336 else
3337 #endif
3338 emit_jno(0);
3339 if(ram_offset!=0) {
3340 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3341 addr=*addr_reg_override=HOST_TEMPREG;
3342 }
3343 }
3344
3345 return jaddr;
3346}
3347
3348#define shift_assemble shift_assemble_arm
3349
// Assemble the unaligned-load instructions LWL/LWR (opcode 0x22/0x26) and
// LDL/LDR (opcode 0x1A/0x1B) for instruction i.
//   i      - index of the instruction within the block being compiled
//   i_regs - register allocation state used to look up host registers
// The aligned word (or doubleword) containing the address is loaded, then
// shifted and merged into the destination using a mask built from the low
// address bits (shift amount = address * 8, kept in "temp").
3350static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3351{
3352 int s,th,tl,temp,temp2,addr,map=-1;
3353 int offset;
3354 int jaddr=0;
3355 int memtarget=0,c=0;
3356 int fastload_reg_override=0;
3357 u_int hr,reglist=0;
// host registers: th/tl = upper/lower half of the target, s = base register,
// temp = scratch, temp2 = FTEMP (holds the aligned address, then loaded data)
3358 th=get_reg(i_regs->regmap,rt1[i]|64);
3359 tl=get_reg(i_regs->regmap,rt1[i]);
3360 s=get_reg(i_regs->regmap,rs1[i]);
3361 temp=get_reg(i_regs->regmap,-1);
3362 temp2=get_reg(i_regs->regmap,FTEMP);
3363 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3364 assert(addr<0);
3365 offset=imm[i];
// collect every host register currently holding a guest value (plus temp);
// the list is handed to the slow-path stub
3366 for(hr=0;hr<HOST_REGS;hr++) {
3367 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3368 }
3369 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only assigned below), so
// this condition effectively tests offset and s only
3370 if(offset||s<0||c) addr=temp2;
3371 else addr=s;
3372 if(s>=0) {
// c: base register held a known constant; memtarget: that constant address
// compares below 0x80000000+RAM_SIZE as a signed value
3373 c=(i_regs->wasconst>>s)&1;
3374 if(c) {
3375 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3376 }
3377 }
3378 if(!c) {
3379 #ifdef RAM_OFFSET
3380 map=get_reg(i_regs->regmap,ROREG);
3381 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3382 #endif
// temp = address*8 (bit shift amount), temp2 = address aligned down
3383 emit_shlimm(addr,3,temp);
3384 if (opcode[i]==0x22||opcode[i]==0x26) {
3385 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3386 }else{
3387 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3388 }
3389 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3390 }
3391 else {
// constant address: the shift amount is known at compile time
3392 if(ram_offset&&memtarget) {
3393 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3394 fastload_reg_override=HOST_TEMPREG;
3395 }
3396 if (opcode[i]==0x22||opcode[i]==0x26) {
3397 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3398 }else{
3399 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3400 }
3401 }
3402 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3403 if(!c||memtarget) {
// inline load; the stub registered below handles the case where the
// fast-path address check branches away
3404 int a=temp2;
3405 if(fastload_reg_override) a=fastload_reg_override;
3406 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3407 emit_readword_indexed_tlb(0,a,map,temp2);
3408 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3409 }
3410 else
3411 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3412 if(rt1[i]) {
3413 assert(tl>=0);
3414 emit_andimm(temp,24,temp);
3415#ifdef BIG_ENDIAN_MIPS
3416 if (opcode[i]==0x26) // LWR
3417#else
3418 if (opcode[i]==0x22) // LWL
3419#endif
3420 emit_xorimm(temp,24,temp);
// HOST_TEMPREG = all ones; shifted by temp it masks the bits of tl that the
// loaded data replaces
3421 emit_movimm(-1,HOST_TEMPREG);
3422 if (opcode[i]==0x26) {
3423 emit_shr(temp2,temp,temp2);
3424 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3425 }else{
3426 emit_shl(temp2,temp,temp2);
3427 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3428 }
3429 emit_or(temp2,tl,tl);
3430 }
3431 //emit_storereg(rt1[i],tl); // DEBUG
3432 }
3433 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3434 // FIXME: little endian, fastload_reg_override
3435 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3436 if(!c||memtarget) {
3437 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3438 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3439 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3440 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3441 }
3442 else
3443 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3444 if(rt1[i]) {
3445 assert(th>=0);
3446 assert(tl>=0);
// the flags set by this test (bit 5 of the shift amount) select between the
// conditional eq/ne forms emitted below
3447 emit_testimm(temp,32);
3448 emit_andimm(temp,24,temp);
3449 if (opcode[i]==0x1A) { // LDL
3450 emit_rsbimm(temp,32,HOST_TEMPREG);
3451 emit_shl(temp2h,temp,temp2h);
3452 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3453 emit_movimm(-1,HOST_TEMPREG);
3454 emit_shl(temp2,temp,temp2);
3455 emit_cmove_reg(temp2h,th);
3456 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3457 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3458 emit_orreq(temp2,tl,tl);
3459 emit_orrne(temp2,th,th);
3460 }
3461 if (opcode[i]==0x1B) { // LDR
3462 emit_xorimm(temp,24,temp);
3463 emit_rsbimm(temp,32,HOST_TEMPREG);
3464 emit_shr(temp2,temp,temp2);
3465 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3466 emit_movimm(-1,HOST_TEMPREG);
3467 emit_shr(temp2h,temp,temp2h);
3468 emit_cmovne_reg(temp2,tl);
3469 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3470 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3471 emit_orrne(temp2h,th,th);
3472 emit_orreq(temp2h,tl,tl);
3473 }
3474 }
3475 }
3476}
// generic name -> ARM implementation
3477#define loadlr_assemble loadlr_assemble_arm
3478
// Assemble a COP0 instruction for instruction i: MFC0 (opcode2 0),
// MTC0 (opcode2 4) or RFE (opcode2 0x10 with function field 0x10).
//   i      - index of the instruction within the block being compiled
//   i_regs - register allocation state for this instruction
3479static void cop0_assemble(int i,struct regstat *i_regs)
3480{
3481 if(opcode2[i]==0) // MFC0
3482 {
3483 signed char t=get_reg(i_regs->regmap,rt1[i]);
// coprocessor register number, bits 11..15 of the instruction word
3484 char copr=(source[i]>>11)&0x1f;
3485 //assert(t>=0); // Why does this happen? OOT is weird
3486 if(t>=0&&rt1[i]!=0) {
3487 emit_readword((int)&reg_cop0+copr*4,t);
3488 }
3489 }
3490 else if(opcode2[i]==4) // MTC0
3491 {
3492 signed char s=get_reg(i_regs->regmap,rs1[i]);
3493 char copr=(source[i]>>11)&0x1f;
3494 assert(s>=0);
3495 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
// for registers 9, 11, 12 and 13 the up-to-date cycle count is written to
// Count before the handler is called
3496 if(copr==9||copr==11||copr==12||copr==13) {
3497 emit_readword((int)&last_count,HOST_TEMPREG);
3498 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3499 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3500 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3501 emit_writeword(HOST_CCREG,(int)&Count);
3502 }
3503 // What a mess. The status register (12) can enable interrupts,
3504 // so needs a special case to handle a pending interrupt.
3505 // The interrupt must be taken immediately, because a subsequent
3506 // instruction might disable interrupts again.
3507 if(copr==12||copr==13) {
3508 if (is_delayslot) {
3509 // burn cycles to cause cc_interrupt, which will
3510 // reschedule next_interupt. Relies on CCREG from above.
3511 assem_debug("MTC0 DS %d\n", copr);
3512 emit_writeword(HOST_CCREG,(int)&last_count);
3513 emit_movimm(0,HOST_CCREG);
3514 emit_storereg(CCREG,HOST_CCREG);
// call pcsx_mtc0_ds with host r0 = register number, r1 = value
3515 emit_loadreg(rs1[i],1);
3516 emit_movimm(copr,0);
3517 emit_call((int)pcsx_mtc0_ds);
3518 emit_loadreg(rs1[i],s);
3519 return;
3520 }
// record the address of the following instruction in pcaddr and clear
// pending_exception before calling the handler
3521 emit_movimm(start+i*4+4,HOST_TEMPREG);
3522 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3523 emit_movimm(0,HOST_TEMPREG);
3524 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
3525 }
3526 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3527 //else
// move the value into host r1; if it lived in HOST_CCREG that register was
// overwritten above, so reload it from the guest register file
3528 if(s==HOST_CCREG)
3529 emit_loadreg(rs1[i],1);
3530 else if(s!=1)
3531 emit_mov(s,1);
3532 emit_movimm(copr,0);
3533 emit_call((int)pcsx_mtc0);
// recompute the cycle counter relative to next_interupt after the call
3534 if(copr==9||copr==11||copr==12||copr==13) {
3535 emit_readword((int)&Count,HOST_CCREG);
3536 emit_readword((int)&next_interupt,HOST_TEMPREG);
3537 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3538 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3539 emit_writeword(HOST_TEMPREG,(int)&last_count);
3540 emit_storereg(CCREG,HOST_CCREG);
3541 }
// branch to do_interrupt if the handler flagged a pending exception
3542 if(copr==12||copr==13) {
3543 assert(!is_delayslot);
3544 emit_readword((int)&pending_exception,14);
3545 emit_test(14,14);
3546 emit_jne((int)&do_interrupt);
3547 }
// reload the source register(s) into their allocated host registers
3548 emit_loadreg(rs1[i],s);
3549 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3550 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3551 cop1_usable=0;
3552 }
3553 else
3554 {
3555 assert(opcode2[i]==0x10);
3556 if((source[i]&0x3f)==0x10) // RFE
3557 {
// Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
3558 emit_readword((int)&Status,0);
3559 emit_andimm(0,0x3c,1);
3560 emit_andimm(0,~0xf,0);
3561 emit_orrshr_imm(1,2,0);
3562 emit_writeword(0,(int)&Status);
3563 }
3564 }
3565}
3566
3567static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3568{
3569 switch (copr) {
3570 case 1:
3571 case 3:
3572 case 5:
3573 case 8:
3574 case 9:
3575 case 10:
3576 case 11:
3577 emit_readword((int)&reg_cop2d[copr],tl);
3578 emit_signextend16(tl,tl);
3579 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3580 break;
3581 case 7:
3582 case 16:
3583 case 17:
3584 case 18:
3585 case 19:
3586 emit_readword((int)&reg_cop2d[copr],tl);
3587 emit_andimm(tl,0xffff,tl);
3588 emit_writeword(tl,(int)&reg_cop2d[copr]);
3589 break;
3590 case 15:
3591 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3592 emit_writeword(tl,(int)&reg_cop2d[copr]);
3593 break;
3594 case 28:
3595 case 29:
3596 emit_readword((int)&reg_cop2d[9],temp);
3597 emit_testimm(temp,0x8000); // do we need this?
3598 emit_andimm(temp,0xf80,temp);
3599 emit_andne_imm(temp,0,temp);
3600 emit_shrimm(temp,7,tl);
3601 emit_readword((int)&reg_cop2d[10],temp);
3602 emit_testimm(temp,0x8000);
3603 emit_andimm(temp,0xf80,temp);
3604 emit_andne_imm(temp,0,temp);
3605 emit_orrshr_imm(temp,2,tl);
3606 emit_readword((int)&reg_cop2d[11],temp);
3607 emit_testimm(temp,0x8000);
3608 emit_andimm(temp,0xf80,temp);
3609 emit_andne_imm(temp,0,temp);
3610 emit_orrshl_imm(temp,3,tl);
3611 emit_writeword(tl,(int)&reg_cop2d[copr]);
3612 break;
3613 default:
3614 emit_readword((int)&reg_cop2d[copr],tl);
3615 break;
3616 }
3617}
3618
3619static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3620{
3621 switch (copr) {
3622 case 15:
3623 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3624 emit_writeword(sl,(int)&reg_cop2d[copr]);
3625 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3626 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3627 emit_writeword(sl,(int)&reg_cop2d[14]);
3628 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3629 break;
3630 case 28:
3631 emit_andimm(sl,0x001f,temp);
3632 emit_shlimm(temp,7,temp);
3633 emit_writeword(temp,(int)&reg_cop2d[9]);
3634 emit_andimm(sl,0x03e0,temp);
3635 emit_shlimm(temp,2,temp);
3636 emit_writeword(temp,(int)&reg_cop2d[10]);
3637 emit_andimm(sl,0x7c00,temp);
3638 emit_shrimm(temp,3,temp);
3639 emit_writeword(temp,(int)&reg_cop2d[11]);
3640 emit_writeword(sl,(int)&reg_cop2d[28]);
3641 break;
3642 case 30:
3643 emit_movs(sl,temp);
3644 emit_mvnmi(temp,temp);
3645#ifdef HAVE_ARMV5
3646 emit_clz(temp,temp);
3647#else
3648 emit_movs(temp,HOST_TEMPREG);
3649 emit_movimm(0,temp);
3650 emit_jeq((int)out+4*4);
3651 emit_addpl_imm(temp,1,temp);
3652 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3653 emit_jns((int)out-2*4);
3654#endif
3655 emit_writeword(sl,(int)&reg_cop2d[30]);
3656 emit_writeword(temp,(int)&reg_cop2d[31]);
3657 break;
3658 case 31:
3659 break;
3660 default:
3661 emit_writeword(sl,(int)&reg_cop2d[copr]);
3662 break;
3663 }
3664}
3665
3666static void cop2_assemble(int i,struct regstat *i_regs)
3667{
3668 u_int copr=(source[i]>>11)&0x1f;
3669 signed char temp=get_reg(i_regs->regmap,-1);
3670 if (opcode2[i]==0) { // MFC2
3671 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3672 if(tl>=0&&rt1[i]!=0)
3673 cop2_get_dreg(copr,tl,temp);
3674 }
3675 else if (opcode2[i]==4) { // MTC2
3676 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3677 cop2_put_dreg(copr,sl,temp);
3678 }
3679 else if (opcode2[i]==2) // CFC2
3680 {
3681 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3682 if(tl>=0&&rt1[i]!=0)
3683 emit_readword((int)&reg_cop2c[copr],tl);
3684 }
3685 else if (opcode2[i]==6) // CTC2
3686 {
3687 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3688 switch(copr) {
3689 case 4:
3690 case 12:
3691 case 20:
3692 case 26:
3693 case 27:
3694 case 29:
3695 case 30:
3696 emit_signextend16(sl,temp);
3697 break;
3698 case 31:
3699 //value = value & 0x7ffff000;
3700 //if (value & 0x7f87e000) value |= 0x80000000;
3701 emit_shrimm(sl,12,temp);
3702 emit_shlimm(temp,12,temp);
3703 emit_testimm(temp,0x7f000000);
3704 emit_testeqimm(temp,0x00870000);
3705 emit_testeqimm(temp,0x0000e000);
3706 emit_orrne_imm(temp,0x80000000,temp);
3707 break;
3708 default:
3709 temp=sl;
3710 break;
3711 }
3712 emit_writeword(temp,(int)&reg_cop2c[copr]);
3713 assert(sl>=0);
3714 }
3715}
3716
3717static void c2op_prologue(u_int op,u_int reglist)
3718{
3719 save_regs_all(reglist);
3720#ifdef PCNT
3721 emit_movimm(op,0);
3722 emit_call((int)pcnt_gte_start);
3723#endif
3724 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3725}
3726
3727static void c2op_epilogue(u_int op,u_int reglist)
3728{
3729#ifdef PCNT
3730 emit_movimm(op,0);
3731 emit_call((int)pcnt_gte_end);
3732#endif
3733 restore_regs_all(reglist);
3734}
3735
3736static void c2op_call_MACtoIR(int lm,int need_flags)
3737{
3738 if(need_flags)
3739 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3740 else
3741 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3742}
3743
3744static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3745{
3746 emit_call((int)func);
3747 // func is C code and trashes r0
3748 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3749 if(need_flags||need_ir)
3750 c2op_call_MACtoIR(lm,need_flags);
3751 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3752}
3753
// Assemble a GTE (COP2) operation for instruction i.
//   i      - index of the instruction within the block being compiled
//   i_regs - register allocation state for this instruction
// Nothing is emitted when no handler exists for the operation. Otherwise
// the code is bracketed by c2op_prologue/c2op_epilogue; several operations
// use dedicated assembly helpers (unless DRC_DBG is defined), all others
// call the generic handler table.
3754static void c2op_assemble(int i,struct regstat *i_regs)
3755{
// operation number: low 6 bits of the instruction word
3756 u_int c2op=source[i]&0x3f;
3757 u_int hr,reglist_full=0,reglist;
3758 int need_flags,need_ir;
// reglist_full: every host register currently mapped to a guest register
3759 for(hr=0;hr<HOST_REGS;hr++) {
3760 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3761 }
// by default only caller-saved registers need to be preserved
3762 reglist=reglist_full&CALLER_SAVE_REGS;
3763
3764 if (gte_handlers[c2op]!=NULL) {
3765 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
// IR results are needed unless bits 9..11 of the unneeded mask are all set
3766 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3767 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3768 source[i],gte_unneeded[i+1],need_flags,need_ir);
3769 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3770 need_flags=0;
// instruction fields: bit 19 = shift, bit 10 = lm
3771 int shift = (source[i] >> 19) & 1;
3772 int lm = (source[i] >> 10) & 1;
3773 switch(c2op) {
3774#ifndef DRC_DBG
3775 case GTE_MVMVA: {
3776#ifdef HAVE_ARMV5
3777 int v = (source[i] >> 15) & 3;
3778 int cv = (source[i] >> 13) & 3;
3779 int mx = (source[i] >> 17) & 3;
3780 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3781 c2op_prologue(c2op,reglist);
3782 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3783 if(v<3)
3784 emit_ldrd(v*8,0,4);
3785 else {
// v==3: build the packed vector from the three halfwords at word
// offsets 9, 10 and 11
3786 emit_movzwl_indexed(9*4,0,4); // gteIR
3787 emit_movzwl_indexed(10*4,0,6);
3788 emit_movzwl_indexed(11*4,0,5);
3789 emit_orrshl_imm(6,16,4);
3790 }
// mx/cv==3 select the pointer stored in zeromem_ptr instead of a register
// block
3791 if(mx<3)
3792 emit_addimm(0,32*4+mx*8*4,6);
3793 else
3794 emit_readword((int)&zeromem_ptr,6);
3795 if(cv<3)
3796 emit_addimm(0,32*4+(cv*8+5)*4,7);
3797 else
3798 emit_readword((int)&zeromem_ptr,7);
3799#ifdef __ARM_NEON__
3800 emit_movimm(source[i],1); // opcode
3801 emit_call((int)gteMVMVA_part_neon);
3802 if(need_flags) {
3803 emit_movimm(lm,1);
3804 emit_call((int)gteMACtoIR_flags_neon);
3805 }
3806#else
3807 if(cv==3&&shift)
3808 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3809 else {
3810 emit_movimm(shift,1);
3811 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3812 }
3813 if(need_flags||need_ir)
3814 c2op_call_MACtoIR(lm,need_flags);
3815#endif
3816#else /* if not HAVE_ARMV5 */
// without ARMv5 the generic handler is called; it reads the opcode from
// psxRegs.code
3817 c2op_prologue(c2op,reglist);
3818 emit_movimm(source[i],1); // opcode
3819 emit_writeword(1,(int)&psxRegs.code);
3820 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3821#endif
3822 break;
3823 }
3824 case GTE_OP:
3825 c2op_prologue(c2op,reglist);
3826 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3827 if(need_flags||need_ir) {
// reload r0 with the cop2 data register pointer before the next helper
3828 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3829 c2op_call_MACtoIR(lm,need_flags);
3830 }
3831 break;
3832 case GTE_DPCS:
3833 c2op_prologue(c2op,reglist);
3834 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3835 break;
3836 case GTE_INTPL:
3837 c2op_prologue(c2op,reglist);
3838 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3839 break;
3840 case GTE_SQR:
3841 c2op_prologue(c2op,reglist);
3842 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3843 if(need_flags||need_ir) {
3844 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3845 c2op_call_MACtoIR(lm,need_flags);
3846 }
3847 break;
3848 case GTE_DCPL:
3849 c2op_prologue(c2op,reglist);
3850 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3851 break;
3852 case GTE_GPF:
3853 c2op_prologue(c2op,reglist);
3854 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3855 break;
3856 case GTE_GPL:
3857 c2op_prologue(c2op,reglist);
3858 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3859 break;
3860#endif
3861 default:
// generic path: call the handler (flag or no-flag variant) from the table
3862 c2op_prologue(c2op,reglist);
3863#ifdef DRC_DBG
3864 emit_movimm(source[i],1); // opcode
3865 emit_writeword(1,(int)&psxRegs.code);
3866#endif
3867 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3868 break;
3869 }
3870 c2op_epilogue(c2op,reglist);
3871 }
3872}
3873
3874static void cop1_unusable(int i,struct regstat *i_regs)
3875{
3876 // XXX: should just just do the exception instead
3877 if(!cop1_usable) {
3878 int jaddr=(int)out;
3879 emit_jmp(0);
3880 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3881 cop1_usable=1;
3882 }
3883}
3884
/* COP1 register moves: no code is generated beyond the cop1_unusable() check. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3889
/* FP conversions: no code is generated beyond the cop1_unusable() check. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
/* generic name -> ARM implementation */
#define fconv_assemble fconv_assemble_arm
3895
/* FP compares: no code is generated beyond the cop1_unusable() check. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3900
/* FP arithmetic: no code is generated beyond the cop1_unusable() check. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3905
// Assemble a multiply/divide instruction (MULT, MULTU, DIV, DIVU) for
// instruction i; results go to the host registers allocated to HIREG/LOREG.
//   i      - index of the instruction within the block being compiled
//   i_regs - register allocation state for this instruction
// 64-bit variants are not supported (assert). If either source is guest
// register 0, HI and LO are simply zeroed.
// NOTE: the division sequences use hard-coded relative branch targets
// ((int)out+N / (int)out-N); the offsets depend on the exact number of
// instructions emitted after them.
3906static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3907{
3908 // case 0x18: MULT
3909 // case 0x19: MULTU
3910 // case 0x1A: DIV
3911 // case 0x1B: DIVU
3912 // case 0x1C: DMULT
3913 // case 0x1D: DMULTU
3914 // case 0x1E: DDIV
3915 // case 0x1F: DDIVU
3916 if(rs1[i]&&rs2[i])
3917 {
3918 if((opcode2[i]&4)==0) // 32-bit
3919 {
3920 if(opcode2[i]==0x18) // MULT
3921 {
3922 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3923 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3924 signed char hi=get_reg(i_regs->regmap,HIREG);
3925 signed char lo=get_reg(i_regs->regmap,LOREG);
3926 assert(m1>=0);
3927 assert(m2>=0);
3928 assert(hi>=0);
3929 assert(lo>=0);
// signed 32x32 -> 64 multiply into hi:lo
3930 emit_smull(m1,m2,hi,lo);
3931 }
3932 if(opcode2[i]==0x19) // MULTU
3933 {
3934 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3935 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3936 signed char hi=get_reg(i_regs->regmap,HIREG);
3937 signed char lo=get_reg(i_regs->regmap,LOREG);
3938 assert(m1>=0);
3939 assert(m2>=0);
3940 assert(hi>=0);
3941 assert(lo>=0);
// unsigned 32x32 -> 64 multiply into hi:lo
3942 emit_umull(m1,m2,hi,lo);
3943 }
3944 if(opcode2[i]==0x1A) // DIV
3945 {
3946 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3947 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3948 assert(d1>=0);
3949 assert(d2>=0);
3950 signed char quotient=get_reg(i_regs->regmap,LOREG);
3951 signed char remainder=get_reg(i_regs->regmap,HIREG);
3952 assert(quotient>=0);
3953 assert(remainder>=0);
// remainder starts as the dividend; quotient is preset to -1 for the
// divide-by-zero case
3954 emit_movs(d1,remainder);
3955 emit_movimm(0xffffffff,quotient);
3956 emit_negmi(quotient,quotient); // .. quotient and ..
3957 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
// HOST_TEMPREG = divisor (flags set); a zero divisor branches forward by a
// fixed byte offset
3958 emit_movs(d2,HOST_TEMPREG);
3959 emit_jeq((int)out+52); // Division by zero
3960 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
// normalise the divisor by shifting it left; quotient temporarily holds the
// shift count
3961#ifdef HAVE_ARMV5
3962 emit_clz(HOST_TEMPREG,quotient);
3963 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3964#else
3965 emit_movimm(0,quotient);
3966 emit_addpl_imm(quotient,1,quotient);
3967 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3968 emit_jns((int)out-2*4);
3969#endif
3970 emit_orimm(quotient,1<<31,quotient);
3971 emit_shr(quotient,quotient,quotient);
// division loop: conditional subtract, shift the carry into the quotient,
// halve the divisor, repeat while carry is clear
3972 emit_cmp(remainder,HOST_TEMPREG);
3973 emit_subcs(remainder,HOST_TEMPREG,remainder);
3974 emit_adcs(quotient,quotient,quotient);
3975 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3976 emit_jcc((int)out-16); // -4
// sign fix-up: negate the quotient when the operand signs differ, and the
// remainder when the dividend is negative
3977 emit_teq(d1,d2);
3978 emit_negmi(quotient,quotient);
3979 emit_test(d1,d1);
3980 emit_negmi(remainder,remainder);
3981 }
3982 if(opcode2[i]==0x1B) // DIVU
3983 {
3984 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3985 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3986 assert(d1>=0);
3987 assert(d2>=0);
3988 signed char quotient=get_reg(i_regs->regmap,LOREG);
3989 signed char remainder=get_reg(i_regs->regmap,HIREG);
3990 assert(quotient>=0);
3991 assert(remainder>=0);
3992 emit_mov(d1,remainder);
3993 emit_movimm(0xffffffff,quotient); // div0 case
3994 emit_test(d2,d2);
3995 emit_jeq((int)out+40); // Division by zero
// normalise the divisor in place (d2 is modified by the emitted code)
3996#ifdef HAVE_ARMV5
3997 emit_clz(d2,HOST_TEMPREG);
3998 emit_movimm(1<<31,quotient);
3999 emit_shl(d2,HOST_TEMPREG,d2);
4000#else
4001 emit_movimm(0,HOST_TEMPREG);
4002 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4003 emit_lslpls_imm(d2,1,d2);
4004 emit_jns((int)out-2*4);
4005 emit_movimm(1<<31,quotient);
4006#endif
4007 emit_shr(quotient,HOST_TEMPREG,quotient);
// division loop, same shape as in the DIV case
4008 emit_cmp(remainder,d2);
4009 emit_subcs(remainder,d2,remainder);
4010 emit_adcs(quotient,quotient,quotient);
4011 emit_shrcc_imm(d2,1,d2);
4012 emit_jcc((int)out-16); // -4
4013 }
4014 }
4015 else // 64-bit
4016 assert(0);
4017 }
4018 else
4019 {
4020 // Multiply by zero is zero.
4021 // MIPS does not have a divide by zero exception.
4022 // The result is undefined, we return zero.
4023 signed char hr=get_reg(i_regs->regmap,HIREG);
4024 signed char lr=get_reg(i_regs->regmap,LOREG);
4025 if(hr>=0) emit_zeroreg(hr);
4026 if(lr>=0) emit_zeroreg(lr);
4027 }
4028}
// generic name -> ARM implementation
4029#define multdiv_assemble multdiv_assemble_arm
4030
/*
 * Intentionally empty on ARM. On x86 this loads the hash mask 0xf8 into a
 * register; on ARM the hash is computed with a single instruction in
 * do_rhash().
 */
static void do_preload_rhash(int r)
{
}
4035
4036static void do_preload_rhtbl(int ht) {
4037 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4038}
4039
/* Emit the return-address hash: rh = rs & 0xf8 (byte offset into mini_ht). */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
4043
4044static void do_miniht_load(int ht,int rh) {
4045 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4046 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4047}
4048
4049static void do_miniht_jump(int rs,int rh,int ht) {
4050 emit_cmp(rh,rs);
4051 emit_ldreq_indexed(ht,4,15);
4052 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4053 emit_mov(rs,7);
4054 emit_jmp(jump_vaddr_reg[7]);
4055 #else
4056 emit_jmp(jump_vaddr_reg[rs]);
4057 #endif
4058}
4059
4060static void do_miniht_insert(u_int return_address,int rt,int temp) {
4061 #ifndef HAVE_ARMV7
4062 emit_movimm(return_address,rt); // PC into link register
4063 add_to_linker((int)out,return_address,1);
4064 emit_pcreladdr(temp);
4065 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4066 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4067 #else
4068 emit_movw(return_address&0x0000FFFF,rt);
4069 add_to_linker((int)out,return_address,1);
4070 emit_pcreladdr(temp);
4071 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4072 emit_movt(return_address&0xFFFF0000,rt);
4073 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4074 #endif
4075}
4076
4077static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4078{
4079 //if(dirty_pre==dirty) return;
4080 int hr,reg;
4081 for(hr=0;hr<HOST_REGS;hr++) {
4082 if(hr!=EXCLUDE_REG) {
4083 reg=pre[hr];
4084 if(((~u)>>(reg&63))&1) {
4085 if(reg>0) {
4086 if(((dirty_pre&~dirty)>>hr)&1) {
4087 if(reg>0&&reg<34) {
4088 emit_storereg(reg,hr);
4089 if( ((is32_pre&~uu)>>reg)&1 ) {
4090 emit_sarimm(hr,31,HOST_TEMPREG);
4091 emit_storereg(reg|64,HOST_TEMPREG);
4092 }
4093 }
4094 else if(reg>=64) {
4095 emit_storereg(reg,hr);
4096 }
4097 }
4098 }
4099 }
4100 }
4101 }
4102}
4103
4104
4105/* using strd could possibly help but you'd have to allocate registers in pairs
4106static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4107{
4108 int hr;
4109 int wrote=-1;
4110 for(hr=HOST_REGS-1;hr>=0;hr--) {
4111 if(hr!=EXCLUDE_REG) {
4112 if(pre[hr]!=entry[hr]) {
4113 if(pre[hr]>=0) {
4114 if((dirty>>hr)&1) {
4115 if(get_reg(entry,pre[hr])<0) {
4116 if(pre[hr]<64) {
4117 if(!((u>>pre[hr])&1)) {
4118 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4119 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4120 emit_sarimm(hr,31,hr+1);
4121 emit_strdreg(pre[hr],hr);
4122 }
4123 else
4124 emit_storereg(pre[hr],hr);
4125 }else{
4126 emit_storereg(pre[hr],hr);
4127 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4128 emit_sarimm(hr,31,hr);
4129 emit_storereg(pre[hr]|64,hr);
4130 }
4131 }
4132 }
4133 }else{
4134 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4135 emit_storereg(pre[hr],hr);
4136 }
4137 }
4138 wrote=hr;
4139 }
4140 }
4141 }
4142 }
4143 }
4144 }
4145 for(hr=0;hr<HOST_REGS;hr++) {
4146 if(hr!=EXCLUDE_REG) {
4147 if(pre[hr]!=entry[hr]) {
4148 if(pre[hr]>=0) {
4149 int nr;
4150 if((nr=get_reg(entry,pre[hr]))>=0) {
4151 emit_mov(hr,nr);
4152 }
4153 }
4154 }
4155 }
4156 }
4157}
4158#define wb_invalidate wb_invalidate_arm
4159*/
4160
4161static void mark_clear_cache(void *target)
4162{
4163 u_long offset = (char *)target - (char *)BASE_ADDR;
4164 u_int mask = 1u << ((offset >> 12) & 31);
4165 if (!(needs_clear_cache[offset >> 17] & mask)) {
4166 char *start = (char *)((u_long)target & ~4095ul);
4167 start_tcache_write(start, start + 4096);
4168 needs_clear_cache[offset >> 17] |= mask;
4169 }
4170}
4171
4172// Clearing the cache is rather slow on ARM Linux, so mark the areas
4173// that need to be cleared, and then only clear these areas once.
4174static void do_clear_cache()
4175{
4176 int i,j;
4177 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4178 {
4179 u_int bitmap=needs_clear_cache[i];
4180 if(bitmap) {
4181 u_int start,end;
4182 for(j=0;j<32;j++)
4183 {
4184 if(bitmap&(1<<j)) {
4185 start=(u_int)BASE_ADDR+i*131072+j*4096;
4186 end=start+4095;
4187 j++;
4188 while(j<32) {
4189 if(bitmap&(1<<j)) {
4190 end+=4096;
4191 j++;
4192 }else{
4193 end_tcache_write((void *)start,(void *)end);
4194 break;
4195 }
4196 }
4197 }
4198 }
4199 needs_clear_cache[i]=0;
4200 }
4201 }
4202}
4203
// CPU-architecture-specific initialization.
// Nothing needs to be set up for ARM, so the body is empty.
static void arch_init()
{
}
4207
4208// vim:shiftwidth=2:expandtab