drc: convert 'stubs' to a struct with proper types (rework part 2)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Bit mask of caller-saved host registers (bit n = rn): r0-r3 and r12,
// plus r9 when building for Mach-O targets.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Marks a definition as intentionally (possibly) unused.
#define unused __attribute__((unused))

// Debug builds leave helpers and variables unreferenced; silence the noise.
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
// Entry points implemented outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

// One jump_vaddr entry point per usable host register.
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr entry point indexed by host register number.
// Slots 11 (fp), 13 (sp), 14 (lr) and 15 (pc) have no entry and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
95
96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
128
129/* Linker */
130
131static void set_jump_target(void *addr, void *target_)
132{
133 u_int target = (u_int)target_;
134 u_char *ptr = addr;
135 u_int *ptr2=(u_int *)ptr;
136 if(ptr[3]==0xe2) {
137 assert((target-(u_int)ptr2-8)<1024);
138 assert(((uintptr_t)addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
142 }
143 else if(ptr[3]==0x72) {
144 // generated by emit_jno_unlikely
145 if((target-(u_int)ptr2-8)<1024) {
146 assert(((uintptr_t)addr&3)==0);
147 assert((target&3)==0);
148 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
149 }
150 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
151 assert(((uintptr_t)addr&3)==0);
152 assert((target&3)==0);
153 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
154 }
155 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
160 }
161}
162
163// This optionally copies the instruction from the target of the branch into
164// the space before the branch. Works, but the difference in speed is
165// usually insignificant.
#if 0
// Disabled variant of set_jump_target. When 'copy' is set and the insn at
// the branch target passes the filters below, that insn is copied into the
// slot just before the branch and the branch is pointed one insn further.
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes=(u_char *)addr;
  u_int *insn=(u_int *)bytes;
  assert(!copy||insn[-1]==0xe28dd000);
  if(bytes[3]==0xe2) {
    // pc-relative immediate form: nothing to copy, just patch the offset
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((bytes[3]&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  if((target_insn&0x0e100000)==0            // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000 // Load
     ||(target_insn&0x08000000)) {
    copy=0;
  }
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
197
198/* Literal pool */
199static void add_literal(int addr,int val)
200{
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
204 literalcount++;
205}
206
207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
209static void *find_extjump_insn(void *stub)
210{
211 int *ptr=(int *)(stub+4);
212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
213 u_int offset=*ptr&0xfff;
214 void **l_ptr=(void *)ptr+offset+8;
215 return *l_ptr;
216}
217
// Find where an external branch is linked to, using the address of its stub:
// get the address that the insn one after the stub start loads
// (dyna_linker arg1), treat it as a pointer to the branch insn,
// and return the address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch=find_extjump_insn(stub);
  assert((*branch&0x0f000000)==0x0a000000);
  // signed 24-bit word offset -> byte displacement
  int disp=(*branch<<8)>>6;
  return (int)branch+disp+8;
}
229
230// Find the "clean" entry point from a "dirty" entry point
231// by skipping past the call to verify_code
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
  // skip the argument setup in front of the verify call
#ifdef HAVE_ARMV7
  insn += 6;
#else
  insn += 4;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  // an unconditional branch follows the call: follow jump
  return (char *)insn + ((*insn << 8) >> 6) + 8;
}
248
249static int verify_dirty(u_int *ptr)
250{
251 #ifndef HAVE_ARMV7
252 u_int offset;
253 // get from literal pool
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
283static int isclean(void *addr)
284{
285 #ifndef HAVE_ARMV7
286 u_int *ptr=((u_int *)addr)+4;
287 #else
288 u_int *ptr=((u_int *)addr)+6;
289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
298// get source that block at addr was compiled from (host pointers)
299static void get_bounds(int addr,u_int *start,u_int *end)
300{
301 u_int *ptr=(u_int *)addr;
302 #ifndef HAVE_ARMV7
303 u_int offset;
304 // get from literal pool
305 assert((*ptr&0xFFFF0000)==0xe59f0000);
306 offset=*ptr&0xfff;
307 u_int source=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
310 //offset=*ptr&0xfff;
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
312 ptr++;
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
314 offset=*ptr&0xfff;
315 u_int len=*(u_int*)((void *)ptr+offset+8);
316 ptr++;
317 ptr++;
318 #else
319 // ARMv7 movw/movt
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
324 ptr+=6;
325 #endif
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
328 *start=source;
329 *end=source+len;
330}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
336static void alloc_reg(struct regstat *cur,int i,signed char reg)
337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
342
343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
345
346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
351
352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
354
355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
363 if(r<64&&((cur->u>>r)&1)) {
364 cur->regmap[preferred_reg]=reg;
365 cur->dirty&=~(1<<preferred_reg);
366 cur->isconst&=~(1<<preferred_reg);
367 return;
368 }
369 if(r>=64&&((cur->uu>>(r&63))&1)) {
370 cur->regmap[preferred_reg]=reg;
371 cur->dirty&=~(1<<preferred_reg);
372 cur->isconst&=~(1<<preferred_reg);
373 return;
374 }
375
376 // Clear any unneeded registers
377 // We try to keep the mapping consistent, if possible, because it
378 // makes branches easier (especially loops). So we try to allocate
379 // first (see above) before removing old mappings. If this is not
380 // possible then go ahead and clear out the registers that are no
381 // longer needed.
382 for(hr=0;hr<HOST_REGS;hr++)
383 {
384 r=cur->regmap[hr];
385 if(r>=0) {
386 if(r<64) {
387 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
388 }
389 else
390 {
391 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
392 }
393 }
394 }
395 // Try to allocate any available register, but prefer
396 // registers that have not been used recently.
397 if(i>0) {
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 // Try to allocate any available register
410 for(hr=0;hr<HOST_REGS;hr++) {
411 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
412 cur->regmap[hr]=reg;
413 cur->dirty&=~(1<<hr);
414 cur->isconst&=~(1<<hr);
415 return;
416 }
417 }
418
419 // Ok, now we have to evict someone
420 // Pick a register we hopefully won't need soon
421 u_char hsn[MAXREG+1];
422 memset(hsn,10,sizeof(hsn));
423 int j;
424 lsn(hsn,i,&preferred_reg);
425 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
426 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
427 if(i>0) {
428 // Don't evict the cycle count at entry points, otherwise the entry
429 // stub will have to write it.
430 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
431 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
432 for(j=10;j>=3;j--)
433 {
434 // Alloc preferred register if available
435 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
436 for(hr=0;hr<HOST_REGS;hr++) {
437 // Evict both parts of a 64-bit register
438 if((cur->regmap[hr]&63)==r) {
439 cur->regmap[hr]=-1;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 }
443 }
444 cur->regmap[preferred_reg]=reg;
445 return;
446 }
447 for(r=1;r<=MAXREG;r++)
448 {
449 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 for(hr=0;hr<HOST_REGS;hr++) {
461 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
462 if(cur->regmap[hr]==r) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 }
470 }
471 }
472 }
473 }
474 for(j=10;j>=0;j--)
475 {
476 for(r=1;r<=MAXREG;r++)
477 {
478 if(hsn[r]==j) {
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r+64) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 for(hr=0;hr<HOST_REGS;hr++) {
488 if(cur->regmap[hr]==r) {
489 cur->regmap[hr]=reg;
490 cur->dirty&=~(1<<hr);
491 cur->isconst&=~(1<<hr);
492 return;
493 }
494 }
495 }
496 }
497 }
498 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
499}
500
501static void alloc_reg64(struct regstat *cur,int i,signed char reg)
502{
503 int preferred_reg = 8+(reg&1);
504 int r,hr;
505
506 // allocate the lower 32 bits
507 alloc_reg(cur,i,reg);
508
509 // Don't allocate unused registers
510 if((cur->uu>>reg)&1) return;
511
512 // see if the upper half is already allocated
513 for(hr=0;hr<HOST_REGS;hr++)
514 {
515 if(cur->regmap[hr]==reg+64) return;
516 }
517
518 // Keep the same mapping if the register was already allocated in a loop
519 preferred_reg = loop_reg(i,reg,preferred_reg);
520
521 // Try to allocate the preferred register
522 if(cur->regmap[preferred_reg]==-1) {
523 cur->regmap[preferred_reg]=reg|64;
524 cur->dirty&=~(1<<preferred_reg);
525 cur->isconst&=~(1<<preferred_reg);
526 return;
527 }
528 r=cur->regmap[preferred_reg];
529 if(r<64&&((cur->u>>r)&1)) {
530 cur->regmap[preferred_reg]=reg|64;
531 cur->dirty&=~(1<<preferred_reg);
532 cur->isconst&=~(1<<preferred_reg);
533 return;
534 }
535 if(r>=64&&((cur->uu>>(r&63))&1)) {
536 cur->regmap[preferred_reg]=reg|64;
537 cur->dirty&=~(1<<preferred_reg);
538 cur->isconst&=~(1<<preferred_reg);
539 return;
540 }
541
542 // Clear any unneeded registers
543 // We try to keep the mapping consistent, if possible, because it
544 // makes branches easier (especially loops). So we try to allocate
545 // first (see above) before removing old mappings. If this is not
546 // possible then go ahead and clear out the registers that are no
547 // longer needed.
548 for(hr=HOST_REGS-1;hr>=0;hr--)
549 {
550 r=cur->regmap[hr];
551 if(r>=0) {
552 if(r<64) {
553 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
554 }
555 else
556 {
557 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
558 }
559 }
560 }
561 // Try to allocate any available register, but prefer
562 // registers that have not been used recently.
563 if(i>0) {
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 // Try to allocate any available register
576 for(hr=0;hr<HOST_REGS;hr++) {
577 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
578 cur->regmap[hr]=reg|64;
579 cur->dirty&=~(1<<hr);
580 cur->isconst&=~(1<<hr);
581 return;
582 }
583 }
584
585 // Ok, now we have to evict someone
586 // Pick a register we hopefully won't need soon
587 u_char hsn[MAXREG+1];
588 memset(hsn,10,sizeof(hsn));
589 int j;
590 lsn(hsn,i,&preferred_reg);
591 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
592 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
593 if(i>0) {
594 // Don't evict the cycle count at entry points, otherwise the entry
595 // stub will have to write it.
596 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
597 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
598 for(j=10;j>=3;j--)
599 {
600 // Alloc preferred register if available
601 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
602 for(hr=0;hr<HOST_REGS;hr++) {
603 // Evict both parts of a 64-bit register
604 if((cur->regmap[hr]&63)==r) {
605 cur->regmap[hr]=-1;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 }
609 }
610 cur->regmap[preferred_reg]=reg|64;
611 return;
612 }
613 for(r=1;r<=MAXREG;r++)
614 {
615 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 }
626 for(hr=0;hr<HOST_REGS;hr++) {
627 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
628 if(cur->regmap[hr]==r) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 }
636 }
637 }
638 }
639 }
640 for(j=10;j>=0;j--)
641 {
642 for(r=1;r<=MAXREG;r++)
643 {
644 if(hsn[r]==j) {
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r+64) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 for(hr=0;hr<HOST_REGS;hr++) {
654 if(cur->regmap[hr]==r) {
655 cur->regmap[hr]=reg|64;
656 cur->dirty&=~(1<<hr);
657 cur->isconst&=~(1<<hr);
658 return;
659 }
660 }
661 }
662 }
663 }
664 SysPrintf("This shouldn't happen");exit(1);
665}
666
667// Allocate a temporary register. This is done without regard to
668// dirty status or whether the register we request is on the unneeded list
669// Note: This will only allocate one register, even if called multiple times
670static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
671{
672 int r,hr;
673 int preferred_reg = -1;
674
675 // see if it's already allocated
676 for(hr=0;hr<HOST_REGS;hr++)
677 {
678 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
679 }
680
681 // Try to allocate any available register
682 for(hr=HOST_REGS-1;hr>=0;hr--) {
683 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
684 cur->regmap[hr]=reg;
685 cur->dirty&=~(1<<hr);
686 cur->isconst&=~(1<<hr);
687 return;
688 }
689 }
690
691 // Find an unneeded register
692 for(hr=HOST_REGS-1;hr>=0;hr--)
693 {
694 r=cur->regmap[hr];
695 if(r>=0) {
696 if(r<64) {
697 if((cur->u>>r)&1) {
698 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
699 cur->regmap[hr]=reg;
700 cur->dirty&=~(1<<hr);
701 cur->isconst&=~(1<<hr);
702 return;
703 }
704 }
705 }
706 else
707 {
708 if((cur->uu>>(r&63))&1) {
709 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
710 cur->regmap[hr]=reg;
711 cur->dirty&=~(1<<hr);
712 cur->isconst&=~(1<<hr);
713 return;
714 }
715 }
716 }
717 }
718 }
719
720 // Ok, now we have to evict someone
721 // Pick a register we hopefully won't need soon
722 // TODO: we might want to follow unconditional jumps here
723 // TODO: get rid of dupe code and make this into a function
724 u_char hsn[MAXREG+1];
725 memset(hsn,10,sizeof(hsn));
726 int j;
727 lsn(hsn,i,&preferred_reg);
728 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
729 if(i>0) {
730 // Don't evict the cycle count at entry points, otherwise the entry
731 // stub will have to write it.
732 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
733 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
734 for(j=10;j>=3;j--)
735 {
736 for(r=1;r<=MAXREG;r++)
737 {
738 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
739 for(hr=0;hr<HOST_REGS;hr++) {
740 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 }
749 for(hr=0;hr<HOST_REGS;hr++) {
750 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
751 if(cur->regmap[hr]==r) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 }
759 }
760 }
761 }
762 }
763 for(j=10;j>=0;j--)
764 {
765 for(r=1;r<=MAXREG;r++)
766 {
767 if(hsn[r]==j) {
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r+64) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 for(hr=0;hr<HOST_REGS;hr++) {
777 if(cur->regmap[hr]==r) {
778 cur->regmap[hr]=reg;
779 cur->dirty&=~(1<<hr);
780 cur->isconst&=~(1<<hr);
781 return;
782 }
783 }
784 }
785 }
786 }
787 SysPrintf("This shouldn't happen");exit(1);
788}
789
790// Allocate a specific ARM register.
791static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
792{
793 int n;
794 int dirty=0;
795
796 // see if it's already allocated (and dealloc it)
797 for(n=0;n<HOST_REGS;n++)
798 {
799 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
800 dirty=(cur->dirty>>n)&1;
801 cur->regmap[n]=-1;
802 }
803 }
804
805 cur->regmap[hr]=reg;
806 cur->dirty&=~(1<<hr);
807 cur->dirty|=dirty<<hr;
808 cur->isconst&=~(1<<hr);
809}
810
811// Alloc cycle count into dedicated register
812static void alloc_cc(struct regstat *cur,int i)
813{
814 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
815}
816
817/* Special alloc */
818
819
820/* Assembler */
821
822static unused char regname[16][4] = {
823 "r0",
824 "r1",
825 "r2",
826 "r3",
827 "r4",
828 "r5",
829 "r6",
830 "r7",
831 "r8",
832 "r9",
833 "r10",
834 "fp",
835 "r12",
836 "sp",
837 "lr",
838 "pc"};
839
840static void output_w32(u_int word)
841{
842 *((u_int *)out)=word;
843 out+=4;
844}
845
// Build the register fields of an instruction word:
// Rn in bits 16-19, Rd in bits 12-15, Rm in bits 0-3.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
853
// Build the Rn/Rd fields plus an 8-bit immediate that is shifted left by
// 'shift' bits (shift must be even). The rotate field stores the
// equivalent right rotation, (32-shift)/2, in bits 8-11.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
862
// Try to encode imm as an ARM immediate operand (an 8-bit value rotated
// right by an even amount). On success the 12-bit rotate/imm8 field is
// stored in *encoded and 1 is returned; otherwise *encoded is 0 and the
// result is 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
878
879static void genimm_checked(u_int imm,u_int *encoded)
880{
881 u_int ret=genimm(imm,encoded);
882 assert(ret);
883 (void)ret;
884}
885
886static u_int genjmp(u_int addr)
887{
888 int offset=addr-(int)out-8;
889 if(offset<-33554432||offset>=33554432) {
890 if (addr>2) {
891 SysPrintf("genjmp: out of range: %08x\n", offset);
892 exit(1);
893 }
894 return 0;
895 }
896 return ((u_int)offset>>2)&0xffffff;
897}
898
899static void emit_mov(int rs,int rt)
900{
901 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
902 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
903}
904
905static void emit_movs(int rs,int rt)
906{
907 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
908 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
909}
910
911static void emit_add(int rs1,int rs2,int rt)
912{
913 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
915}
916
917static void emit_adds(int rs1,int rs2,int rt)
918{
919 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
921}
922
923static void emit_adcs(int rs1,int rs2,int rt)
924{
925 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
926 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
927}
928
929static void emit_sbc(int rs1,int rs2,int rt)
930{
931 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
932 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
933}
934
935static void emit_sbcs(int rs1,int rs2,int rt)
936{
937 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
939}
940
941static void emit_neg(int rs, int rt)
942{
943 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
944 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
945}
946
947static void emit_negs(int rs, int rt)
948{
949 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
950 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
951}
952
953static void emit_sub(int rs1,int rs2,int rt)
954{
955 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
956 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
957}
958
959static void emit_subs(int rs1,int rs2,int rt)
960{
961 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
962 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
963}
964
965static void emit_zeroreg(int rt)
966{
967 assem_debug("mov %s,#0\n",regname[rt]);
968 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
969}
970
971static void emit_loadlp(u_int imm,u_int rt)
972{
973 add_literal((int)out,imm);
974 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
975 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
976}
977
978static void emit_movw(u_int imm,u_int rt)
979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
984
985static void emit_movt(u_int imm,u_int rt)
986{
987 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
988 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
989}
990
991static void emit_movimm(u_int imm,u_int rt)
992{
993 u_int armval;
994 if(genimm(imm,&armval)) {
995 assem_debug("mov %s,#%d\n",regname[rt],imm);
996 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
997 }else if(genimm(~imm,&armval)) {
998 assem_debug("mvn %s,#%d\n",regname[rt],imm);
999 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1000 }else if(imm<65536) {
1001 #ifndef HAVE_ARMV7
1002 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1003 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1004 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1005 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1006 #else
1007 emit_movw(imm,rt);
1008 #endif
1009 }else{
1010 #ifndef HAVE_ARMV7
1011 emit_loadlp(imm,rt);
1012 #else
1013 emit_movw(imm&0x0000FFFF,rt);
1014 emit_movt(imm&0xFFFF0000,rt);
1015 #endif
1016 }
1017}
1018
1019static void emit_pcreladdr(u_int rt)
1020{
1021 assem_debug("add %s,pc,#?\n",regname[rt]);
1022 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1023}
1024
1025static void emit_loadreg(int r, int hr)
1026{
1027 if(r&64) {
1028 SysPrintf("64bit load in 32bit mode!\n");
1029 assert(0);
1030 return;
1031 }
1032 if((r&63)==0)
1033 emit_zeroreg(hr);
1034 else {
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==CSREG) addr=(int)&Status;
1040 if(r==FSREG) addr=(int)&FCR31;
1041 if(r==INVCP) addr=(int)&invc_ptr;
1042 u_int offset = addr-(u_int)&dynarec_local;
1043 assert(offset<4096);
1044 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1045 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1046 }
1047}
1048
1049static void emit_storereg(int r, int hr)
1050{
1051 if(r&64) {
1052 SysPrintf("64bit store in 32bit mode!\n");
1053 assert(0);
1054 return;
1055 }
1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
1067static void emit_test(int rs, int rt)
1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
1073static void emit_testimm(int rs,int imm)
1074{
1075 u_int armval;
1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
1077 genimm_checked(imm,&armval);
1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
1081static void emit_testeqimm(int rs,int imm)
1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1085 genimm_checked(imm,&armval);
1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
1089static void emit_not(int rs,int rt)
1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
1095static void emit_mvnmi(int rs,int rt)
1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
1101static void emit_and(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
1107static void emit_or(u_int rs1,u_int rs2,u_int rt)
1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
1112
1113static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1114{
1115 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1116 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1117}
1118
1119static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1120{
1121 assert(rs<16);
1122 assert(rt<16);
1123 assert(imm<32);
1124 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1125 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1126}
1127
1128static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 assert(imm<32);
1133 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1134 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1135}
1136
1137static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1138{
1139 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1140 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1141}
1142
1143static void emit_addimm(u_int rs,int imm,u_int rt)
1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 if(imm!=0) {
1148 u_int armval;
1149 if(genimm(imm,&armval)) {
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1151 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1152 }else if(genimm(-imm,&armval)) {
1153 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1154 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1155 #ifdef HAVE_ARMV7
1156 }else if(rt!=rs&&(u_int)imm<65536) {
1157 emit_movw(imm&0x0000ffff,rt);
1158 emit_add(rs,rt,rt);
1159 }else if(rt!=rs&&(u_int)-imm<65536) {
1160 emit_movw(-imm&0x0000ffff,rt);
1161 emit_sub(rs,rt,rt);
1162 #endif
1163 }else if((u_int)-imm<65536) {
1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1168 }else {
1169 do {
1170 int shift = (ffs(imm) - 1) & ~1;
1171 int imm8 = imm & (0xff << shift);
1172 genimm_checked(imm8,&armval);
1173 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1174 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1175 rs = rt;
1176 imm &= ~imm8;
1177 }
1178 while (imm != 0);
1179 }
1180 }
1181 else if(rs!=rt) emit_mov(rs,rt);
1182}
1183
1184static void emit_addimm_and_set_flags(int imm,int rt)
1185{
1186 assert(imm>-65536&&imm<65536);
1187 u_int armval;
1188 if(genimm(imm,&armval)) {
1189 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1190 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1191 }else if(genimm(-imm,&armval)) {
1192 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1193 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1194 }else if(imm<0) {
1195 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1196 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1197 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1198 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1199 }else{
1200 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1201 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1202 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1203 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1204 }
1205}
1206
1207static void emit_addimm_no_flags(u_int imm,u_int rt)
1208{
1209 emit_addimm(rt,imm,rt);
1210}
1211
1212static void emit_addnop(u_int r)
1213{
1214 assert(r<16);
1215 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1216 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1217}
1218
1219static void emit_adcimm(u_int rs,int imm,u_int rt)
1220{
1221 u_int armval;
1222 genimm_checked(imm,&armval);
1223 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1226
1227static void emit_rscimm(int rs,int imm,u_int rt)
1228{
1229 assert(0);
1230 u_int armval;
1231 genimm_checked(imm,&armval);
1232 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1234}
1235
1236static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1237{
1238 // TODO: if(genimm(imm,&armval)) ...
1239 // else
1240 emit_movimm(imm,HOST_TEMPREG);
1241 emit_adds(HOST_TEMPREG,rsl,rtl);
1242 emit_adcimm(rsh,0,rth);
1243}
1244
1245static void emit_andimm(int rs,int imm,int rt)
1246{
1247 u_int armval;
1248 if(imm==0) {
1249 emit_zeroreg(rt);
1250 }else if(genimm(imm,&armval)) {
1251 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1252 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1253 }else if(genimm(~imm,&armval)) {
1254 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(imm==65535) {
1257 #ifndef HAVE_ARMV6
1258 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1260 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1261 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1262 #else
1263 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1265 #endif
1266 }else{
1267 assert(imm>0&&imm<65535);
1268 #ifndef HAVE_ARMV7
1269 assem_debug("mov r14,#%d\n",imm&0xFF00);
1270 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1271 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1272 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1273 #else
1274 emit_movw(imm,HOST_TEMPREG);
1275 #endif
1276 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1277 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1278 }
1279}
1280
1281static void emit_orimm(int rs,int imm,int rt)
1282{
1283 u_int armval;
1284 if(imm==0) {
1285 if(rs!=rt) emit_mov(rs,rt);
1286 }else if(genimm(imm,&armval)) {
1287 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1289 }else{
1290 assert(imm>0&&imm<65536);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1294 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1295 }
1296}
1297
1298static void emit_xorimm(int rs,int imm,int rt)
1299{
1300 u_int armval;
1301 if(imm==0) {
1302 if(rs!=rt) emit_mov(rs,rt);
1303 }else if(genimm(imm,&armval)) {
1304 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1305 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1306 }else{
1307 assert(imm>0&&imm<65536);
1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1311 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1312 }
1313}
1314
1315static void emit_shlimm(int rs,u_int imm,int rt)
1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 //if(imm==1) ...
1320 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
1324static void emit_lsls_imm(int rs,int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
1332static unused void emit_lslpls_imm(int rs,int imm,int rt)
1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1338}
1339
1340static void emit_shrimm(int rs,u_int imm,int rt)
1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1346}
1347
1348static void emit_sarimm(int rs,u_int imm,int rt)
1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1354}
1355
1356static void emit_rorimm(int rs,u_int imm,int rt)
1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1362}
1363
1364static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1365{
1366 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1367 assert(imm>0);
1368 assert(imm<32);
1369 //if(imm==1) ...
1370 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1371 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1372 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1373 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1374}
1375
1376static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1377{
1378 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1379 assert(imm>0);
1380 assert(imm<32);
1381 //if(imm==1) ...
1382 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1383 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1384 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1385 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1386}
1387
/* Sign-extend the low 16 bits of rs into rt: SXTH when HAVE_ARMV6 is
 * defined, otherwise a shift left by 16 followed by an arithmetic shift
 * right by 16. */
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  unsigned int insn=0xe6bf0070;
  insn|=rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1398
/* Sign-extend the low 8 bits of rs into rt: SXTB when HAVE_ARMV6 is
 * defined, otherwise a shift left by 24 followed by an arithmetic shift
 * right by 24. */
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  unsigned int insn=0xe6af0070;
  insn|=rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1409
1410static void emit_shl(u_int rs,u_int shift,u_int rt)
1411{
1412 assert(rs<16);
1413 assert(rt<16);
1414 assert(shift<16);
1415 //if(imm==1) ...
1416 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1417 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1418}
1419
1420static void emit_shr(u_int rs,u_int shift,u_int rt)
1421{
1422 assert(rs<16);
1423 assert(rt<16);
1424 assert(shift<16);
1425 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1426 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1427}
1428
1429static void emit_sar(u_int rs,u_int shift,u_int rt)
1430{
1431 assert(rs<16);
1432 assert(rt<16);
1433 assert(shift<16);
1434 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1435 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1436}
1437
1438static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1439{
1440 assert(rs<16);
1441 assert(rt<16);
1442 assert(shift<16);
1443 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1444 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1445}
1446
1447static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1448{
1449 assert(rs<16);
1450 assert(rt<16);
1451 assert(shift<16);
1452 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1453 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1454}
1455
1456static void emit_cmpimm(int rs,int imm)
1457{
1458 u_int armval;
1459 if(genimm(imm,&armval)) {
1460 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1461 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1462 }else if(genimm(-imm,&armval)) {
1463 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1464 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1465 }else if(imm>0) {
1466 assert(imm<65536);
1467 emit_movimm(imm,HOST_TEMPREG);
1468 assem_debug("cmp %s,r14\n",regname[rs]);
1469 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1470 }else{
1471 assert(imm>-65536);
1472 emit_movimm(-imm,HOST_TEMPREG);
1473 assem_debug("cmn %s,r14\n",regname[rs]);
1474 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1475 }
1476}
1477
1478static void emit_cmovne_imm(int imm,int rt)
1479{
1480 assem_debug("movne %s,#%d\n",regname[rt],imm);
1481 u_int armval;
1482 genimm_checked(imm,&armval);
1483 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1484}
1485
1486static void emit_cmovl_imm(int imm,int rt)
1487{
1488 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1489 u_int armval;
1490 genimm_checked(imm,&armval);
1491 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1492}
1493
1494static void emit_cmovb_imm(int imm,int rt)
1495{
1496 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1497 u_int armval;
1498 genimm_checked(imm,&armval);
1499 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1500}
1501
1502static void emit_cmovs_imm(int imm,int rt)
1503{
1504 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1505 u_int armval;
1506 genimm_checked(imm,&armval);
1507 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1508}
1509
1510static void emit_cmove_reg(int rs,int rt)
1511{
1512 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1513 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1514}
1515
1516static void emit_cmovne_reg(int rs,int rt)
1517{
1518 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1519 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1520}
1521
1522static void emit_cmovl_reg(int rs,int rt)
1523{
1524 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1526}
1527
1528static void emit_cmovs_reg(int rs,int rt)
1529{
1530 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1531 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1532}
1533
/*
 * rt = (rs < imm) using the LT condition, result 1 or 0.
 * When rs and rt are the same register the compare must read rs before rt
 * is cleared, so the zeroing is done after the compare via emit_movimm().
 */
static void emit_slti32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovl_imm(1,rt);
}
1541
/*
 * rt = (rs < imm) using the CC condition, result 1 or 0.
 * When rs and rt are the same register the compare must read rs before rt
 * is cleared, so the zeroing is done after the compare via emit_movimm().
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovb_imm(1,rt);
}
1549
/*
 * rt = ((rsh:rsl) < sign-extended imm), signed 64-bit compare, result 1/0.
 * rsh must not alias rt.
 *
 * The low words only decide the result when the high word equals the
 * immediate's sign extension (0 for imm >= 0, -1 for imm < 0).  In that
 * case the low words are plain magnitudes and must be compared unsigned.
 * A signed low-word compare gives the wrong answer for values such as
 * 0x00000000_80000000 < 5.
 */
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  /* Low-word result, used only if the high word matches below. */
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);  /* high word non-zero: not less ...        */
    emit_cmovs_imm(1,rt);   /* ... unless it is negative: always less  */
  }
  else
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);  /* high word != -1: not less ...           */
    emit_cmovl_imm(1,rt);   /* ... unless it is below -1: always less  */
  }
}
1567
/*
 * rt = ((rsh:rsl) < imm) built from emit_sltiu32() on the low word plus a
 * correction from the high word.  rsh must not alias rt.
 *   imm >= 0: a non-zero high word forces the result to 0.
 *   imm <  0: a high word different from -1 forces the result to 1.
 */
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1583
1584static void emit_cmp(int rs,int rt)
1585{
1586 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1587 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1588}
1589
/*
 * rt = (rs > 0) as 1/0: compare rs with 1, preload rt with 1, then replace
 * it with 0 under the LT condition.  The compare is emitted first, so rs
 * and rt may be the same register.
 */
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int one=1;
  emit_cmpimm(rs,one);
  emit_movimm(one,rt);
  emit_cmovl_imm(0,rt);
}
1597
/*
 * rt = (rs != 0) as 1/0.  When rs and rt differ, emit_movs() copies rs into
 * rt; when they are the same register emit_test(rs,rs) is used instead.
 * A following conditional move (NE) then writes 1.
 */
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
1605
/*
 * rt = ((rsh:rsl) > 0) as a signed 64-bit test, result 1/0.
 * rsh must not alias rt.
 *
 * The low word only matters when the high word is zero, and then the value
 * is positive exactly when the low word is non-zero.  A signed ">= 1" test
 * on the low word would wrongly report 0 for values such as
 * 0x00000000_80000000, so the low word is tested for non-zero instead.
 */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);   /* rt = (rsl != 0), used when rsh == 0 */
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);   /* high word non-zero: positive ...    */
  emit_cmovs_imm(0,rt);    /* ... unless it is negative           */
}
1614
/* rt = ((rsh:rsl) != 0): ORRS of both halves into rt, then a conditional
 * move (NE) writes 1 when the OR result was non-zero. */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  const int dest=rt;
  emit_or_and_set_flags(rsh,rsl,dest);
  emit_cmovne_imm(1,dest);
}
1621
/*
 * rt = (rs1 < rs2) using the LT condition, result 1/0.
 * If rt aliases either source it is cleared only after the compare.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1630
/*
 * rt = (rs1 < rs2) using the CC condition, result 1/0.
 * If rt aliases either source it is cleared only after the compare.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1639
1640static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1641{
1642 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1643 assert(u1!=rt);
1644 assert(u2!=rt);
1645 emit_cmp(l1,l2);
1646 emit_movimm(0,rt);
1647 emit_sbcs(u1,u2,HOST_TEMPREG);
1648 emit_cmovl_imm(1,rt);
1649}
1650
1651static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1652{
1653 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1654 assert(u1!=rt);
1655 assert(u2!=rt);
1656 emit_cmp(l1,l2);
1657 emit_movimm(0,rt);
1658 emit_sbcs(u1,u2,HOST_TEMPREG);
1659 emit_cmovb_imm(1,rt);
1660}
1661
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
/* One table entry: a helper's address plus its name with a leading space. */
#define FUNCNAME(f) { (intptr_t)f, " " #f }
/* Known call/jump targets, used to annotate debug disassembly. */
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

/* Return the table name for address a, or "" when a is not listed. */
static const char *func_name(intptr_t a)
{
  const int count = sizeof(function_names)/sizeof(function_names[0]);
  int idx = 0;
  while (idx < count) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
/* Without DRC_DBG no names are available. */
#define func_name(x) ""
#endif
1702
1703static void emit_call(int a)
1704{
1705 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1706 u_int offset=genjmp(a);
1707 output_w32(0xeb000000|offset);
1708}
1709
1710static void emit_jmp(const void *a_)
1711{
1712 int a = (int)a_;
1713 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1714 u_int offset=genjmp(a);
1715 output_w32(0xea000000|offset);
1716}
1717
1718static void emit_jne(int a)
1719{
1720 assem_debug("bne %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0x1a000000|offset);
1723}
1724
1725static void emit_jeq(int a)
1726{
1727 assem_debug("beq %x\n",a);
1728 u_int offset=genjmp(a);
1729 output_w32(0x0a000000|offset);
1730}
1731
1732static void emit_js(int a)
1733{
1734 assem_debug("bmi %x\n",a);
1735 u_int offset=genjmp(a);
1736 output_w32(0x4a000000|offset);
1737}
1738
1739static void emit_jns(int a)
1740{
1741 assem_debug("bpl %x\n",a);
1742 u_int offset=genjmp(a);
1743 output_w32(0x5a000000|offset);
1744}
1745
1746static void emit_jl(int a)
1747{
1748 assem_debug("blt %x\n",a);
1749 u_int offset=genjmp(a);
1750 output_w32(0xba000000|offset);
1751}
1752
1753static void emit_jge(int a)
1754{
1755 assem_debug("bge %x\n",a);
1756 u_int offset=genjmp(a);
1757 output_w32(0xaa000000|offset);
1758}
1759
1760static void emit_jno(int a)
1761{
1762 assem_debug("bvc %x\n",a);
1763 u_int offset=genjmp(a);
1764 output_w32(0x7a000000|offset);
1765}
1766
1767static void emit_jc(int a)
1768{
1769 assem_debug("bcs %x\n",a);
1770 u_int offset=genjmp(a);
1771 output_w32(0x2a000000|offset);
1772}
1773
1774static void emit_jcc(void *a_)
1775{
1776 int a = (int)a_;
1777 assem_debug("bcc %x\n",a);
1778 u_int offset=genjmp(a);
1779 output_w32(0x3a000000|offset);
1780}
1781
1782static void emit_callreg(u_int r)
1783{
1784 assert(r<15);
1785 assem_debug("blx %s\n",regname[r]);
1786 output_w32(0xe12fff30|r);
1787}
1788
1789static void emit_jmpreg(u_int r)
1790{
1791 assem_debug("mov pc,%s\n",regname[r]);
1792 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1793}
1794
1795static void emit_readword_indexed(int offset, int rs, int rt)
1796{
1797 assert(offset>-4096&&offset<4096);
1798 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1799 if(offset>=0) {
1800 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1801 }else{
1802 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1803 }
1804}
1805
1806static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1807{
1808 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1809 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1810}
1811
1812static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1813{
1814 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1815 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1816}
1817
1818static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1819{
1820 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1821 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1822}
1823
1824static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1828}
1829
1830static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1831{
1832 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1833 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1834}
1835
1836static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1837{
1838 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1839 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1840}
1841
/*
 * Word load that optionally goes through a map register.
 * map < 0: plain "ldr rt,[rs,#addr]".  Otherwise addr must be 0 and the
 * load uses rs plus map scaled by 4.
 */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1850
/*
 * Load a 64-bit value as two words: rh from addr (skipped when rh < 0) and
 * rl from the following word.
 * With a map register (map >= 0) rh must differ from rs, and map itself is
 * incremented by 1 between the two loads, so it is modified.
 */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1863
1864static void emit_movsbl_indexed(int offset, int rs, int rt)
1865{
1866 assert(offset>-256&&offset<256);
1867 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1868 if(offset>=0) {
1869 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1870 }else{
1871 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1872 }
1873}
1874
1875static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1876{
1877 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1878 else {
1879 if(addr==0) {
1880 emit_shlimm(map,2,map);
1881 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1882 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1883 }else{
1884 assert(addr>-256&&addr<256);
1885 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1886 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1887 emit_movsbl_indexed(addr, rt, rt);
1888 }
1889 }
1890}
1891
1892static void emit_movswl_indexed(int offset, int rs, int rt)
1893{
1894 assert(offset>-256&&offset<256);
1895 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1896 if(offset>=0) {
1897 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1898 }else{
1899 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1900 }
1901}
1902
1903static void emit_movzbl_indexed(int offset, int rs, int rt)
1904{
1905 assert(offset>-4096&&offset<4096);
1906 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1907 if(offset>=0) {
1908 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1909 }else{
1910 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1911 }
1912}
1913
1914static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1915{
1916 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1917 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1918}
1919
/*
 * Unsigned byte load that optionally goes through a map register.
 * map < 0: plain offset load.  Otherwise the load is indexed by map scaled
 * by 4; a non-zero addr is first added to rs, with rt holding the sum.
 */
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1932
1933static void emit_movzwl_indexed(int offset, int rs, int rt)
1934{
1935 assert(offset>-256&&offset<256);
1936 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1937 if(offset>=0) {
1938 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1939 }else{
1940 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1941 }
1942}
1943
1944static void emit_ldrd(int offset, int rs, int rt)
1945{
1946 assert(offset>-256&&offset<256);
1947 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1948 if(offset>=0) {
1949 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1950 }else{
1951 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1952 }
1953}
1954
1955static void emit_readword(int addr, int rt)
1956{
1957 u_int offset = addr-(u_int)&dynarec_local;
1958 assert(offset<4096);
1959 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1960 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1961}
1962
1963static unused void emit_movsbl(int addr, int rt)
1964{
1965 u_int offset = addr-(u_int)&dynarec_local;
1966 assert(offset<256);
1967 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1968 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1969}
1970
1971static unused void emit_movswl(int addr, int rt)
1972{
1973 u_int offset = addr-(u_int)&dynarec_local;
1974 assert(offset<256);
1975 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1976 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1977}
1978
1979static unused void emit_movzbl(int addr, int rt)
1980{
1981 u_int offset = addr-(u_int)&dynarec_local;
1982 assert(offset<4096);
1983 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1984 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1985}
1986
1987static unused void emit_movzwl(int addr, int rt)
1988{
1989 u_int offset = addr-(u_int)&dynarec_local;
1990 assert(offset<256);
1991 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1992 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1993}
1994
1995static void emit_writeword_indexed(int rt, int offset, int rs)
1996{
1997 assert(offset>-4096&&offset<4096);
1998 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1999 if(offset>=0) {
2000 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2001 }else{
2002 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2003 }
2004}
2005
2006static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2007{
2008 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2010}
2011
/*
 * Word store that optionally goes through a map register.
 * map < 0: plain "str rt,[rs,#addr]".  Otherwise addr must be 0 and the
 * store uses rs plus map scaled by 4.  temp is not used here.
 */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
2020
/*
 * Store a 64-bit value as two words: rh at addr and rl at the next word.
 *
 * map < 0: two plain offset stores (the rh store is skipped when rh < 0).
 * map >= 0: rh must be valid.  If temp is a register distinct from rs,
 * temp = map + 1 is used as the index for the second word; otherwise rs
 * itself is advanced by 4 (and thus modified) before the second store.
 */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2037
2038static void emit_writehword_indexed(int rt, int offset, int rs)
2039{
2040 assert(offset>-256&&offset<256);
2041 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2042 if(offset>=0) {
2043 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2044 }else{
2045 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2046 }
2047}
2048
2049static void emit_writebyte_indexed(int rt, int offset, int rs)
2050{
2051 assert(offset>-4096&&offset<4096);
2052 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2053 if(offset>=0) {
2054 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2055 }else{
2056 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2057 }
2058}
2059
2060static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2061{
2062 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2063 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2064}
2065
/*
 * Byte store that optionally goes through a map register.
 * map < 0: plain offset store.  Otherwise the store is indexed by map
 * scaled by 4; a non-zero addr is first added to rs with the sum in temp.
 */
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
2078
2079static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2080{
2081 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2083}
2084
2085static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2086{
2087 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2088 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2089}
2090
2091static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2092{
2093 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2094 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2095}
2096
2097static void emit_writeword(int rt, int addr)
2098{
2099 u_int offset = addr-(u_int)&dynarec_local;
2100 assert(offset<4096);
2101 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2102 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2103}
2104
2105static unused void emit_writehword(int rt, int addr)
2106{
2107 u_int offset = addr-(u_int)&dynarec_local;
2108 assert(offset<256);
2109 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2110 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2111}
2112
2113static unused void emit_writebyte(int rt, int addr)
2114{
2115 u_int offset = addr-(u_int)&dynarec_local;
2116 assert(offset<4096);
2117 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2118 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2119}
2120
2121static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2122{
2123 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2124 assert(rs1<16);
2125 assert(rs2<16);
2126 assert(hi<16);
2127 assert(lo<16);
2128 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2129}
2130
2131static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2132{
2133 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2134 assert(rs1<16);
2135 assert(rs2<16);
2136 assert(hi<16);
2137 assert(lo<16);
2138 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2139}
2140
2141static void emit_clz(int rs,int rt)
2142{
2143 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2144 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2145}
2146
2147static void emit_subcs(int rs1,int rs2,int rt)
2148{
2149 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2150 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2151}
2152
2153static void emit_shrcc_imm(int rs,u_int imm,int rt)
2154{
2155 assert(imm>0);
2156 assert(imm<32);
2157 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2158 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2159}
2160
2161static void emit_shrne_imm(int rs,u_int imm,int rt)
2162{
2163 assert(imm>0);
2164 assert(imm<32);
2165 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2166 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2167}
2168
2169static void emit_negmi(int rs, int rt)
2170{
2171 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2172 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2173}
2174
2175static void emit_negsmi(int rs, int rt)
2176{
2177 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2178 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2179}
2180
2181static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2182{
2183 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2184 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2185}
2186
2187static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2188{
2189 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2190 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2191}
2192
2193static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2194{
2195 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2196 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2197}
2198
2199static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2200{
2201 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2202 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2203}
2204
2205static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2206{
2207 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2208 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2209}
2210
2211static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2212{
2213 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2214 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2215}
2216
2217static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2218{
2219 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2220 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2221}
2222
2223static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2224{
2225 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2226 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2227}
2228
2229static void emit_teq(int rs, int rt)
2230{
2231 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2232 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2233}
2234
2235static void emit_rsbimm(int rs, int imm, int rt)
2236{
2237 u_int armval;
2238 genimm_checked(imm,&armval);
2239 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2240 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2241}
2242
2243// Load 2 immediates optimizing for small code size
2244static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2245{
2246 emit_movimm(imm1,rt1);
2247 u_int armval;
2248 if(genimm(imm2-imm1,&armval)) {
2249 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2250 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2251 }else if(genimm(imm1-imm2,&armval)) {
2252 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2253 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2254 }
2255 else emit_movimm(imm2,rt2);
2256}
2257
2258// Conditionally select one of two immediates, optimizing for small code size
2259// This will only be called if HAVE_CMOV_IMM is defined
2260static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2261{
2262 u_int armval;
2263 if(genimm(imm2-imm1,&armval)) {
2264 emit_movimm(imm1,rt);
2265 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2266 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2267 }else if(genimm(imm1-imm2,&armval)) {
2268 emit_movimm(imm1,rt);
2269 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2270 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2271 }
2272 else {
2273 #ifndef HAVE_ARMV7
2274 emit_movimm(imm1,rt);
2275 add_literal((int)out,imm2);
2276 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2277 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2278 #else
2279 emit_movw(imm1&0x0000FFFF,rt);
2280 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2281 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2282 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2283 }
2284 emit_movt(imm1&0xFFFF0000,rt);
2285 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2286 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2287 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2288 }
2289 #endif
2290 }
2291}
2292
2293// special case for checking invalid_code
2294static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2295{
2296 assert(imm<128&&imm>=0);
2297 assert(r>=0&&r<16);
2298 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2299 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2300 emit_cmpimm(HOST_TEMPREG,imm);
2301}
2302
2303static void emit_callne(int a)
2304{
2305 assem_debug("blne %x\n",a);
2306 u_int offset=genjmp(a);
2307 output_w32(0x1b000000|offset);
2308}
2309
2310// Used to preload hash table entries
2311static unused void emit_prefetchreg(int r)
2312{
2313 assem_debug("pld %s\n",regname[r]);
2314 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2315}
2316
2317// Special case for mini_ht
2318static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2319{
2320 assert(offset<4096);
2321 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2322 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2323}
2324
2325static unused void emit_bicne_imm(int rs,int imm,int rt)
2326{
2327 u_int armval;
2328 genimm_checked(imm,&armval);
2329 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2330 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2331}
2332
2333static unused void emit_biccs_imm(int rs,int imm,int rt)
2334{
2335 u_int armval;
2336 genimm_checked(imm,&armval);
2337 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2338 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2339}
2340
2341static unused void emit_bicvc_imm(int rs,int imm,int rt)
2342{
2343 u_int armval;
2344 genimm_checked(imm,&armval);
2345 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2346 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2347}
2348
2349static unused void emit_bichi_imm(int rs,int imm,int rt)
2350{
2351 u_int armval;
2352 genimm_checked(imm,&armval);
2353 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2354 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2355}
2356
2357static unused void emit_orrvs_imm(int rs,int imm,int rt)
2358{
2359 u_int armval;
2360 genimm_checked(imm,&armval);
2361 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2362 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2363}
2364
2365static void emit_orrne_imm(int rs,int imm,int rt)
2366{
2367 u_int armval;
2368 genimm_checked(imm,&armval);
2369 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2370 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2371}
2372
2373static void emit_andne_imm(int rs,int imm,int rt)
2374{
2375 u_int armval;
2376 genimm_checked(imm,&armval);
2377 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2378 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2379}
2380
2381static unused void emit_addpl_imm(int rs,int imm,int rt)
2382{
2383 u_int armval;
2384 genimm_checked(imm,&armval);
2385 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2386 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2387}
2388
2389static void emit_jno_unlikely(int a)
2390{
2391 //emit_jno(a);
2392 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2393 output_w32(0x72800000|rd_rn_rm(15,15,0));
2394}
2395
2396static void save_regs_all(u_int reglist)
2397{
2398 int i;
2399 if(!reglist) return;
2400 assem_debug("stmia fp,{");
2401 for(i=0;i<16;i++)
2402 if(reglist&(1<<i))
2403 assem_debug("r%d,",i);
2404 assem_debug("}\n");
2405 output_w32(0xe88b0000|reglist);
2406}
2407
2408static void restore_regs_all(u_int reglist)
2409{
2410 int i;
2411 if(!reglist) return;
2412 assem_debug("ldmia fp,{");
2413 for(i=0;i<16;i++)
2414 if(reglist&(1<<i))
2415 assem_debug("r%d,",i);
2416 assem_debug("}\n");
2417 output_w32(0xe89b0000|reglist);
2418}
2419
2420// Save registers before function call
2421static void save_regs(u_int reglist)
2422{
2423 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2424 save_regs_all(reglist);
2425}
2426
2427// Restore registers after function call
2428static void restore_regs(u_int reglist)
2429{
2430 reglist&=CALLER_SAVE_REGS;
2431 restore_regs_all(reglist);
2432}
2433
2434/* Stubs/epilogue */
2435
2436static void literal_pool(int n)
2437{
2438 if(!literalcount) return;
2439 if(n) {
2440 if((int)out-literals[0][0]<4096-n) return;
2441 }
2442 u_int *ptr;
2443 int i;
2444 for(i=0;i<literalcount;i++)
2445 {
2446 u_int l_addr=(u_int)out;
2447 int j;
2448 for(j=0;j<i;j++) {
2449 if(literals[j][1]==literals[i][1]) {
2450 //printf("dup %08x\n",literals[i][1]);
2451 l_addr=literals[j][0];
2452 break;
2453 }
2454 }
2455 ptr=(u_int *)literals[i][0];
2456 u_int offset=l_addr-(u_int)ptr-8;
2457 assert(offset<4096);
2458 assert(!(offset&3));
2459 *ptr|=offset;
2460 if(l_addr==(u_int)out) {
2461 literals[i][0]=l_addr; // remember for dupes
2462 output_w32(literals[i][1]);
2463 }
2464 }
2465 literalcount=0;
2466}
2467
2468static void literal_pool_jumpover(int n)
2469{
2470 if(!literalcount) return;
2471 if(n) {
2472 if((int)out-literals[0][0]<4096-n) return;
2473 }
2474 void *jaddr = out;
2475 emit_jmp(0);
2476 literal_pool(0);
2477 set_jump_target(jaddr, out);
2478}
2479
2480static void emit_extjump2(u_int addr, int target, void *linker)
2481{
2482 u_char *ptr=(u_char *)addr;
2483 assert((ptr[3]&0x0e)==0xa);
2484 (void)ptr;
2485
2486 emit_loadlp(target,0);
2487 emit_loadlp(addr,1);
2488 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2489 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2490//DEBUG >
2491#ifdef DEBUG_CYCLE_COUNT
2492 emit_readword((int)&last_count,ECX);
2493 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2494 emit_readword((int)&next_interupt,ECX);
2495 emit_writeword(HOST_CCREG,(int)&Count);
2496 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2497 emit_writeword(ECX,(int)&last_count);
2498#endif
2499//DEBUG <
2500 emit_jmp(linker);
2501}
2502
2503static void emit_extjump(int addr, int target)
2504{
2505 emit_extjump2(addr, target, dyna_linker);
2506}
2507
2508static void emit_extjump_ds(int addr, int target)
2509{
2510 emit_extjump2(addr, target, dyna_linker_ds);
2511}
2512
2513// put rt_val into rt, potentially making use of rs with value rs_val
2514static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2515{
2516 u_int armval;
2517 int diff;
2518 if(genimm(rt_val,&armval)) {
2519 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2520 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2521 return;
2522 }
2523 if(genimm(~rt_val,&armval)) {
2524 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2525 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2526 return;
2527 }
2528 diff=rt_val-rs_val;
2529 if(genimm(diff,&armval)) {
2530 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2531 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2532 return;
2533 }else if(genimm(-diff,&armval)) {
2534 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2535 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2536 return;
2537 }
2538 emit_movimm(rt_val,rt);
2539}
2540
// Return 1 if emit_movimm_from() can do its job cheaply for these two
// values: they are equal, or their difference (in either direction) fits
// in 8 bits once trailing pairs of zero bits are shifted out.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  int delta=v2-v1;
  u_int bits;

  // v2-v1: drop trailing zero bit pairs, then check for an 8-bit value
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // same test for v1-v2
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  return 0;
}
2556
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to pass" for that argument.
// trashes r2
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // the arguments sit in each other's place: must swap
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 holds the second argument: move it away before overwriting r0
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  // no conflict: move whatever is not already in place
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2573
2574static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
2575{
2576 switch(type) {
2577 case LOADB_STUB: emit_signextend8(rs,rt); break;
2578 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2579 case LOADH_STUB: emit_signextend16(rs,rt); break;
2580 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2581 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2582 default: assert(0);
2583 }
2584}
2585
2586#include "pcsxmem.h"
2587#include "pcsxmem_inline.c"
2588
2589static void do_readstub(int n)
2590{
2591 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
2592 literal_pool(256);
2593 set_jump_target(stubs[n].addr, out);
2594 enum stub_type type=stubs[n].type;
2595 int i=stubs[n].a;
2596 int rs=stubs[n].b;
2597 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2598 u_int reglist=stubs[n].e;
2599 signed char *i_regmap=i_regs->regmap;
2600 int rt;
2601 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2602 rt=get_reg(i_regmap,FTEMP);
2603 }else{
2604 rt=get_reg(i_regmap,rt1[i]);
2605 }
2606 assert(rs>=0);
2607 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2608 void *restore_jump = NULL;
2609 reglist|=(1<<rs);
2610 for(r=0;r<=12;r++) {
2611 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2612 temp=r; break;
2613 }
2614 }
2615 if(rt>=0&&rt1[i]!=0)
2616 reglist&=~(1<<rt);
2617 if(temp==-1) {
2618 save_regs(reglist);
2619 regs_saved=1;
2620 temp=(rs==0)?2:0;
2621 }
2622 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2623 temp2=1;
2624 emit_readword((int)&mem_rtab,temp);
2625 emit_shrimm(rs,12,temp2);
2626 emit_readword_dualindexedx4(temp,temp2,temp2);
2627 emit_lsls_imm(temp2,1,temp2);
2628 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2629 switch(type) {
2630 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2631 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2632 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2633 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2634 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2635 default: assert(0);
2636 }
2637 }
2638 if(regs_saved) {
2639 restore_jump=out;
2640 emit_jcc(0); // jump to reg restore
2641 }
2642 else
2643 emit_jcc(stubs[n].retaddr); // return address
2644
2645 if(!regs_saved)
2646 save_regs(reglist);
2647 int handler=0;
2648 if(type==LOADB_STUB||type==LOADBU_STUB)
2649 handler=(int)jump_handler_read8;
2650 if(type==LOADH_STUB||type==LOADHU_STUB)
2651 handler=(int)jump_handler_read16;
2652 if(type==LOADW_STUB)
2653 handler=(int)jump_handler_read32;
2654 assert(handler!=0);
2655 pass_args(rs,temp2);
2656 int cc=get_reg(i_regmap,CCREG);
2657 if(cc<0)
2658 emit_loadreg(CCREG,2);
2659 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2660 emit_call(handler);
2661 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2662 mov_loadtype_adj(type,0,rt);
2663 }
2664 if(restore_jump)
2665 set_jump_target(restore_jump, out);
2666 restore_regs(reglist);
2667 emit_jmp(stubs[n].retaddr); // return address
2668}
2669
2670// return memhandler, or get directly accessable address and return 0
2671static u_int get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
2672{
2673 u_int l1,l2=0;
2674 l1=((u_int *)table)[addr>>12];
2675 if((l1&(1<<31))==0) {
2676 u_int v=l1<<1;
2677 *addr_host=v+addr;
2678 return 0;
2679 }
2680 else {
2681 l1<<=1;
2682 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2683 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2684 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2685 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2686 else
2687 l2=((u_int *)l1)[(addr&0xfff)/4];
2688 if((l2&(1<<31))==0) {
2689 u_int v=l2<<1;
2690 *addr_host=v+(addr&0xfff);
2691 return 0;
2692 }
2693 return l2<<1;
2694 }
2695}
2696
2697static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2698{
2699 int rs=get_reg(regmap,target);
2700 int rt=get_reg(regmap,target);
2701 if(rs<0) rs=get_reg(regmap,-1);
2702 assert(rs>=0);
2703 u_int handler,host_addr=0,is_dynamic,far_call=0;
2704 int cc=get_reg(regmap,CCREG);
2705 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2706 return;
2707 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2708 if (handler==0) {
2709 if(rt<0||rt1[i]==0)
2710 return;
2711 if(addr!=host_addr)
2712 emit_movimm_from(addr,rs,host_addr,rs);
2713 switch(type) {
2714 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2715 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2716 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2717 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2718 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2719 default: assert(0);
2720 }
2721 return;
2722 }
2723 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2724 if(is_dynamic) {
2725 if(type==LOADB_STUB||type==LOADBU_STUB)
2726 handler=(int)jump_handler_read8;
2727 if(type==LOADH_STUB||type==LOADHU_STUB)
2728 handler=(int)jump_handler_read16;
2729 if(type==LOADW_STUB)
2730 handler=(int)jump_handler_read32;
2731 }
2732
2733 // call a memhandler
2734 if(rt>=0&&rt1[i]!=0)
2735 reglist&=~(1<<rt);
2736 save_regs(reglist);
2737 if(target==0)
2738 emit_movimm(addr,0);
2739 else if(rs!=0)
2740 emit_mov(rs,0);
2741 int offset=(int)handler-(int)out-8;
2742 if(offset<-33554432||offset>=33554432) {
2743 // unreachable memhandler, a plugin func perhaps
2744 emit_movimm(handler,12);
2745 far_call=1;
2746 }
2747 if(cc<0)
2748 emit_loadreg(CCREG,2);
2749 if(is_dynamic) {
2750 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2751 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2752 }
2753 else {
2754 emit_readword((int)&last_count,3);
2755 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2756 emit_add(2,3,2);
2757 emit_writeword(2,(int)&Count);
2758 }
2759
2760 if(far_call)
2761 emit_callreg(12);
2762 else
2763 emit_call(handler);
2764
2765 if(rt>=0&&rt1[i]!=0) {
2766 switch(type) {
2767 case LOADB_STUB: emit_signextend8(0,rt); break;
2768 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2769 case LOADH_STUB: emit_signextend16(0,rt); break;
2770 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2771 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2772 default: assert(0);
2773 }
2774 }
2775 restore_regs(reglist);
2776}
2777
2778static void do_writestub(int n)
2779{
2780 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
2781 literal_pool(256);
2782 set_jump_target(stubs[n].addr, out);
2783 enum stub_type type=stubs[n].type;
2784 int i=stubs[n].a;
2785 int rs=stubs[n].b;
2786 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2787 u_int reglist=stubs[n].e;
2788 signed char *i_regmap=i_regs->regmap;
2789 int rt,r;
2790 if(itype[i]==C1LS||itype[i]==C2LS) {
2791 rt=get_reg(i_regmap,r=FTEMP);
2792 }else{
2793 rt=get_reg(i_regmap,r=rs2[i]);
2794 }
2795 assert(rs>=0);
2796 assert(rt>=0);
2797 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2798 void *restore_jump = NULL;
2799 int reglist2=reglist|(1<<rs)|(1<<rt);
2800 for(rtmp=0;rtmp<=12;rtmp++) {
2801 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2802 temp=rtmp; break;
2803 }
2804 }
2805 if(temp==-1) {
2806 save_regs(reglist);
2807 regs_saved=1;
2808 for(rtmp=0;rtmp<=3;rtmp++)
2809 if(rtmp!=rs&&rtmp!=rt)
2810 {temp=rtmp;break;}
2811 }
2812 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2813 temp2=3;
2814 emit_readword((int)&mem_wtab,temp);
2815 emit_shrimm(rs,12,temp2);
2816 emit_readword_dualindexedx4(temp,temp2,temp2);
2817 emit_lsls_imm(temp2,1,temp2);
2818 switch(type) {
2819 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2820 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2821 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2822 default: assert(0);
2823 }
2824 if(regs_saved) {
2825 restore_jump=out;
2826 emit_jcc(0); // jump to reg restore
2827 }
2828 else
2829 emit_jcc(stubs[n].retaddr); // return address (invcode check)
2830
2831 if(!regs_saved)
2832 save_regs(reglist);
2833 int handler=0;
2834 switch(type) {
2835 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2836 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2837 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2838 default: assert(0);
2839 }
2840 assert(handler!=0);
2841 pass_args(rs,rt);
2842 if(temp2!=3)
2843 emit_mov(temp2,3);
2844 int cc=get_reg(i_regmap,CCREG);
2845 if(cc<0)
2846 emit_loadreg(CCREG,2);
2847 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2848 // returns new cycle_count
2849 emit_call(handler);
2850 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2851 if(cc<0)
2852 emit_storereg(CCREG,2);
2853 if(restore_jump)
2854 set_jump_target(restore_jump, out);
2855 restore_regs(reglist);
2856 emit_jmp(stubs[n].retaddr);
2857}
2858
2859static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2860{
2861 int rs=get_reg(regmap,-1);
2862 int rt=get_reg(regmap,target);
2863 assert(rs>=0);
2864 assert(rt>=0);
2865 u_int handler,host_addr=0;
2866 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2867 if (handler==0) {
2868 if(addr!=host_addr)
2869 emit_movimm_from(addr,rs,host_addr,rs);
2870 switch(type) {
2871 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2872 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2873 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2874 default: assert(0);
2875 }
2876 return;
2877 }
2878
2879 // call a memhandler
2880 save_regs(reglist);
2881 pass_args(rs,rt);
2882 int cc=get_reg(regmap,CCREG);
2883 if(cc<0)
2884 emit_loadreg(CCREG,2);
2885 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2886 emit_movimm(handler,3);
2887 // returns new cycle_count
2888 emit_call((int)jump_handler_write_h);
2889 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2890 if(cc<0)
2891 emit_storereg(CCREG,2);
2892 restore_regs(reglist);
2893}
2894
2895static void do_unalignedwritestub(int n)
2896{
2897 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2898 literal_pool(256);
2899 set_jump_target(stubs[n].addr, out);
2900
2901 int i=stubs[n].a;
2902 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2903 int addr=stubs[n].b;
2904 u_int reglist=stubs[n].e;
2905 signed char *i_regmap=i_regs->regmap;
2906 int temp2=get_reg(i_regmap,FTEMP);
2907 int rt;
2908 rt=get_reg(i_regmap,rs2[i]);
2909 assert(rt>=0);
2910 assert(addr>=0);
2911 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2912 reglist|=(1<<addr);
2913 reglist&=~(1<<temp2);
2914
2915#if 1
2916 // don't bother with it and call write handler
2917 save_regs(reglist);
2918 pass_args(addr,rt);
2919 int cc=get_reg(i_regmap,CCREG);
2920 if(cc<0)
2921 emit_loadreg(CCREG,2);
2922 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2923 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2924 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2925 if(cc<0)
2926 emit_storereg(CCREG,2);
2927 restore_regs(reglist);
2928 emit_jmp(stubs[n].retaddr); // return address
2929#else
2930 emit_andimm(addr,0xfffffffc,temp2);
2931 emit_writeword(temp2,(int)&address);
2932
2933 save_regs(reglist);
2934 emit_shrimm(addr,16,1);
2935 int cc=get_reg(i_regmap,CCREG);
2936 if(cc<0) {
2937 emit_loadreg(CCREG,2);
2938 }
2939 emit_movimm((u_int)readmem,0);
2940 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2941 emit_call((int)&indirect_jump_indexed);
2942 restore_regs(reglist);
2943
2944 emit_readword((int)&readmem_dword,temp2);
2945 int temp=addr; //hmh
2946 emit_shlimm(addr,3,temp);
2947 emit_andimm(temp,24,temp);
2948#ifdef BIG_ENDIAN_MIPS
2949 if (opcode[i]==0x2e) // SWR
2950#else
2951 if (opcode[i]==0x2a) // SWL
2952#endif
2953 emit_xorimm(temp,24,temp);
2954 emit_movimm(-1,HOST_TEMPREG);
2955 if (opcode[i]==0x2a) { // SWL
2956 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2957 emit_orrshr(rt,temp,temp2);
2958 }else{
2959 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2960 emit_orrshl(rt,temp,temp2);
2961 }
2962 emit_readword((int)&address,addr);
2963 emit_writeword(temp2,(int)&word);
2964 //save_regs(reglist); // don't need to, no state changes
2965 emit_shrimm(addr,16,1);
2966 emit_movimm((u_int)writemem,0);
2967 //emit_call((int)&indirect_jump_indexed);
2968 emit_mov(15,14);
2969 emit_readword_dualindexedx4(0,1,15);
2970 emit_readword((int)&Count,HOST_TEMPREG);
2971 emit_readword((int)&next_interupt,2);
2972 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2973 emit_writeword(2,(int)&last_count);
2974 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2975 if(cc<0) {
2976 emit_storereg(CCREG,HOST_TEMPREG);
2977 }
2978 restore_regs(reglist);
2979 emit_jmp(stubs[n].retaddr); // return address
2980#endif
2981}
2982
2983static void do_invstub(int n)
2984{
2985 literal_pool(20);
2986 u_int reglist=stubs[n].a;
2987 set_jump_target(stubs[n].addr, out);
2988 save_regs(reglist);
2989 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2990 emit_call((int)&invalidate_addr);
2991 restore_regs(reglist);
2992 emit_jmp(stubs[n].retaddr); // return address
2993}
2994
2995void *do_dirty_stub(int i)
2996{
2997 assem_debug("do_dirty_stub %x\n",start+i*4);
2998 u_int addr=(u_int)source;
2999 // Careful about the code output here, verify_dirty needs to parse it.
3000 #ifndef HAVE_ARMV7
3001 emit_loadlp(addr,1);
3002 emit_loadlp((int)copy,2);
3003 emit_loadlp(slen*4,3);
3004 #else
3005 emit_movw(addr&0x0000FFFF,1);
3006 emit_movw(((u_int)copy)&0x0000FFFF,2);
3007 emit_movt(addr&0xFFFF0000,1);
3008 emit_movt(((u_int)copy)&0xFFFF0000,2);
3009 emit_movw(slen*4,3);
3010 #endif
3011 emit_movimm(start+i*4,0);
3012 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3013 void *entry = out;
3014 load_regs_entry(i);
3015 if (entry == out)
3016 entry = instr_addr[i];
3017 emit_jmp(instr_addr[i]);
3018 return entry;
3019}
3020
3021static void do_dirty_stub_ds()
3022{
3023 // Careful about the code output here, verify_dirty needs to parse it.
3024 #ifndef HAVE_ARMV7
3025 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3026 emit_loadlp((int)copy,2);
3027 emit_loadlp(slen*4,3);
3028 #else
3029 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3030 emit_movw(((u_int)copy)&0x0000FFFF,2);
3031 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3032 emit_movt(((u_int)copy)&0xFFFF0000,2);
3033 emit_movw(slen*4,3);
3034 #endif
3035 emit_movimm(start+1,0);
3036 emit_call((int)&verify_code_ds);
3037}
3038
3039static void do_cop1stub(int n)
3040{
3041 literal_pool(256);
3042 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
3043 set_jump_target(stubs[n].addr, out);
3044 int i=stubs[n].a;
3045// int rs=stubs[n].b;
3046 struct regstat *i_regs=(struct regstat *)stubs[n].c;
3047 int ds=stubs[n].d;
3048 if(!ds) {
3049 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3050 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3051 }
3052 //else {printf("fp exception in delay slot\n");}
3053 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3054 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3055 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3056 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3057 emit_jmp(ds?fp_exception_ds:fp_exception);
3058}
3059
3060/* Special assem */
3061
3062static void shift_assemble_arm(int i,struct regstat *i_regs)
3063{
3064 if(rt1[i]) {
3065 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3066 {
3067 signed char s,t,shift;
3068 t=get_reg(i_regs->regmap,rt1[i]);
3069 s=get_reg(i_regs->regmap,rs1[i]);
3070 shift=get_reg(i_regs->regmap,rs2[i]);
3071 if(t>=0){
3072 if(rs1[i]==0)
3073 {
3074 emit_zeroreg(t);
3075 }
3076 else if(rs2[i]==0)
3077 {
3078 assert(s>=0);
3079 if(s!=t) emit_mov(s,t);
3080 }
3081 else
3082 {
3083 emit_andimm(shift,31,HOST_TEMPREG);
3084 if(opcode2[i]==4) // SLLV
3085 {
3086 emit_shl(s,HOST_TEMPREG,t);
3087 }
3088 if(opcode2[i]==6) // SRLV
3089 {
3090 emit_shr(s,HOST_TEMPREG,t);
3091 }
3092 if(opcode2[i]==7) // SRAV
3093 {
3094 emit_sar(s,HOST_TEMPREG,t);
3095 }
3096 }
3097 }
3098 } else { // DSLLV/DSRLV/DSRAV
3099 signed char sh,sl,th,tl,shift;
3100 th=get_reg(i_regs->regmap,rt1[i]|64);
3101 tl=get_reg(i_regs->regmap,rt1[i]);
3102 sh=get_reg(i_regs->regmap,rs1[i]|64);
3103 sl=get_reg(i_regs->regmap,rs1[i]);
3104 shift=get_reg(i_regs->regmap,rs2[i]);
3105 if(tl>=0){
3106 if(rs1[i]==0)
3107 {
3108 emit_zeroreg(tl);
3109 if(th>=0) emit_zeroreg(th);
3110 }
3111 else if(rs2[i]==0)
3112 {
3113 assert(sl>=0);
3114 if(sl!=tl) emit_mov(sl,tl);
3115 if(th>=0&&sh!=th) emit_mov(sh,th);
3116 }
3117 else
3118 {
3119 // FIXME: What if shift==tl ?
3120 assert(shift!=tl);
3121 int temp=get_reg(i_regs->regmap,-1);
3122 int real_th=th;
3123 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3124 assert(sl>=0);
3125 assert(sh>=0);
3126 emit_andimm(shift,31,HOST_TEMPREG);
3127 if(opcode2[i]==0x14) // DSLLV
3128 {
3129 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3130 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3131 emit_orrshr(sl,HOST_TEMPREG,th);
3132 emit_andimm(shift,31,HOST_TEMPREG);
3133 emit_testimm(shift,32);
3134 emit_shl(sl,HOST_TEMPREG,tl);
3135 if(th>=0) emit_cmovne_reg(tl,th);
3136 emit_cmovne_imm(0,tl);
3137 }
3138 if(opcode2[i]==0x16) // DSRLV
3139 {
3140 assert(th>=0);
3141 emit_shr(sl,HOST_TEMPREG,tl);
3142 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3143 emit_orrshl(sh,HOST_TEMPREG,tl);
3144 emit_andimm(shift,31,HOST_TEMPREG);
3145 emit_testimm(shift,32);
3146 emit_shr(sh,HOST_TEMPREG,th);
3147 emit_cmovne_reg(th,tl);
3148 if(real_th>=0) emit_cmovne_imm(0,th);
3149 }
3150 if(opcode2[i]==0x17) // DSRAV
3151 {
3152 assert(th>=0);
3153 emit_shr(sl,HOST_TEMPREG,tl);
3154 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3155 if(real_th>=0) {
3156 assert(temp>=0);
3157 emit_sarimm(th,31,temp);
3158 }
3159 emit_orrshl(sh,HOST_TEMPREG,tl);
3160 emit_andimm(shift,31,HOST_TEMPREG);
3161 emit_testimm(shift,32);
3162 emit_sar(sh,HOST_TEMPREG,th);
3163 emit_cmovne_reg(th,tl);
3164 if(real_th>=0) emit_cmovne_reg(temp,th);
3165 }
3166 }
3167 }
3168 }
3169 }
3170}
3171
3172static void speculate_mov(int rs,int rt)
3173{
3174 if(rt!=0) {
3175 smrv_strong_next|=1<<rt;
3176 smrv[rt]=smrv[rs];
3177 }
3178}
3179
3180static void speculate_mov_weak(int rs,int rt)
3181{
3182 if(rt!=0) {
3183 smrv_weak_next|=1<<rt;
3184 smrv[rt]=smrv[rs];
3185 }
3186}
3187
3188static void speculate_register_values(int i)
3189{
3190 if(i==0) {
3191 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3192 // gp,sp are likely to stay the same throughout the block
3193 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3194 smrv_weak_next=~smrv_strong_next;
3195 //printf(" llr %08x\n", smrv[4]);
3196 }
3197 smrv_strong=smrv_strong_next;
3198 smrv_weak=smrv_weak_next;
3199 switch(itype[i]) {
3200 case ALU:
3201 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3202 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3203 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3204 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3205 else {
3206 smrv_strong_next&=~(1<<rt1[i]);
3207 smrv_weak_next&=~(1<<rt1[i]);
3208 }
3209 break;
3210 case SHIFTIMM:
3211 smrv_strong_next&=~(1<<rt1[i]);
3212 smrv_weak_next&=~(1<<rt1[i]);
3213 // fallthrough
3214 case IMM16:
3215 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3216 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3217 if(hr>=0) {
3218 if(get_final_value(hr,i,&value))
3219 smrv[rt1[i]]=value;
3220 else smrv[rt1[i]]=constmap[i][hr];
3221 smrv_strong_next|=1<<rt1[i];
3222 }
3223 }
3224 else {
3225 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3226 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3227 }
3228 break;
3229 case LOAD:
3230 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3231 // special case for BIOS
3232 smrv[rt1[i]]=0xa0000000;
3233 smrv_strong_next|=1<<rt1[i];
3234 break;
3235 }
3236 // fallthrough
3237 case SHIFT:
3238 case LOADLR:
3239 case MOV:
3240 smrv_strong_next&=~(1<<rt1[i]);
3241 smrv_weak_next&=~(1<<rt1[i]);
3242 break;
3243 case COP0:
3244 case COP2:
3245 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3246 smrv_strong_next&=~(1<<rt1[i]);
3247 smrv_weak_next&=~(1<<rt1[i]);
3248 }
3249 break;
3250 case C2LS:
3251 if (opcode[i]==0x32) { // LWC2
3252 smrv_strong_next&=~(1<<rt1[i]);
3253 smrv_weak_next&=~(1<<rt1[i]);
3254 }
3255 break;
3256 }
3257#if 0
3258 int r=4;
3259 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3260 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3261#endif
3262}
3263
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default class; also the "no special handling" value */
  MTYPE_8020 = 1, /* 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* below 0x00200000 */
  MTYPE_A000 = 3, /* 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4  /* 0x1f800000..0x1f800fff */
};
3271
3272static int get_ptr_mem_type(u_int a)
3273{
3274 if(a < 0x00200000) {
3275 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3276 // return wrong, must use memhandler for BIOS self-test to pass
3277 // 007 does similar stuff from a00 mirror, weird stuff
3278 return MTYPE_8000;
3279 return MTYPE_0000;
3280 }
3281 if(0x1f800000 <= a && a < 0x1f801000)
3282 return MTYPE_1F80;
3283 if(0x80200000 <= a && a < 0x80800000)
3284 return MTYPE_8020;
3285 if(0xa0000000 <= a && a < 0xa0200000)
3286 return MTYPE_A000;
3287 return MTYPE_8000;
3288}
3289
3290static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3291{
3292 void *jaddr = NULL;
3293 int type=0;
3294 int mr=rs1[i];
3295 if(((smrv_strong|smrv_weak)>>mr)&1) {
3296 type=get_ptr_mem_type(smrv[mr]);
3297 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3298 }
3299 else {
3300 // use the mirror we are running on
3301 type=get_ptr_mem_type(start);
3302 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3303 }
3304
3305 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3306 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3307 addr=*addr_reg_override=HOST_TEMPREG;
3308 type=0;
3309 }
3310 else if(type==MTYPE_0000) { // RAM 0 mirror
3311 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3312 addr=*addr_reg_override=HOST_TEMPREG;
3313 type=0;
3314 }
3315 else if(type==MTYPE_A000) { // RAM A mirror
3316 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3317 addr=*addr_reg_override=HOST_TEMPREG;
3318 type=0;
3319 }
3320 else if(type==MTYPE_1F80) { // scratchpad
3321 if (psxH == (void *)0x1f800000) {
3322 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3323 emit_cmpimm(HOST_TEMPREG,0x1000);
3324 jaddr=out;
3325 emit_jc(0);
3326 }
3327 else {
3328 // do usual RAM check, jump will go to the right handler
3329 type=0;
3330 }
3331 }
3332
3333 if(type==0)
3334 {
3335 emit_cmpimm(addr,RAM_SIZE);
3336 jaddr=out;
3337 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3338 // Hint to branch predictor that the branch is unlikely to be taken
3339 if(rs1[i]>=28)
3340 emit_jno_unlikely(0);
3341 else
3342 #endif
3343 emit_jno(0);
3344 if(ram_offset!=0) {
3345 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3346 addr=*addr_reg_override=HOST_TEMPREG;
3347 }
3348 }
3349
3350 return jaddr;
3351}
3352
3353#define shift_assemble shift_assemble_arm
3354
3355static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3356{
3357 int s,th,tl,temp,temp2,addr,map=-1;
3358 int offset;
3359 void *jaddr=0;
3360 int memtarget=0,c=0;
3361 int fastload_reg_override=0;
3362 u_int hr,reglist=0;
3363 th=get_reg(i_regs->regmap,rt1[i]|64);
3364 tl=get_reg(i_regs->regmap,rt1[i]);
3365 s=get_reg(i_regs->regmap,rs1[i]);
3366 temp=get_reg(i_regs->regmap,-1);
3367 temp2=get_reg(i_regs->regmap,FTEMP);
3368 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3369 assert(addr<0);
3370 offset=imm[i];
3371 for(hr=0;hr<HOST_REGS;hr++) {
3372 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3373 }
3374 reglist|=1<<temp;
3375 if(offset||s<0||c) addr=temp2;
3376 else addr=s;
3377 if(s>=0) {
3378 c=(i_regs->wasconst>>s)&1;
3379 if(c) {
3380 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3381 }
3382 }
3383 if(!c) {
3384 #ifdef RAM_OFFSET
3385 map=get_reg(i_regs->regmap,ROREG);
3386 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3387 #endif
3388 emit_shlimm(addr,3,temp);
3389 if (opcode[i]==0x22||opcode[i]==0x26) {
3390 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3391 }else{
3392 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3393 }
3394 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3395 }
3396 else {
3397 if(ram_offset&&memtarget) {
3398 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3399 fastload_reg_override=HOST_TEMPREG;
3400 }
3401 if (opcode[i]==0x22||opcode[i]==0x26) {
3402 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3403 }else{
3404 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3405 }
3406 }
3407 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3408 if(!c||memtarget) {
3409 int a=temp2;
3410 if(fastload_reg_override) a=fastload_reg_override;
3411 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3412 emit_readword_indexed_tlb(0,a,map,temp2);
3413 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
3414 }
3415 else
3416 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3417 if(rt1[i]) {
3418 assert(tl>=0);
3419 emit_andimm(temp,24,temp);
3420#ifdef BIG_ENDIAN_MIPS
3421 if (opcode[i]==0x26) // LWR
3422#else
3423 if (opcode[i]==0x22) // LWL
3424#endif
3425 emit_xorimm(temp,24,temp);
3426 emit_movimm(-1,HOST_TEMPREG);
3427 if (opcode[i]==0x26) {
3428 emit_shr(temp2,temp,temp2);
3429 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3430 }else{
3431 emit_shl(temp2,temp,temp2);
3432 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3433 }
3434 emit_or(temp2,tl,tl);
3435 }
3436 //emit_storereg(rt1[i],tl); // DEBUG
3437 }
3438 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3439 // FIXME: little endian, fastload_reg_override
3440 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3441 if(!c||memtarget) {
3442 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3443 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3444 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3445 if(jaddr) add_stub_r(LOADD_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
3446 }
3447 else
3448 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3449 if(rt1[i]) {
3450 assert(th>=0);
3451 assert(tl>=0);
3452 emit_testimm(temp,32);
3453 emit_andimm(temp,24,temp);
3454 if (opcode[i]==0x1A) { // LDL
3455 emit_rsbimm(temp,32,HOST_TEMPREG);
3456 emit_shl(temp2h,temp,temp2h);
3457 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3458 emit_movimm(-1,HOST_TEMPREG);
3459 emit_shl(temp2,temp,temp2);
3460 emit_cmove_reg(temp2h,th);
3461 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3462 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3463 emit_orreq(temp2,tl,tl);
3464 emit_orrne(temp2,th,th);
3465 }
3466 if (opcode[i]==0x1B) { // LDR
3467 emit_xorimm(temp,24,temp);
3468 emit_rsbimm(temp,32,HOST_TEMPREG);
3469 emit_shr(temp2,temp,temp2);
3470 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3471 emit_movimm(-1,HOST_TEMPREG);
3472 emit_shr(temp2h,temp,temp2h);
3473 emit_cmovne_reg(temp2,tl);
3474 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3475 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3476 emit_orrne(temp2h,th,th);
3477 emit_orreq(temp2h,tl,tl);
3478 }
3479 }
3480 }
3481}
3482#define loadlr_assemble loadlr_assemble_arm
3483
3484static void cop0_assemble(int i,struct regstat *i_regs)
3485{
3486 if(opcode2[i]==0) // MFC0
3487 {
3488 signed char t=get_reg(i_regs->regmap,rt1[i]);
3489 char copr=(source[i]>>11)&0x1f;
3490 //assert(t>=0); // Why does this happen? OOT is weird
3491 if(t>=0&&rt1[i]!=0) {
3492 emit_readword((int)&reg_cop0+copr*4,t);
3493 }
3494 }
3495 else if(opcode2[i]==4) // MTC0
3496 {
3497 signed char s=get_reg(i_regs->regmap,rs1[i]);
3498 char copr=(source[i]>>11)&0x1f;
3499 assert(s>=0);
3500 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3501 if(copr==9||copr==11||copr==12||copr==13) {
3502 emit_readword((int)&last_count,HOST_TEMPREG);
3503 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3504 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3505 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3506 emit_writeword(HOST_CCREG,(int)&Count);
3507 }
3508 // What a mess. The status register (12) can enable interrupts,
3509 // so needs a special case to handle a pending interrupt.
3510 // The interrupt must be taken immediately, because a subsequent
3511 // instruction might disable interrupts again.
3512 if(copr==12||copr==13) {
3513 if (is_delayslot) {
3514 // burn cycles to cause cc_interrupt, which will
3515 // reschedule next_interupt. Relies on CCREG from above.
3516 assem_debug("MTC0 DS %d\n", copr);
3517 emit_writeword(HOST_CCREG,(int)&last_count);
3518 emit_movimm(0,HOST_CCREG);
3519 emit_storereg(CCREG,HOST_CCREG);
3520 emit_loadreg(rs1[i],1);
3521 emit_movimm(copr,0);
3522 emit_call((int)pcsx_mtc0_ds);
3523 emit_loadreg(rs1[i],s);
3524 return;
3525 }
3526 emit_movimm(start+i*4+4,HOST_TEMPREG);
3527 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3528 emit_movimm(0,HOST_TEMPREG);
3529 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
3530 }
3531 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3532 //else
3533 if(s==HOST_CCREG)
3534 emit_loadreg(rs1[i],1);
3535 else if(s!=1)
3536 emit_mov(s,1);
3537 emit_movimm(copr,0);
3538 emit_call((int)pcsx_mtc0);
3539 if(copr==9||copr==11||copr==12||copr==13) {
3540 emit_readword((int)&Count,HOST_CCREG);
3541 emit_readword((int)&next_interupt,HOST_TEMPREG);
3542 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3543 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3544 emit_writeword(HOST_TEMPREG,(int)&last_count);
3545 emit_storereg(CCREG,HOST_CCREG);
3546 }
3547 if(copr==12||copr==13) {
3548 assert(!is_delayslot);
3549 emit_readword((int)&pending_exception,14);
3550 emit_test(14,14);
3551 emit_jne((int)&do_interrupt);
3552 }
3553 emit_loadreg(rs1[i],s);
3554 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3555 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3556 cop1_usable=0;
3557 }
3558 else
3559 {
3560 assert(opcode2[i]==0x10);
3561 if((source[i]&0x3f)==0x10) // RFE
3562 {
3563 emit_readword((int)&Status,0);
3564 emit_andimm(0,0x3c,1);
3565 emit_andimm(0,~0xf,0);
3566 emit_orrshr_imm(1,2,0);
3567 emit_writeword(0,(int)&Status);
3568 }
3569 }
3570}
3571
3572static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3573{
3574 switch (copr) {
3575 case 1:
3576 case 3:
3577 case 5:
3578 case 8:
3579 case 9:
3580 case 10:
3581 case 11:
3582 emit_readword((int)&reg_cop2d[copr],tl);
3583 emit_signextend16(tl,tl);
3584 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3585 break;
3586 case 7:
3587 case 16:
3588 case 17:
3589 case 18:
3590 case 19:
3591 emit_readword((int)&reg_cop2d[copr],tl);
3592 emit_andimm(tl,0xffff,tl);
3593 emit_writeword(tl,(int)&reg_cop2d[copr]);
3594 break;
3595 case 15:
3596 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3597 emit_writeword(tl,(int)&reg_cop2d[copr]);
3598 break;
3599 case 28:
3600 case 29:
3601 emit_readword((int)&reg_cop2d[9],temp);
3602 emit_testimm(temp,0x8000); // do we need this?
3603 emit_andimm(temp,0xf80,temp);
3604 emit_andne_imm(temp,0,temp);
3605 emit_shrimm(temp,7,tl);
3606 emit_readword((int)&reg_cop2d[10],temp);
3607 emit_testimm(temp,0x8000);
3608 emit_andimm(temp,0xf80,temp);
3609 emit_andne_imm(temp,0,temp);
3610 emit_orrshr_imm(temp,2,tl);
3611 emit_readword((int)&reg_cop2d[11],temp);
3612 emit_testimm(temp,0x8000);
3613 emit_andimm(temp,0xf80,temp);
3614 emit_andne_imm(temp,0,temp);
3615 emit_orrshl_imm(temp,3,tl);
3616 emit_writeword(tl,(int)&reg_cop2d[copr]);
3617 break;
3618 default:
3619 emit_readword((int)&reg_cop2d[copr],tl);
3620 break;
3621 }
3622}
3623
3624static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3625{
3626 switch (copr) {
3627 case 15:
3628 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3629 emit_writeword(sl,(int)&reg_cop2d[copr]);
3630 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3631 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3632 emit_writeword(sl,(int)&reg_cop2d[14]);
3633 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3634 break;
3635 case 28:
3636 emit_andimm(sl,0x001f,temp);
3637 emit_shlimm(temp,7,temp);
3638 emit_writeword(temp,(int)&reg_cop2d[9]);
3639 emit_andimm(sl,0x03e0,temp);
3640 emit_shlimm(temp,2,temp);
3641 emit_writeword(temp,(int)&reg_cop2d[10]);
3642 emit_andimm(sl,0x7c00,temp);
3643 emit_shrimm(temp,3,temp);
3644 emit_writeword(temp,(int)&reg_cop2d[11]);
3645 emit_writeword(sl,(int)&reg_cop2d[28]);
3646 break;
3647 case 30:
3648 emit_movs(sl,temp);
3649 emit_mvnmi(temp,temp);
3650#ifdef HAVE_ARMV5
3651 emit_clz(temp,temp);
3652#else
3653 emit_movs(temp,HOST_TEMPREG);
3654 emit_movimm(0,temp);
3655 emit_jeq((int)out+4*4);
3656 emit_addpl_imm(temp,1,temp);
3657 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3658 emit_jns((int)out-2*4);
3659#endif
3660 emit_writeword(sl,(int)&reg_cop2d[30]);
3661 emit_writeword(temp,(int)&reg_cop2d[31]);
3662 break;
3663 case 31:
3664 break;
3665 default:
3666 emit_writeword(sl,(int)&reg_cop2d[copr]);
3667 break;
3668 }
3669}
3670
3671static void cop2_assemble(int i,struct regstat *i_regs)
3672{
3673 u_int copr=(source[i]>>11)&0x1f;
3674 signed char temp=get_reg(i_regs->regmap,-1);
3675 if (opcode2[i]==0) { // MFC2
3676 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3677 if(tl>=0&&rt1[i]!=0)
3678 cop2_get_dreg(copr,tl,temp);
3679 }
3680 else if (opcode2[i]==4) { // MTC2
3681 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3682 cop2_put_dreg(copr,sl,temp);
3683 }
3684 else if (opcode2[i]==2) // CFC2
3685 {
3686 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3687 if(tl>=0&&rt1[i]!=0)
3688 emit_readword((int)&reg_cop2c[copr],tl);
3689 }
3690 else if (opcode2[i]==6) // CTC2
3691 {
3692 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3693 switch(copr) {
3694 case 4:
3695 case 12:
3696 case 20:
3697 case 26:
3698 case 27:
3699 case 29:
3700 case 30:
3701 emit_signextend16(sl,temp);
3702 break;
3703 case 31:
3704 //value = value & 0x7ffff000;
3705 //if (value & 0x7f87e000) value |= 0x80000000;
3706 emit_shrimm(sl,12,temp);
3707 emit_shlimm(temp,12,temp);
3708 emit_testimm(temp,0x7f000000);
3709 emit_testeqimm(temp,0x00870000);
3710 emit_testeqimm(temp,0x0000e000);
3711 emit_orrne_imm(temp,0x80000000,temp);
3712 break;
3713 default:
3714 temp=sl;
3715 break;
3716 }
3717 emit_writeword(temp,(int)&reg_cop2c[copr]);
3718 assert(sl>=0);
3719 }
3720}
3721
3722static void c2op_prologue(u_int op,u_int reglist)
3723{
3724 save_regs_all(reglist);
3725#ifdef PCNT
3726 emit_movimm(op,0);
3727 emit_call((int)pcnt_gte_start);
3728#endif
3729 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3730}
3731
3732static void c2op_epilogue(u_int op,u_int reglist)
3733{
3734#ifdef PCNT
3735 emit_movimm(op,0);
3736 emit_call((int)pcnt_gte_end);
3737#endif
3738 restore_regs_all(reglist);
3739}
3740
3741static void c2op_call_MACtoIR(int lm,int need_flags)
3742{
3743 if(need_flags)
3744 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3745 else
3746 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3747}
3748
3749static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3750{
3751 emit_call((int)func);
3752 // func is C code and trashes r0
3753 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3754 if(need_flags||need_ir)
3755 c2op_call_MACtoIR(lm,need_flags);
3756 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3757}
3758
3759static void c2op_assemble(int i,struct regstat *i_regs)
3760{
3761 u_int c2op=source[i]&0x3f;
3762 u_int hr,reglist_full=0,reglist;
3763 int need_flags,need_ir;
3764 for(hr=0;hr<HOST_REGS;hr++) {
3765 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3766 }
3767 reglist=reglist_full&CALLER_SAVE_REGS;
3768
3769 if (gte_handlers[c2op]!=NULL) {
3770 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
3771 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3772 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3773 source[i],gte_unneeded[i+1],need_flags,need_ir);
3774 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3775 need_flags=0;
3776 int shift = (source[i] >> 19) & 1;
3777 int lm = (source[i] >> 10) & 1;
3778 switch(c2op) {
3779#ifndef DRC_DBG
3780 case GTE_MVMVA: {
3781#ifdef HAVE_ARMV5
3782 int v = (source[i] >> 15) & 3;
3783 int cv = (source[i] >> 13) & 3;
3784 int mx = (source[i] >> 17) & 3;
3785 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3786 c2op_prologue(c2op,reglist);
3787 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3788 if(v<3)
3789 emit_ldrd(v*8,0,4);
3790 else {
3791 emit_movzwl_indexed(9*4,0,4); // gteIR
3792 emit_movzwl_indexed(10*4,0,6);
3793 emit_movzwl_indexed(11*4,0,5);
3794 emit_orrshl_imm(6,16,4);
3795 }
3796 if(mx<3)
3797 emit_addimm(0,32*4+mx*8*4,6);
3798 else
3799 emit_readword((int)&zeromem_ptr,6);
3800 if(cv<3)
3801 emit_addimm(0,32*4+(cv*8+5)*4,7);
3802 else
3803 emit_readword((int)&zeromem_ptr,7);
3804#ifdef __ARM_NEON__
3805 emit_movimm(source[i],1); // opcode
3806 emit_call((int)gteMVMVA_part_neon);
3807 if(need_flags) {
3808 emit_movimm(lm,1);
3809 emit_call((int)gteMACtoIR_flags_neon);
3810 }
3811#else
3812 if(cv==3&&shift)
3813 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3814 else {
3815 emit_movimm(shift,1);
3816 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3817 }
3818 if(need_flags||need_ir)
3819 c2op_call_MACtoIR(lm,need_flags);
3820#endif
3821#else /* if not HAVE_ARMV5 */
3822 c2op_prologue(c2op,reglist);
3823 emit_movimm(source[i],1); // opcode
3824 emit_writeword(1,(int)&psxRegs.code);
3825 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3826#endif
3827 break;
3828 }
3829 case GTE_OP:
3830 c2op_prologue(c2op,reglist);
3831 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3832 if(need_flags||need_ir) {
3833 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3834 c2op_call_MACtoIR(lm,need_flags);
3835 }
3836 break;
3837 case GTE_DPCS:
3838 c2op_prologue(c2op,reglist);
3839 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3840 break;
3841 case GTE_INTPL:
3842 c2op_prologue(c2op,reglist);
3843 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3844 break;
3845 case GTE_SQR:
3846 c2op_prologue(c2op,reglist);
3847 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3848 if(need_flags||need_ir) {
3849 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3850 c2op_call_MACtoIR(lm,need_flags);
3851 }
3852 break;
3853 case GTE_DCPL:
3854 c2op_prologue(c2op,reglist);
3855 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3856 break;
3857 case GTE_GPF:
3858 c2op_prologue(c2op,reglist);
3859 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3860 break;
3861 case GTE_GPL:
3862 c2op_prologue(c2op,reglist);
3863 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3864 break;
3865#endif
3866 default:
3867 c2op_prologue(c2op,reglist);
3868#ifdef DRC_DBG
3869 emit_movimm(source[i],1); // opcode
3870 emit_writeword(1,(int)&psxRegs.code);
3871#endif
3872 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3873 break;
3874 }
3875 c2op_epilogue(c2op,reglist);
3876 }
3877}
3878
3879static void cop1_unusable(int i,struct regstat *i_regs)
3880{
3881 // XXX: should just just do the exception instead
3882 if(!cop1_usable) {
3883 void *jaddr=out;
3884 emit_jmp(0);
3885 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3886 cop1_usable=1;
3887 }
3888}
3889
/* COP1 instructions only emit the cop1_unusable() check. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3894
/* Float conversion instructions only emit the cop1_unusable() check. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3899#define fconv_assemble fconv_assemble_arm
3900
/* Float compare instructions only emit the cop1_unusable() check. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3905
/* Float arithmetic instructions only emit the cop1_unusable() check. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3910
3911static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3912{
3913 // case 0x18: MULT
3914 // case 0x19: MULTU
3915 // case 0x1A: DIV
3916 // case 0x1B: DIVU
3917 // case 0x1C: DMULT
3918 // case 0x1D: DMULTU
3919 // case 0x1E: DDIV
3920 // case 0x1F: DDIVU
3921 if(rs1[i]&&rs2[i])
3922 {
3923 if((opcode2[i]&4)==0) // 32-bit
3924 {
3925 if(opcode2[i]==0x18) // MULT
3926 {
3927 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3928 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3929 signed char hi=get_reg(i_regs->regmap,HIREG);
3930 signed char lo=get_reg(i_regs->regmap,LOREG);
3931 assert(m1>=0);
3932 assert(m2>=0);
3933 assert(hi>=0);
3934 assert(lo>=0);
3935 emit_smull(m1,m2,hi,lo);
3936 }
3937 if(opcode2[i]==0x19) // MULTU
3938 {
3939 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3940 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3941 signed char hi=get_reg(i_regs->regmap,HIREG);
3942 signed char lo=get_reg(i_regs->regmap,LOREG);
3943 assert(m1>=0);
3944 assert(m2>=0);
3945 assert(hi>=0);
3946 assert(lo>=0);
3947 emit_umull(m1,m2,hi,lo);
3948 }
3949 if(opcode2[i]==0x1A) // DIV
3950 {
3951 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3952 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3953 assert(d1>=0);
3954 assert(d2>=0);
3955 signed char quotient=get_reg(i_regs->regmap,LOREG);
3956 signed char remainder=get_reg(i_regs->regmap,HIREG);
3957 assert(quotient>=0);
3958 assert(remainder>=0);
3959 emit_movs(d1,remainder);
3960 emit_movimm(0xffffffff,quotient);
3961 emit_negmi(quotient,quotient); // .. quotient and ..
3962 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3963 emit_movs(d2,HOST_TEMPREG);
3964 emit_jeq((int)out+52); // Division by zero
3965 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3966#ifdef HAVE_ARMV5
3967 emit_clz(HOST_TEMPREG,quotient);
3968 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3969#else
3970 emit_movimm(0,quotient);
3971 emit_addpl_imm(quotient,1,quotient);
3972 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3973 emit_jns((int)out-2*4);
3974#endif
3975 emit_orimm(quotient,1<<31,quotient);
3976 emit_shr(quotient,quotient,quotient);
3977 emit_cmp(remainder,HOST_TEMPREG);
3978 emit_subcs(remainder,HOST_TEMPREG,remainder);
3979 emit_adcs(quotient,quotient,quotient);
3980 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3981 emit_jcc(out-16); // -4
3982 emit_teq(d1,d2);
3983 emit_negmi(quotient,quotient);
3984 emit_test(d1,d1);
3985 emit_negmi(remainder,remainder);
3986 }
3987 if(opcode2[i]==0x1B) // DIVU
3988 {
3989 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3990 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3991 assert(d1>=0);
3992 assert(d2>=0);
3993 signed char quotient=get_reg(i_regs->regmap,LOREG);
3994 signed char remainder=get_reg(i_regs->regmap,HIREG);
3995 assert(quotient>=0);
3996 assert(remainder>=0);
3997 emit_mov(d1,remainder);
3998 emit_movimm(0xffffffff,quotient); // div0 case
3999 emit_test(d2,d2);
4000 emit_jeq((int)out+40); // Division by zero
4001#ifdef HAVE_ARMV5
4002 emit_clz(d2,HOST_TEMPREG);
4003 emit_movimm(1<<31,quotient);
4004 emit_shl(d2,HOST_TEMPREG,d2);
4005#else
4006 emit_movimm(0,HOST_TEMPREG);
4007 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4008 emit_lslpls_imm(d2,1,d2);
4009 emit_jns((int)out-2*4);
4010 emit_movimm(1<<31,quotient);
4011#endif
4012 emit_shr(quotient,HOST_TEMPREG,quotient);
4013 emit_cmp(remainder,d2);
4014 emit_subcs(remainder,d2,remainder);
4015 emit_adcs(quotient,quotient,quotient);
4016 emit_shrcc_imm(d2,1,d2);
4017 emit_jcc(out-16); // -4
4018 }
4019 }
4020 else // 64-bit
4021 assert(0);
4022 }
4023 else
4024 {
4025 // Multiply by zero is zero.
4026 // MIPS does not have a divide by zero exception.
4027 // The result is undefined, we return zero.
4028 signed char hr=get_reg(i_regs->regmap,HIREG);
4029 signed char lr=get_reg(i_regs->regmap,LOREG);
4030 if(hr>=0) emit_zeroreg(hr);
4031 if(lr>=0) emit_zeroreg(lr);
4032 }
4033}
4034#define multdiv_assemble multdiv_assemble_arm
4035
/*
 * Intentionally empty on ARM.
 * On x86 this puts the value 0xf8 into the register; on ARM the hash
 * can be done with a single instruction (see do_rhash).
 */
static void do_preload_rhash(int r)
{
}
4040
4041static void do_preload_rhtbl(int ht) {
4042 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4043}
4044
/* Compute the mini hash table byte offset of the address in rs: rh = rs & 0xf8. */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
4048
4049static void do_miniht_load(int ht,int rh) {
4050 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4051 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4052}
4053
4054static void do_miniht_jump(int rs,int rh,int ht) {
4055 emit_cmp(rh,rs);
4056 emit_ldreq_indexed(ht,4,15);
4057 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4058 emit_mov(rs,7);
4059 emit_jmp(jump_vaddr_reg[7]);
4060 #else
4061 emit_jmp(jump_vaddr_reg[rs]);
4062 #endif
4063}
4064
4065static void do_miniht_insert(u_int return_address,int rt,int temp) {
4066 #ifndef HAVE_ARMV7
4067 emit_movimm(return_address,rt); // PC into link register
4068 add_to_linker((int)out,return_address,1);
4069 emit_pcreladdr(temp);
4070 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4071 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4072 #else
4073 emit_movw(return_address&0x0000FFFF,rt);
4074 add_to_linker((int)out,return_address,1);
4075 emit_pcreladdr(temp);
4076 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4077 emit_movt(return_address&0xFFFF0000,rt);
4078 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4079 #endif
4080}
4081
4082static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4083{
4084 //if(dirty_pre==dirty) return;
4085 int hr,reg;
4086 for(hr=0;hr<HOST_REGS;hr++) {
4087 if(hr!=EXCLUDE_REG) {
4088 reg=pre[hr];
4089 if(((~u)>>(reg&63))&1) {
4090 if(reg>0) {
4091 if(((dirty_pre&~dirty)>>hr)&1) {
4092 if(reg>0&&reg<34) {
4093 emit_storereg(reg,hr);
4094 if( ((is32_pre&~uu)>>reg)&1 ) {
4095 emit_sarimm(hr,31,HOST_TEMPREG);
4096 emit_storereg(reg|64,HOST_TEMPREG);
4097 }
4098 }
4099 else if(reg>=64) {
4100 emit_storereg(reg,hr);
4101 }
4102 }
4103 }
4104 }
4105 }
4106 }
4107}
4108
4109
4110/* using strd could possibly help but you'd have to allocate registers in pairs
4111static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4112{
4113 int hr;
4114 int wrote=-1;
4115 for(hr=HOST_REGS-1;hr>=0;hr--) {
4116 if(hr!=EXCLUDE_REG) {
4117 if(pre[hr]!=entry[hr]) {
4118 if(pre[hr]>=0) {
4119 if((dirty>>hr)&1) {
4120 if(get_reg(entry,pre[hr])<0) {
4121 if(pre[hr]<64) {
4122 if(!((u>>pre[hr])&1)) {
4123 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4124 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4125 emit_sarimm(hr,31,hr+1);
4126 emit_strdreg(pre[hr],hr);
4127 }
4128 else
4129 emit_storereg(pre[hr],hr);
4130 }else{
4131 emit_storereg(pre[hr],hr);
4132 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4133 emit_sarimm(hr,31,hr);
4134 emit_storereg(pre[hr]|64,hr);
4135 }
4136 }
4137 }
4138 }else{
4139 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4140 emit_storereg(pre[hr],hr);
4141 }
4142 }
4143 wrote=hr;
4144 }
4145 }
4146 }
4147 }
4148 }
4149 }
4150 for(hr=0;hr<HOST_REGS;hr++) {
4151 if(hr!=EXCLUDE_REG) {
4152 if(pre[hr]!=entry[hr]) {
4153 if(pre[hr]>=0) {
4154 int nr;
4155 if((nr=get_reg(entry,pre[hr]))>=0) {
4156 emit_mov(hr,nr);
4157 }
4158 }
4159 }
4160 }
4161 }
4162}
4163#define wb_invalidate wb_invalidate_arm
4164*/
4165
4166static void mark_clear_cache(void *target)
4167{
4168 u_long offset = (char *)target - (char *)BASE_ADDR;
4169 u_int mask = 1u << ((offset >> 12) & 31);
4170 if (!(needs_clear_cache[offset >> 17] & mask)) {
4171 char *start = (char *)((u_long)target & ~4095ul);
4172 start_tcache_write(start, start + 4096);
4173 needs_clear_cache[offset >> 17] |= mask;
4174 }
4175}
4176
4177// Clearing the cache is rather slow on ARM Linux, so mark the areas
4178// that need to be cleared, and then only clear these areas once.
4179static void do_clear_cache()
4180{
4181 int i,j;
4182 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4183 {
4184 u_int bitmap=needs_clear_cache[i];
4185 if(bitmap) {
4186 u_int start,end;
4187 for(j=0;j<32;j++)
4188 {
4189 if(bitmap&(1<<j)) {
4190 start=(u_int)BASE_ADDR+i*131072+j*4096;
4191 end=start+4095;
4192 j++;
4193 while(j<32) {
4194 if(bitmap&(1<<j)) {
4195 end+=4096;
4196 j++;
4197 }else{
4198 end_tcache_write((void *)start,(void *)end);
4199 break;
4200 }
4201 }
4202 }
4203 }
4204 needs_clear_cache[i]=0;
4205 }
4206 }
4207}
4208
// CPU-architecture-specific initialization (nothing to do here)
static void arch_init()
{
}
4212
4213// vim:shiftwidth=2:expandtab