drc: rework for 64bit, part 3
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
// Translation cache storage.
//  - BASE_ADDR_FIXED:   nothing is declared in this file
//  - BASE_ADDR_DYNAMIC: only a pointer is declared here
//  - otherwise:         a static array of 1<<TARGET_SIZE_2 bytes, aligned to 4096
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// 0x100f has bits 0-3 and 12 set; the __MACH__ variant additionally sets bit 9.
// NOTE(review): presumably one bit per ARM register (r0-r3, r12, plus r9 on
// __MACH__ targets) - confirm against the users of this macro.
38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
// Shorthand for marking a definition as possibly unused (see regname[]).
44#define unused __attribute__((unused))
45
// With DRC_DBG defined, the unused-function/variable warnings are silenced
// for the rest of this file.
46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
// Objects defined outside this file. emit_loadreg()/emit_storereg() below
// address cycle_count as an offset from dynarec_local.
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
// Routines declared here without a body; their definitions are not in this file's
// visible part.
61void indirect_jump_indexed();
62void indirect_jump();
63void do_interrupt();
64void jump_vaddr_r0();
65void jump_vaddr_r1();
66void jump_vaddr_r2();
67void jump_vaddr_r3();
68void jump_vaddr_r4();
69void jump_vaddr_r5();
70void jump_vaddr_r6();
71void jump_vaddr_r7();
72void jump_vaddr_r8();
73void jump_vaddr_r9();
74void jump_vaddr_r10();
75void jump_vaddr_r12();
76
// jump_vaddr_rN entry points indexed by N. Slots 11, 13, 14 and 15 are null:
// no jump_vaddr routine is declared for those indices.
77void * const jump_vaddr_reg[16] = {
78 jump_vaddr_r0,
79 jump_vaddr_r1,
80 jump_vaddr_r2,
81 jump_vaddr_r3,
82 jump_vaddr_r4,
83 jump_vaddr_r5,
84 jump_vaddr_r6,
85 jump_vaddr_r7,
86 jump_vaddr_r8,
87 jump_vaddr_r9,
88 jump_vaddr_r10,
89 0,
90 jump_vaddr_r12,
91 0,
92 0,
93 0
94};
95
// Routines declared here without a body; their definitions are not in this file's
// visible part.
96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
// Addresses of the invalidate_addr_rN routines indexed by N, stored as
// integers (function pointers cast through int). Slots 11, 13, 14 and 15
// are zero: no routine is declared for those indices.
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
// One u_int for every 1<<17 bytes of a 1<<TARGET_SIZE_2 byte region.
// NOTE(review): presumably a bitmap over the translation cache with one bit
// per 4 KiB (32 bits * 4096 = 1<<17) - confirm against the code that uses it.
127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
128
129/* Linker */
130
// Patch the 32-bit instruction at addr so that it refers to target_.
// The kind of instruction is recognised from its most significant byte
// (read as byte 3 of the word, i.e. a little-endian layout is assumed):
//   0xe2   the low 12 bits become (distance>>2)|0xF00; the distance from
//          the instruction (+8) must be below 1024
//   0x72   generated by emit_jno_unlikely: patched like 0xe2 when the
//          distance is below 1024, with (distance>>4)|0xE00 when it is
//          below 4096 and a multiple of 16, otherwise the whole word is
//          replaced by 0x7A000000 plus a 24-bit branch displacement
//   other  must have (byte3 & 0x0e) == 0xa; the low 24 bits receive the
//          branch displacement
static void set_jump_target(void *addr, void *target_)
{
  u_int dest = (u_int)target_;
  u_char *insn_bytes = addr;
  u_int *insn = (u_int *)insn_bytes;
  // distance from the instruction's pc value (its address + 8)
  u_int dist = dest - (u_int)insn - 8;
  u_int top = insn_bytes[3];

  if (top == 0xe2) {
    assert(dist < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((dest & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    return;
  }

  if (top != 0x72) {
    assert((insn_bytes[3] & 0x0e) == 0xa);
    *insn = (*insn & 0xFF000000) | ((dist << 6) >> 8);
    return;
  }

  // generated by emit_jno_unlikely
  if (dist < 1024) {
    assert(((uintptr_t)addr & 3) == 0);
    assert((dest & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
  }
  else if (dist < 4096 && !(dist & 15)) {
    assert(((uintptr_t)addr & 3) == 0);
    assert((dest & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (dist >> 4) | 0xE00;
  }
  else {
    *insn = (0x7A000000) | ((dist << 6) >> 8);
  }
}
162
163// This optionally copies the instruction from the target of the branch into
164// the space before the branch. Works, but the difference in speed is
165// usually insignificant.
// Disabled (#if 0): this variant is not compiled.
166#if 0
167static void set_jump_target_fillslot(int addr,u_int target,int copy)
168{
169 u_char *ptr=(u_char *)addr;
170 u_int *ptr2=(u_int *)ptr;
 // when copying, the word before the branch must be 0xe28dd000
171 assert(!copy||ptr2[-1]==0xe28dd000);
172 if(ptr[3]==0xe2) {
173 assert(!copy);
174 assert((target-(u_int)ptr2-8)<4096);
175 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
176 }
177 else {
178 assert((ptr[3]&0x0e)==0xa);
179 u_int target_insn=*(u_int *)target;
 // the three tests below veto the copy for certain target instructions
180 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
181 copy=0;
182 }
183 if((target_insn&0x0c100000)==0x04100000) { // Load
184 copy=0;
185 }
186 if(target_insn&0x08000000) {
187 copy=0;
188 }
 // place the target's first instruction before the branch and jump past it
189 if(copy) {
190 ptr2[-1]=target_insn;
191 target+=4;
192 }
193 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
194 }
195}
196#endif
197
198/* Literal pool */
199static void add_literal(int addr,int val)
200{
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
204 literalcount++;
205}
206
207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
// Given a pointer to an external jump stub, return the pointer stored in
// the literal that the stub's second instruction loads. That instruction
// must be "ldr rx, [pc, #ofs]"; the literal lives at its address + ofs + 8.
static void *find_extjump_insn(void *stub)
{
  int *ldr_insn = (int *)(stub + 4);
  assert((*ldr_insn & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int pool_ofs = *ldr_insn & 0xfff;
  void **literal = (void *)ldr_insn + pool_ofs + 8;
  return *literal;
}
217
218// find where external branch is linked to using addr of its stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
// Return the address that the branch belonging to this stub jumps to.
// The branch instruction is located through find_extjump_insn(); its low
// 24 bits are sign-extended, multiplied by 4 and added to its address + 8.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000);
  int disp = (*branch << 8) >> 6;
  return (u_char *)branch + disp + 8;
}
229
230// Find the "clean" entry point from a "dirty" entry point
231// by skipping past the call to verify_code
// Starting from a "dirty" entry point, step over the header words (4, or 6
// with HAVE_ARMV7, plus one more if no bl is found there) and the bl itself.
// If the next instruction is an unconditional branch (0xea......) its
// destination is returned, otherwise the address right after the bl.
static void *get_clean_addr(void *addr)
{
#ifndef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 4;
#else
  signed int *insn = (signed int *)addr + 6;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn += 1;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn += 1;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  // follow jump
  return (char *)insn + ((*insn << 8) >> 6) + 8;
}
248
// Decode the source address, copy address and length from the header at
// ptr and return nonzero when the two memory ranges have equal contents.
// Without HAVE_ARMV7 the three values come from the literal pool through
// three consecutive "ldr rX,[pc,#ofs]" instructions; with HAVE_ARMV7 they
// are reassembled from the immediates of movw/movt instructions.
static int verify_dirty(u_int *ptr)
{
#ifndef HAVE_ARMV7
  u_int args[3];
  int n;
  // get from literal pool
  for (n = 0; n < 3; n++) {
    assert((*ptr & 0xFFFF0000) == 0xe59f0000);
    u_int pool_ofs = *ptr & 0xfff;
    args[n] = *(u_int *)((void *)ptr + pool_ofs + 8);
    ptr++;
  }
  ptr++;
  u_int source = args[0];
  u_int copy = args[1];
  u_int len = args[2];
#else
  // ARMv7 movw/movt
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  u_int source = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000)
               + ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  u_int copy = (ptr[1] & 0xFFF) + ((ptr[1] >> 4) & 0xF000)
             + ((ptr[3] << 16) & 0xFFF0000) + ((ptr[3] << 12) & 0xF0000000);
  u_int len = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  return memcmp((void *)source, (void *)copy, len) == 0;
}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
283static int isclean(void *addr)
284{
285 #ifndef HAVE_ARMV7
286 u_int *ptr=((u_int *)addr)+4;
287 #else
288 u_int *ptr=((u_int *)addr)+6;
289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
298// get source that block at addr was compiled from (host pointers)
// Decode the source address and length from the header of the block at
// addr and report them as *start = source and *end = source + length.
// Without HAVE_ARMV7 the values are fetched from the literal pool via
// "ldr rX,[pc,#ofs]" instructions (the second of three is skipped); with
// HAVE_ARMV7 they are rebuilt from movw/movt immediates.
static void get_bounds(void *addr,u_int *start,u_int *end)
{
  u_int *insn = addr;
#ifndef HAVE_ARMV7
  // get from literal pool
  assert((*insn & 0xFFFF0000) == 0xe59f0000);
  u_int source = *(u_int *)((void *)insn + (*insn & 0xfff) + 8);
  insn += 2; // the instruction loading the copy address is not needed
  assert((*insn & 0xFFFF0000) == 0xe59f0000);
  u_int len = *(u_int *)((void *)insn + (*insn & 0xfff) + 8);
  insn += 2;
#else
  // ARMv7 movw/movt
  assert((*insn & 0xFFF00000) == 0xe3000000);
  u_int source = (insn[0] & 0xFFF) + ((insn[0] >> 4) & 0xF000)
               + ((insn[2] << 16) & 0xFFF0000) + ((insn[2] << 12) & 0xF0000000);
  u_int len = (insn[4] & 0xFFF) + ((insn[4] >> 4) & 0xF000);
  insn += 6;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  *start = source;
  *end = source + len;
}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register in cur->regmap for
// instruction index i. Attempts, in order:
//   1. return without doing anything if reg is flagged in cur->u or is
//      already present in the map
//   2. use the preferred host register if it is free or holds a register
//      flagged in cur->u / cur->uu
//   3. drop one mapping whose register is flagged in cur->u / cur->uu, then
//      take a free host register (first one that did not hold an operand
//      of instruction i-1, then any)
//   4. evict a register chosen through the hsn[] values filled in by lsn()
// Whenever a host register is handed out its dirty and isconst bits are
// cleared. If every attempt fails the process exits.
336static void alloc_reg(struct regstat *cur,int i,signed char reg)
337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
342
343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
345
346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
351
352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
354
355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
 // The preferred register is occupied: reuse it if its current content is
 // flagged in cur->u (values below 64) or cur->uu (values of 64 and above)
362 r=cur->regmap[preferred_reg];
363 if(r<64&&((cur->u>>r)&1)) {
364 cur->regmap[preferred_reg]=reg;
365 cur->dirty&=~(1<<preferred_reg);
366 cur->isconst&=~(1<<preferred_reg);
367 return;
368 }
369 if(r>=64&&((cur->uu>>(r&63))&1)) {
370 cur->regmap[preferred_reg]=reg;
371 cur->dirty&=~(1<<preferred_reg);
372 cur->isconst&=~(1<<preferred_reg);
373 return;
374 }
375
376 // Clear any unneeded registers
377 // We try to keep the mapping consistent, if possible, because it
378 // makes branches easier (especially loops). So we try to allocate
379 // first (see above) before removing old mappings. If this is not
380 // possible then go ahead and clear out the registers that are no
381 // longer needed.
 // (only the first such mapping found is removed: note the break)
382 for(hr=0;hr<HOST_REGS;hr++)
383 {
384 r=cur->regmap[hr];
385 if(r>=0) {
386 if(r<64) {
387 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
388 }
389 else
390 {
391 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
392 }
393 }
394 }
395 // Try to allocate any available register, but prefer
396 // registers that have not been used recently.
397 if(i>0) {
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 // Try to allocate any available register
410 for(hr=0;hr<HOST_REGS;hr++) {
411 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
412 cur->regmap[hr]=reg;
413 cur->dirty&=~(1<<hr);
414 cur->isconst&=~(1<<hr);
415 return;
416 }
417 }
418
419 // Ok, now we have to evict someone
420 // Pick a register we hopefully won't need soon
 // hsn[] starts out as 10 for every register and is then filled in by
 // lsn(); candidates are tried from value 10 downwards
421 u_char hsn[MAXREG+1];
422 memset(hsn,10,sizeof(hsn));
423 int j;
424 lsn(hsn,i,&preferred_reg);
425 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
426 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
427 if(i>0) {
428 // Don't evict the cycle count at entry points, otherwise the entry
429 // stub will have to write it.
430 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
431 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // First pass (values 10..3): skips operands of instruction i-1 and only
 // takes HOST_CCREG while j is below hsn[CCREG]
432 for(j=10;j>=3;j--)
433 {
434 // Alloc preferred register if available
435 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
436 for(hr=0;hr<HOST_REGS;hr++) {
437 // Evict both parts of a 64-bit register
438 if((cur->regmap[hr]&63)==r) {
439 cur->regmap[hr]=-1;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 }
443 }
444 cur->regmap[preferred_reg]=reg;
445 return;
446 }
447 for(r=1;r<=MAXREG;r++)
448 {
449 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // a host register holding r+64 is taken before one holding r
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 for(hr=0;hr<HOST_REGS;hr++) {
461 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
462 if(cur->regmap[hr]==r) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 }
470 }
471 }
472 }
473 }
 // Last resort (values 10..0): no exclusions at all
474 for(j=10;j>=0;j--)
475 {
476 for(r=1;r<=MAXREG;r++)
477 {
478 if(hsn[r]==j) {
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r+64) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 for(hr=0;hr<HOST_REGS;hr++) {
488 if(cur->regmap[hr]==r) {
489 cur->regmap[hr]=reg;
490 cur->dirty&=~(1<<hr);
491 cur->isconst&=~(1<<hr);
492 return;
493 }
494 }
495 }
496 }
497 }
498 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
499}
500
// Allocate both halves of guest register `reg` for instruction index i.
// The lower half is handled by alloc_reg(). The upper half is entered into
// cur->regmap as reg|64 using the same sequence of attempts as alloc_reg(),
// with 8+(reg&1) as the initial preferred host register and cur->uu as the
// "unused" mask. Exits the process if no host register can be found.
501static void alloc_reg64(struct regstat *cur,int i,signed char reg)
502{
503 int preferred_reg = 8+(reg&1);
504 int r,hr;
505
506 // allocate the lower 32 bits
507 alloc_reg(cur,i,reg);
508
509 // Don't allocate unused registers
510 if((cur->uu>>reg)&1) return;
511
512 // see if the upper half is already allocated
513 for(hr=0;hr<HOST_REGS;hr++)
514 {
515 if(cur->regmap[hr]==reg+64) return;
516 }
517
518 // Keep the same mapping if the register was already allocated in a loop
519 preferred_reg = loop_reg(i,reg,preferred_reg);
520
521 // Try to allocate the preferred register
522 if(cur->regmap[preferred_reg]==-1) {
523 cur->regmap[preferred_reg]=reg|64;
524 cur->dirty&=~(1<<preferred_reg);
525 cur->isconst&=~(1<<preferred_reg);
526 return;
527 }
 // The preferred register is occupied: reuse it if its current content is
 // flagged in cur->u (values below 64) or cur->uu (values of 64 and above)
528 r=cur->regmap[preferred_reg];
529 if(r<64&&((cur->u>>r)&1)) {
530 cur->regmap[preferred_reg]=reg|64;
531 cur->dirty&=~(1<<preferred_reg);
532 cur->isconst&=~(1<<preferred_reg);
533 return;
534 }
535 if(r>=64&&((cur->uu>>(r&63))&1)) {
536 cur->regmap[preferred_reg]=reg|64;
537 cur->dirty&=~(1<<preferred_reg);
538 cur->isconst&=~(1<<preferred_reg);
539 return;
540 }
541
542 // Clear any unneeded registers
543 // We try to keep the mapping consistent, if possible, because it
544 // makes branches easier (especially loops). So we try to allocate
545 // first (see above) before removing old mappings. If this is not
546 // possible then go ahead and clear out the registers that are no
547 // longer needed.
 // (scans from the highest host register down and removes only the first
 // such mapping found: note the break)
548 for(hr=HOST_REGS-1;hr>=0;hr--)
549 {
550 r=cur->regmap[hr];
551 if(r>=0) {
552 if(r<64) {
553 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
554 }
555 else
556 {
557 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
558 }
559 }
560 }
561 // Try to allocate any available register, but prefer
562 // registers that have not been used recently.
563 if(i>0) {
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 // Try to allocate any available register
576 for(hr=0;hr<HOST_REGS;hr++) {
577 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
578 cur->regmap[hr]=reg|64;
579 cur->dirty&=~(1<<hr);
580 cur->isconst&=~(1<<hr);
581 return;
582 }
583 }
584
585 // Ok, now we have to evict someone
586 // Pick a register we hopefully won't need soon
 // hsn[] starts out as 10 for every register and is then filled in by
 // lsn(); candidates are tried from value 10 downwards
587 u_char hsn[MAXREG+1];
588 memset(hsn,10,sizeof(hsn));
589 int j;
590 lsn(hsn,i,&preferred_reg);
591 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
592 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
593 if(i>0) {
594 // Don't evict the cycle count at entry points, otherwise the entry
595 // stub will have to write it.
596 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
597 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // First pass (values 10..3): skips operands of instruction i-1 and only
 // takes HOST_CCREG while j is below hsn[CCREG]
598 for(j=10;j>=3;j--)
599 {
600 // Alloc preferred register if available
601 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
602 for(hr=0;hr<HOST_REGS;hr++) {
603 // Evict both parts of a 64-bit register
604 if((cur->regmap[hr]&63)==r) {
605 cur->regmap[hr]=-1;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 }
609 }
610 cur->regmap[preferred_reg]=reg|64;
611 return;
612 }
613 for(r=1;r<=MAXREG;r++)
614 {
615 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // a host register holding r+64 is taken before one holding r
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 }
626 for(hr=0;hr<HOST_REGS;hr++) {
627 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
628 if(cur->regmap[hr]==r) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 }
636 }
637 }
638 }
639 }
 // Last resort (values 10..0): no exclusions at all
640 for(j=10;j>=0;j--)
641 {
642 for(r=1;r<=MAXREG;r++)
643 {
644 if(hsn[r]==j) {
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r+64) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 for(hr=0;hr<HOST_REGS;hr++) {
654 if(cur->regmap[hr]==r) {
655 cur->regmap[hr]=reg|64;
656 cur->dirty&=~(1<<hr);
657 cur->isconst&=~(1<<hr);
658 return;
659 }
660 }
661 }
662 }
663 }
664 SysPrintf("This shouldn't happen");exit(1);
665}
666
667// Allocate a temporary register. This is done without regard to
668// dirty status or whether the register we request is on the unneeded list
669// Note: This will only allocate one register, even if called multiple times
// Attempts, in order: return if reg is already mapped (EXCLUDE_REG is not
// considered); take a free host register, scanning from the highest index
// down; take over a host register whose content is flagged unneeded both in
// cur->u/cur->uu and, for i>0, in unneeded_reg[i-1]/unneeded_reg_upper[i-1];
// finally evict using the hsn[] values filled in by lsn().
// The chosen host register's dirty and isconst bits are cleared.
// Exits the process if nothing can be found.
670static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
671{
672 int r,hr;
 // no preferred host register here; -1 is what gets handed to lsn() below
673 int preferred_reg = -1;
674
675 // see if it's already allocated
676 for(hr=0;hr<HOST_REGS;hr++)
677 {
678 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
679 }
680
681 // Try to allocate any available register
682 for(hr=HOST_REGS-1;hr>=0;hr--) {
683 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
684 cur->regmap[hr]=reg;
685 cur->dirty&=~(1<<hr);
686 cur->isconst&=~(1<<hr);
687 return;
688 }
689 }
690
691 // Find an unneeded register
692 for(hr=HOST_REGS-1;hr>=0;hr--)
693 {
694 r=cur->regmap[hr];
695 if(r>=0) {
696 if(r<64) {
697 if((cur->u>>r)&1) {
698 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
699 cur->regmap[hr]=reg;
700 cur->dirty&=~(1<<hr);
701 cur->isconst&=~(1<<hr);
702 return;
703 }
704 }
705 }
706 else
707 {
708 if((cur->uu>>(r&63))&1) {
709 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
710 cur->regmap[hr]=reg;
711 cur->dirty&=~(1<<hr);
712 cur->isconst&=~(1<<hr);
713 return;
714 }
715 }
716 }
717 }
718 }
719
720 // Ok, now we have to evict someone
721 // Pick a register we hopefully won't need soon
722 // TODO: we might want to follow unconditional jumps here
723 // TODO: get rid of dupe code and make this into a function
 // hsn[] starts out as 10 for every register and is then filled in by
 // lsn(); candidates are tried from value 10 downwards
724 u_char hsn[MAXREG+1];
725 memset(hsn,10,sizeof(hsn));
726 int j;
727 lsn(hsn,i,&preferred_reg);
728 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
729 if(i>0) {
730 // Don't evict the cycle count at entry points, otherwise the entry
731 // stub will have to write it.
732 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
733 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // First pass (values 10..3): skips operands of instruction i-1 and only
 // takes HOST_CCREG while hsn[CCREG] is above 2
734 for(j=10;j>=3;j--)
735 {
736 for(r=1;r<=MAXREG;r++)
737 {
738 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
739 for(hr=0;hr<HOST_REGS;hr++) {
740 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 }
749 for(hr=0;hr<HOST_REGS;hr++) {
750 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
751 if(cur->regmap[hr]==r) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 }
759 }
760 }
761 }
762 }
 // Last resort (values 10..0): no exclusions at all
763 for(j=10;j>=0;j--)
764 {
765 for(r=1;r<=MAXREG;r++)
766 {
767 if(hsn[r]==j) {
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r+64) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 for(hr=0;hr<HOST_REGS;hr++) {
777 if(cur->regmap[hr]==r) {
778 cur->regmap[hr]=reg;
779 cur->dirty&=~(1<<hr);
780 cur->isconst&=~(1<<hr);
781 return;
782 }
783 }
784 }
785 }
786 }
787 SysPrintf("This shouldn't happen");exit(1);
788}
789
790// Allocate a specific ARM register.
791static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
792{
793 int n;
794 int dirty=0;
795
796 // see if it's already allocated (and dealloc it)
797 for(n=0;n<HOST_REGS;n++)
798 {
799 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
800 dirty=(cur->dirty>>n)&1;
801 cur->regmap[n]=-1;
802 }
803 }
804
805 cur->regmap[hr]=reg;
806 cur->dirty&=~(1<<hr);
807 cur->dirty|=dirty<<hr;
808 cur->isconst&=~(1<<hr);
809}
810
811// Alloc cycle count into dedicated register
// Thin wrapper: places CCREG in HOST_CCREG through alloc_arm_reg(), which
// also carries over the dirty bit of any previous CCREG mapping.
812static void alloc_cc(struct regstat *cur,int i)
813{
814 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
815}
816
817/* Special alloc */
818
819
820/* Assembler */
821
// Printable names of the 16 host registers, indexed by register number;
// used by the emit_* functions for their assem_debug() output.
// Index 11 is printed as "fp", 13 as "sp", 14 as "lr" and 15 as "pc".
822static unused char regname[16][4] = {
823 "r0",
824 "r1",
825 "r2",
826 "r3",
827 "r4",
828 "r5",
829 "r6",
830 "r7",
831 "r8",
832 "r9",
833 "r10",
834 "fp",
835 "r12",
836 "sp",
837 "lr",
838 "pc"};
839
840static void output_w32(u_int word)
841{
842 *((u_int *)out)=word;
843 out+=4;
844}
845
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15 and rm in bits 0-3. Each register number must be < 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
853
// Build the rn (bits 16-19) and rd (bits 12-15) fields together with an
// 8-bit immediate and a rotate field derived from an even left-shift
// amount: ((32-shift)&30)<<7 lands in bits 8-11.
// Requires rd<16, rn<16, imm<256 and an even shift.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate = ((32 - shift) & 30) << 7;
  u_int fields = imm | rotate;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
862
// Try to express imm as an 8-bit value combined with an even rotation.
// On success returns 1 and stores the 12-bit field in *encoded
// (rotation in bits 8-11, value in bits 0-7); otherwise returns 0 and
// leaves *encoded at 0. imm == 0 succeeds with *encoded == 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
878
// Like genimm(), but for callers that require the immediate to be
// encodable: asserts that the encoding succeeded.
static void genimm_checked(u_int imm,u_int *encoded)
{
  u_int encodable = genimm(imm, encoded);
  assert(encodable);
  (void)encodable; // keeps the variable "used" when assert() compiles away
}
885
886static u_int genjmp(u_int addr)
887{
888 int offset=addr-(int)out-8;
889 if(offset<-33554432||offset>=33554432) {
890 if (addr>2) {
891 SysPrintf("genjmp: out of range: %08x\n", offset);
892 exit(1);
893 }
894 return 0;
895 }
896 return ((u_int)offset>>2)&0xffffff;
897}
898
899static void emit_mov(int rs,int rt)
900{
901 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
902 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
903}
904
905static void emit_movs(int rs,int rt)
906{
907 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
908 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
909}
910
911static void emit_add(int rs1,int rs2,int rt)
912{
913 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
915}
916
917static void emit_adds(int rs1,int rs2,int rt)
918{
919 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
921}
922
923static void emit_adcs(int rs1,int rs2,int rt)
924{
925 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
926 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
927}
928
929static void emit_sbc(int rs1,int rs2,int rt)
930{
931 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
932 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
933}
934
935static void emit_sbcs(int rs1,int rs2,int rt)
936{
937 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
939}
940
941static void emit_neg(int rs, int rt)
942{
943 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
944 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
945}
946
947static void emit_negs(int rs, int rt)
948{
949 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
950 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
951}
952
953static void emit_sub(int rs1,int rs2,int rt)
954{
955 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
956 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
957}
958
959static void emit_subs(int rs1,int rs2,int rt)
960{
961 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
962 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
963}
964
965static void emit_zeroreg(int rt)
966{
967 assem_debug("mov %s,#0\n",regname[rt]);
968 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
969}
970
971static void emit_loadlp(u_int imm,u_int rt)
972{
973 add_literal((int)out,imm);
974 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
975 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
976}
977
978static void emit_movw(u_int imm,u_int rt)
979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
984
985static void emit_movt(u_int imm,u_int rt)
986{
987 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
988 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
989}
990
991static void emit_movimm(u_int imm,u_int rt)
992{
993 u_int armval;
994 if(genimm(imm,&armval)) {
995 assem_debug("mov %s,#%d\n",regname[rt],imm);
996 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
997 }else if(genimm(~imm,&armval)) {
998 assem_debug("mvn %s,#%d\n",regname[rt],imm);
999 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1000 }else if(imm<65536) {
1001 #ifndef HAVE_ARMV7
1002 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1003 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1004 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1005 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1006 #else
1007 emit_movw(imm,rt);
1008 #endif
1009 }else{
1010 #ifndef HAVE_ARMV7
1011 emit_loadlp(imm,rt);
1012 #else
1013 emit_movw(imm&0x0000FFFF,rt);
1014 emit_movt(imm&0xFFFF0000,rt);
1015 #endif
1016 }
1017}
1018
1019static void emit_pcreladdr(u_int rt)
1020{
1021 assem_debug("add %s,pc,#?\n",regname[rt]);
1022 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1023}
1024
1025static void emit_loadreg(int r, int hr)
1026{
1027 if(r&64) {
1028 SysPrintf("64bit load in 32bit mode!\n");
1029 assert(0);
1030 return;
1031 }
1032 if((r&63)==0)
1033 emit_zeroreg(hr);
1034 else {
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==CSREG) addr=(int)&Status;
1040 if(r==FSREG) addr=(int)&FCR31;
1041 if(r==INVCP) addr=(int)&invc_ptr;
1042 u_int offset = addr-(u_int)&dynarec_local;
1043 assert(offset<4096);
1044 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1045 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1046 }
1047}
1048
1049static void emit_storereg(int r, int hr)
1050{
1051 if(r&64) {
1052 SysPrintf("64bit store in 32bit mode!\n");
1053 assert(0);
1054 return;
1055 }
1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
1067static void emit_test(int rs, int rt)
1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
1073static void emit_testimm(int rs,int imm)
1074{
1075 u_int armval;
1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
1077 genimm_checked(imm,&armval);
1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
1081static void emit_testeqimm(int rs,int imm)
1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1085 genimm_checked(imm,&armval);
1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
1089static void emit_not(int rs,int rt)
1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
1095static void emit_mvnmi(int rs,int rt)
1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
1101static void emit_and(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
1107static void emit_or(u_int rs1,u_int rs2,u_int rt)
1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
1112
1113static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1114{
1115 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1116 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1117}
1118
1119static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1120{
1121 assert(rs<16);
1122 assert(rt<16);
1123 assert(imm<32);
1124 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1125 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1126}
1127
1128static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 assert(imm<32);
1133 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1134 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1135}
1136
1137static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1138{
1139 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1140 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1141}
1142
1143static void emit_addimm(u_int rs,int imm,u_int rt)
1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 if(imm!=0) {
1148 u_int armval;
1149 if(genimm(imm,&armval)) {
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1151 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1152 }else if(genimm(-imm,&armval)) {
1153 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1154 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1155 #ifdef HAVE_ARMV7
1156 }else if(rt!=rs&&(u_int)imm<65536) {
1157 emit_movw(imm&0x0000ffff,rt);
1158 emit_add(rs,rt,rt);
1159 }else if(rt!=rs&&(u_int)-imm<65536) {
1160 emit_movw(-imm&0x0000ffff,rt);
1161 emit_sub(rs,rt,rt);
1162 #endif
1163 }else if((u_int)-imm<65536) {
1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1168 }else {
1169 do {
1170 int shift = (ffs(imm) - 1) & ~1;
1171 int imm8 = imm & (0xff << shift);
1172 genimm_checked(imm8,&armval);
1173 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1174 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1175 rs = rt;
1176 imm &= ~imm8;
1177 }
1178 while (imm != 0);
1179 }
1180 }
1181 else if(rs!=rt) emit_mov(rs,rt);
1182}
1183
1184static void emit_addimm_and_set_flags(int imm,int rt)
1185{
1186 assert(imm>-65536&&imm<65536);
1187 u_int armval;
1188 if(genimm(imm,&armval)) {
1189 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1190 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1191 }else if(genimm(-imm,&armval)) {
1192 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1193 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1194 }else if(imm<0) {
1195 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1196 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1197 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1198 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1199 }else{
1200 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1201 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1202 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1203 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1204 }
1205}
1206
1207static void emit_addimm_no_flags(u_int imm,u_int rt)
1208{
1209 emit_addimm(rt,imm,rt);
1210}
1211
1212static void emit_addnop(u_int r)
1213{
1214 assert(r<16);
1215 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1216 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1217}
1218
1219static void emit_adcimm(u_int rs,int imm,u_int rt)
1220{
1221 u_int armval;
1222 genimm_checked(imm,&armval);
1223 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1226
1227static void emit_rscimm(int rs,int imm,u_int rt)
1228{
1229 assert(0);
1230 u_int armval;
1231 genimm_checked(imm,&armval);
1232 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1234}
1235
1236static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1237{
1238 // TODO: if(genimm(imm,&armval)) ...
1239 // else
1240 emit_movimm(imm,HOST_TEMPREG);
1241 emit_adds(HOST_TEMPREG,rsl,rtl);
1242 emit_adcimm(rsh,0,rth);
1243}
1244
1245static void emit_andimm(int rs,int imm,int rt)
1246{
1247 u_int armval;
1248 if(imm==0) {
1249 emit_zeroreg(rt);
1250 }else if(genimm(imm,&armval)) {
1251 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1252 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1253 }else if(genimm(~imm,&armval)) {
1254 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(imm==65535) {
1257 #ifndef HAVE_ARMV6
1258 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1260 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1261 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1262 #else
1263 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1265 #endif
1266 }else{
1267 assert(imm>0&&imm<65535);
1268 #ifndef HAVE_ARMV7
1269 assem_debug("mov r14,#%d\n",imm&0xFF00);
1270 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1271 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1272 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1273 #else
1274 emit_movw(imm,HOST_TEMPREG);
1275 #endif
1276 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1277 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1278 }
1279}
1280
1281static void emit_orimm(int rs,int imm,int rt)
1282{
1283 u_int armval;
1284 if(imm==0) {
1285 if(rs!=rt) emit_mov(rs,rt);
1286 }else if(genimm(imm,&armval)) {
1287 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1289 }else{
1290 assert(imm>0&&imm<65536);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1294 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1295 }
1296}
1297
1298static void emit_xorimm(int rs,int imm,int rt)
1299{
1300 u_int armval;
1301 if(imm==0) {
1302 if(rs!=rt) emit_mov(rs,rt);
1303 }else if(genimm(imm,&armval)) {
1304 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1305 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1306 }else{
1307 assert(imm>0&&imm<65536);
1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1311 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1312 }
1313}
1314
1315static void emit_shlimm(int rs,u_int imm,int rt)
1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 //if(imm==1) ...
1320 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
1324static void emit_lsls_imm(int rs,int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
1332static unused void emit_lslpls_imm(int rs,int imm,int rt)
1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1338}
1339
1340static void emit_shrimm(int rs,u_int imm,int rt)
1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1346}
1347
1348static void emit_sarimm(int rs,u_int imm,int rt)
1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1354}
1355
1356static void emit_rorimm(int rs,u_int imm,int rt)
1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1362}
1363
1364static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1365{
1366 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1367 assert(imm>0);
1368 assert(imm<32);
1369 //if(imm==1) ...
1370 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1371 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1372 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1373 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1374}
1375
1376static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1377{
1378 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1379 assert(imm>0);
1380 assert(imm<32);
1381 //if(imm==1) ...
1382 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1383 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1384 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1385 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1386}
1387
/* Sign-extend the low 16 bits of rs into rt: a single sxth when HAVE_ARMV6
 * is defined, otherwise a left shift by 16 followed by an arithmetic right
 * shift by 16. */
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  const u_int insn = 0xe6bf0070 | rd_rn_rm(rt, 0, rs);

  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(insn);
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
1398
/* Sign-extend the low 8 bits of rs into rt: a single sxtb when HAVE_ARMV6
 * is defined, otherwise a left shift by 24 followed by an arithmetic right
 * shift by 24. */
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  const u_int insn = 0xe6af0070 | rd_rn_rm(rt, 0, rs);

  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(insn);
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
1409
1410static void emit_shl(u_int rs,u_int shift,u_int rt)
1411{
1412 assert(rs<16);
1413 assert(rt<16);
1414 assert(shift<16);
1415 //if(imm==1) ...
1416 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1417 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1418}
1419
1420static void emit_shr(u_int rs,u_int shift,u_int rt)
1421{
1422 assert(rs<16);
1423 assert(rt<16);
1424 assert(shift<16);
1425 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1426 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1427}
1428
1429static void emit_sar(u_int rs,u_int shift,u_int rt)
1430{
1431 assert(rs<16);
1432 assert(rt<16);
1433 assert(shift<16);
1434 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1435 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1436}
1437
1438static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1439{
1440 assert(rs<16);
1441 assert(rt<16);
1442 assert(shift<16);
1443 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1444 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1445}
1446
1447static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1448{
1449 assert(rs<16);
1450 assert(rt<16);
1451 assert(shift<16);
1452 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1453 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1454}
1455
1456static void emit_cmpimm(int rs,int imm)
1457{
1458 u_int armval;
1459 if(genimm(imm,&armval)) {
1460 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1461 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1462 }else if(genimm(-imm,&armval)) {
1463 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1464 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1465 }else if(imm>0) {
1466 assert(imm<65536);
1467 emit_movimm(imm,HOST_TEMPREG);
1468 assem_debug("cmp %s,r14\n",regname[rs]);
1469 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1470 }else{
1471 assert(imm>-65536);
1472 emit_movimm(-imm,HOST_TEMPREG);
1473 assem_debug("cmn %s,r14\n",regname[rs]);
1474 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1475 }
1476}
1477
1478static void emit_cmovne_imm(int imm,int rt)
1479{
1480 assem_debug("movne %s,#%d\n",regname[rt],imm);
1481 u_int armval;
1482 genimm_checked(imm,&armval);
1483 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1484}
1485
1486static void emit_cmovl_imm(int imm,int rt)
1487{
1488 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1489 u_int armval;
1490 genimm_checked(imm,&armval);
1491 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1492}
1493
1494static void emit_cmovb_imm(int imm,int rt)
1495{
1496 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1497 u_int armval;
1498 genimm_checked(imm,&armval);
1499 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1500}
1501
1502static void emit_cmovs_imm(int imm,int rt)
1503{
1504 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1505 u_int armval;
1506 genimm_checked(imm,&armval);
1507 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1508}
1509
1510static void emit_cmove_reg(int rs,int rt)
1511{
1512 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1513 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1514}
1515
1516static void emit_cmovne_reg(int rs,int rt)
1517{
1518 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1519 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1520}
1521
1522static void emit_cmovl_reg(int rs,int rt)
1523{
1524 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1526}
1527
1528static void emit_cmovs_reg(int rs,int rt)
1529{
1530 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1531 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1532}
1533
/* Set rt to 1 if rs < imm (signed, via movlt), else 0.
 * When rt aliases rs the zeroing must happen after the compare so the
 * source value is still intact when it is compared. */
static void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1541
/* Set rt to 1 if rs < imm (unsigned, via movcc), else 0.
 * When rt aliases rs the zeroing must happen after the compare so the
 * source value is still intact when it is compared. */
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1549
/* Signed "less than immediate" for a 64-bit value in rsh:rsl, result in rt.
 * The low-word comparison result is computed first and then corrected
 * according to the high word: for a non-negative imm the high word is
 * tested against zero, for a negative imm it is compared with -1.
 * rsh must not be the same register as rt. */
static void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }

  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
1567
/* Unsigned "less than immediate" for a 64-bit value in rsh:rsl, result in
 * rt. The low-word result is computed first; for a non-negative imm it is
 * forced to 0 when the high word is non-zero, for a negative imm it is
 * forced to 1 when the high word differs from -1.
 * rsh must not be the same register as rt. */
static void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }

  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1583
1584static void emit_cmp(int rs,int rt)
1585{
1586 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1587 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1588}
1589
/* Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, preload 1,
 * then overwrite with 0 under the lt condition. The compare is emitted
 * before rt is written. */
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);    /* flags for rs - 1 */
  emit_movimm(1, rt);    /* assume "greater than zero" */
  emit_cmovl_imm(0, rt); /* rs < 1: clear the result */
}
1597
/* Set rt to 1 if rs is non-zero. Flags are obtained either from a
 * flag-setting move rs -> rt (distinct registers) or from testing rs
 * against itself (same register); movne then writes the 1. */
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);

  emit_cmovne_imm(1, rt);
}
1605
/* "Greater than zero" for a 64-bit value in rsh:rsl, result in rt.
 * Starts from the 32-bit result for the low word, then corrects it from the
 * high word: 1 when the high word is non-zero, 0 when it is negative. */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);

  /* High-word correction. */
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
1614
/* "Non-zero" test for a 64-bit value in rsh:rsl: OR both halves into rt
 * with flags set, then write 1 to rt under the ne condition. */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt); /* rt = rsh | rsl, flags updated */
  emit_cmovne_imm(1, rt);              /* any bit set -> 1 */
}
1621
/* Set rt to 1 if rs1 < rs2 (signed, via movlt), else 0.
 * If rt aliases either operand it is zeroed only after the compare so the
 * operands are still intact when compared. */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1630
/* Set rt to 1 if rs1 < rs2 (unsigned, via movcc), else 0.
 * If rt aliases either operand it is zeroed only after the compare so the
 * operands are still intact when compared. */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1639
1640static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1641{
1642 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1643 assert(u1!=rt);
1644 assert(u2!=rt);
1645 emit_cmp(l1,l2);
1646 emit_movimm(0,rt);
1647 emit_sbcs(u1,u2,HOST_TEMPREG);
1648 emit_cmovl_imm(1,rt);
1649}
1650
1651static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1652{
1653 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1654 assert(u1!=rt);
1655 assert(u2!=rt);
1656 emit_cmp(l1,l2);
1657 emit_movimm(0,rt);
1658 emit_sbcs(u1,u2,HOST_TEMPREG);
1659 emit_cmovb_imm(1,rt);
1660}
1661
#ifdef DRC_DBG
/* Debug builds: table of known helper addresses and their names, used to
 * annotate branch targets in the assembly trace. */
extern void gen_interupt();
extern void do_insn_cmp();
#define FUNCNAME(f) { (intptr_t)f, " " #f }
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

/* Return the (space-prefixed) name registered for address a, or an empty
 * string when the address is not in the table. */
static const char *func_name(intptr_t a)
{
  const int count = (int)(sizeof(function_names) / sizeof(function_names[0]));
  int idx = 0;

  while (idx < count) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
/* Non-debug builds: no name table, every lookup yields an empty string. */
#define func_name(x) ""
#endif
1702
1703static void emit_call(const void *a_)
1704{
1705 int a = (int)a_;
1706 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1707 u_int offset=genjmp(a);
1708 output_w32(0xeb000000|offset);
1709}
1710
1711static void emit_jmp(const void *a_)
1712{
1713 int a = (int)a_;
1714 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1715 u_int offset=genjmp(a);
1716 output_w32(0xea000000|offset);
1717}
1718
1719static void emit_jne(const void *a_)
1720{
1721 int a = (int)a_;
1722 assem_debug("bne %x\n",a);
1723 u_int offset=genjmp(a);
1724 output_w32(0x1a000000|offset);
1725}
1726
1727static void emit_jeq(int a)
1728{
1729 assem_debug("beq %x\n",a);
1730 u_int offset=genjmp(a);
1731 output_w32(0x0a000000|offset);
1732}
1733
1734static void emit_js(int a)
1735{
1736 assem_debug("bmi %x\n",a);
1737 u_int offset=genjmp(a);
1738 output_w32(0x4a000000|offset);
1739}
1740
1741static void emit_jns(int a)
1742{
1743 assem_debug("bpl %x\n",a);
1744 u_int offset=genjmp(a);
1745 output_w32(0x5a000000|offset);
1746}
1747
1748static void emit_jl(int a)
1749{
1750 assem_debug("blt %x\n",a);
1751 u_int offset=genjmp(a);
1752 output_w32(0xba000000|offset);
1753}
1754
1755static void emit_jge(int a)
1756{
1757 assem_debug("bge %x\n",a);
1758 u_int offset=genjmp(a);
1759 output_w32(0xaa000000|offset);
1760}
1761
1762static void emit_jno(int a)
1763{
1764 assem_debug("bvc %x\n",a);
1765 u_int offset=genjmp(a);
1766 output_w32(0x7a000000|offset);
1767}
1768
1769static void emit_jc(int a)
1770{
1771 assem_debug("bcs %x\n",a);
1772 u_int offset=genjmp(a);
1773 output_w32(0x2a000000|offset);
1774}
1775
1776static void emit_jcc(void *a_)
1777{
1778 int a = (int)a_;
1779 assem_debug("bcc %x\n",a);
1780 u_int offset=genjmp(a);
1781 output_w32(0x3a000000|offset);
1782}
1783
1784static void emit_callreg(u_int r)
1785{
1786 assert(r<15);
1787 assem_debug("blx %s\n",regname[r]);
1788 output_w32(0xe12fff30|r);
1789}
1790
1791static void emit_jmpreg(u_int r)
1792{
1793 assem_debug("mov pc,%s\n",regname[r]);
1794 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1795}
1796
1797static void emit_readword_indexed(int offset, int rs, int rt)
1798{
1799 assert(offset>-4096&&offset<4096);
1800 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1801 if(offset>=0) {
1802 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1803 }else{
1804 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1805 }
1806}
1807
1808static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1809{
1810 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1811 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1812}
1813
1814static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1818}
1819
1820static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1824}
1825
1826static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1827{
1828 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1829 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1830}
1831
1832static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1833{
1834 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1835 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1836}
1837
1838static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1839{
1840 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1841 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1842}
1843
/* Word load that optionally goes through a map register: with map < 0 it is
 * a plain base+offset load; otherwise addr must be 0 and the load is indexed
 * by map scaled by four. */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }

  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1852
/* Load a 64-bit value as two words: the word at addr goes to rh (skipped
 * when rh < 0), the following word goes to rl.
 * Without a map register (map < 0) the second word is read at addr+4.
 * With a map register, map is incremented by one between the two loads
 * (the mapped load scales it by four); map is therefore modified, and rh
 * must differ from rs so the base survives the first load. */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);

  if (map < 0) {
    if (want_high)
      emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }

  assert(rh!=rs);
  if (want_high)
    emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1865
1866static void emit_movsbl_indexed(int offset, int rs, int rt)
1867{
1868 assert(offset>-256&&offset<256);
1869 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1870 if(offset>=0) {
1871 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1872 }else{
1873 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1874 }
1875}
1876
1877static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1878{
1879 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1880 else {
1881 if(addr==0) {
1882 emit_shlimm(map,2,map);
1883 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1884 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1885 }else{
1886 assert(addr>-256&&addr<256);
1887 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1888 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1889 emit_movsbl_indexed(addr, rt, rt);
1890 }
1891 }
1892}
1893
1894static void emit_movswl_indexed(int offset, int rs, int rt)
1895{
1896 assert(offset>-256&&offset<256);
1897 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1898 if(offset>=0) {
1899 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1900 }else{
1901 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1902 }
1903}
1904
1905static void emit_movzbl_indexed(int offset, int rs, int rt)
1906{
1907 assert(offset>-4096&&offset<4096);
1908 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1909 if(offset>=0) {
1910 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1911 }else{
1912 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1913 }
1914}
1915
1916static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1917{
1918 assert(rs2>=0);
1919 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1920 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1921}
1922
/* Unsigned byte load that optionally goes through a map register.
 *   map < 0   : plain base+offset load.
 *   addr == 0 : load indexed by map scaled by four.
 *   otherwise : rt = rs + addr first, then the scaled-index load from rt. */
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }

  emit_movzbl_dualindexedx4(rs, map, rt);
}
1935
1936static void emit_movzwl_indexed(int offset, int rs, int rt)
1937{
1938 assert(offset>-256&&offset<256);
1939 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1940 if(offset>=0) {
1941 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1942 }else{
1943 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1944 }
1945}
1946
1947static void emit_ldrd(int offset, int rs, int rt)
1948{
1949 assert(offset>-256&&offset<256);
1950 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1951 if(offset>=0) {
1952 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1953 }else{
1954 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1955 }
1956}
1957
1958static void emit_readword(void *addr, int rt)
1959{
1960 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1961 assert(offset<4096);
1962 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1963 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1964}
1965
1966static unused void emit_movsbl(int addr, int rt)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<256);
1970 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1971 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1972}
1973
1974static unused void emit_movswl(int addr, int rt)
1975{
1976 u_int offset = addr-(u_int)&dynarec_local;
1977 assert(offset<256);
1978 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1979 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1980}
1981
1982static unused void emit_movzbl(int addr, int rt)
1983{
1984 u_int offset = addr-(u_int)&dynarec_local;
1985 assert(offset<4096);
1986 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1987 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1988}
1989
1990static unused void emit_movzwl(int addr, int rt)
1991{
1992 u_int offset = addr-(u_int)&dynarec_local;
1993 assert(offset<256);
1994 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1995 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1996}
1997
1998static void emit_writeword_indexed(int rt, int offset, int rs)
1999{
2000 assert(offset>-4096&&offset<4096);
2001 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2002 if(offset>=0) {
2003 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2004 }else{
2005 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2006 }
2007}
2008
2009static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2010{
2011 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2012 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2013}
2014
/* Word store that optionally goes through a map register: with map < 0 it
 * is a plain base+offset store; otherwise addr must be 0 and the store is
 * indexed by map scaled by four. The temp parameter is not used here. */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }

  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
2023
/* Store a 64-bit value as two words: rh at addr, rl in the following word.
 *   map < 0 : plain base+offset stores (the rh store is skipped if rh < 0).
 *   map >= 0: rh must be valid. When temp is distinct from rs, temp = map+1
 *             is prepared and used as the index for the second store;
 *             otherwise rs itself is advanced by 4 (and thus modified)
 *             before the second store through map. */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0)
      emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh>=0);
  const int have_scratch = (temp != rs);

  if (have_scratch)
    emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);

  if (have_scratch) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2040
2041static void emit_writehword_indexed(int rt, int offset, int rs)
2042{
2043 assert(offset>-256&&offset<256);
2044 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2045 if(offset>=0) {
2046 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2047 }else{
2048 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2049 }
2050}
2051
2052static void emit_writebyte_indexed(int rt, int offset, int rs)
2053{
2054 assert(offset>-4096&&offset<4096);
2055 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2056 if(offset>=0) {
2057 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2058 }else{
2059 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2060 }
2061}
2062
2063static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2064{
2065 assert(rs2>=0);
2066 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2067 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2068}
2069
/* Byte store that optionally goes through a map register.
 *   map < 0   : plain base+offset store.
 *   addr == 0 : store indexed by map scaled by four.
 *   otherwise : temp = rs + addr first, then the scaled-index store from
 *               temp (temp is overwritten). */
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }

  emit_writebyte_dualindexedx4(rt, rs, map);
}
2082
2083static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2084{
2085 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2086 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2087}
2088
2089static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2090{
2091 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2092 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2093}
2094
2095static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2096{
2097 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2098 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2099}
2100
2101static void emit_writeword(int rt, void *addr)
2102{
2103 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
2104 assert(offset<4096);
2105 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2106 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2107}
2108
2109static unused void emit_writehword(int rt, void *addr)
2110{
2111 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
2112 assert(offset<256);
2113 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2114 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2115}
2116
2117static unused void emit_writebyte(int rt, void *addr)
2118{
2119 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
2120 assert(offset<4096);
2121 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2122 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2123}
2124
2125static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2126{
2127 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2128 assert(rs1<16);
2129 assert(rs2<16);
2130 assert(hi<16);
2131 assert(lo<16);
2132 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2133}
2134
2135static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2136{
2137 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2138 assert(rs1<16);
2139 assert(rs2<16);
2140 assert(hi<16);
2141 assert(lo<16);
2142 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2143}
2144
2145static void emit_clz(int rs,int rt)
2146{
2147 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2148 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2149}
2150
2151static void emit_subcs(int rs1,int rs2,int rt)
2152{
2153 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2154 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2155}
2156
2157static void emit_shrcc_imm(int rs,u_int imm,int rt)
2158{
2159 assert(imm>0);
2160 assert(imm<32);
2161 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2162 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2163}
2164
2165static void emit_shrne_imm(int rs,u_int imm,int rt)
2166{
2167 assert(imm>0);
2168 assert(imm<32);
2169 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2170 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2171}
2172
2173static void emit_negmi(int rs, int rt)
2174{
2175 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2176 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2177}
2178
2179static void emit_negsmi(int rs, int rt)
2180{
2181 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2182 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2183}
2184
2185static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2186{
2187 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2188 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2189}
2190
2191static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2192{
2193 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2194 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2195}
2196
2197static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2198{
2199 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2200 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2201}
2202
2203static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2204{
2205 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2206 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2207}
2208
2209static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2210{
2211 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2212 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2213}
2214
2215static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2216{
2217 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2218 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2219}
2220
2221static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2222{
2223 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2224 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2225}
2226
2227static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2228{
2229 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2230 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2231}
2232
2233static void emit_teq(int rs, int rt)
2234{
2235 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2236 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2237}
2238
2239static void emit_rsbimm(int rs, int imm, int rt)
2240{
2241 u_int armval;
2242 genimm_checked(imm,&armval);
2243 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2244 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2245}
2246
2247// Load 2 immediates optimizing for small code size
2248static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2249{
2250 emit_movimm(imm1,rt1);
2251 u_int armval;
2252 if(genimm(imm2-imm1,&armval)) {
2253 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2254 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2255 }else if(genimm(imm1-imm2,&armval)) {
2256 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2257 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2258 }
2259 else emit_movimm(imm2,rt2);
2260}
2261
2262// Conditionally select one of two immediates, optimizing for small code size
2263// This will only be called if HAVE_CMOV_IMM is defined
2264static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2265{
2266 u_int armval;
2267 if(genimm(imm2-imm1,&armval)) {
2268 emit_movimm(imm1,rt);
2269 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2270 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2271 }else if(genimm(imm1-imm2,&armval)) {
2272 emit_movimm(imm1,rt);
2273 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2274 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2275 }
2276 else {
2277 #ifndef HAVE_ARMV7
2278 emit_movimm(imm1,rt);
2279 add_literal((int)out,imm2);
2280 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2281 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2282 #else
2283 emit_movw(imm1&0x0000FFFF,rt);
2284 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2285 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2286 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2287 }
2288 emit_movt(imm1&0xFFFF0000,rt);
2289 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2290 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2291 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2292 }
2293 #endif
2294 }
2295}
2296
2297// special case for checking invalid_code
2298static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2299{
2300 assert(imm<128&&imm>=0);
2301 assert(r>=0&&r<16);
2302 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2303 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2304 emit_cmpimm(HOST_TEMPREG,imm);
2305}
2306
2307static void emit_callne(int a)
2308{
2309 assem_debug("blne %x\n",a);
2310 u_int offset=genjmp(a);
2311 output_w32(0x1b000000|offset);
2312}
2313
2314// Used to preload hash table entries
2315static unused void emit_prefetchreg(int r)
2316{
2317 assem_debug("pld %s\n",regname[r]);
2318 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2319}
2320
2321// Special case for mini_ht
2322static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2323{
2324 assert(offset<4096);
2325 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2326 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2327}
2328
2329static unused void emit_bicne_imm(int rs,int imm,int rt)
2330{
2331 u_int armval;
2332 genimm_checked(imm,&armval);
2333 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2334 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2335}
2336
2337static unused void emit_biccs_imm(int rs,int imm,int rt)
2338{
2339 u_int armval;
2340 genimm_checked(imm,&armval);
2341 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2342 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2343}
2344
2345static unused void emit_bicvc_imm(int rs,int imm,int rt)
2346{
2347 u_int armval;
2348 genimm_checked(imm,&armval);
2349 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2350 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2351}
2352
2353static unused void emit_bichi_imm(int rs,int imm,int rt)
2354{
2355 u_int armval;
2356 genimm_checked(imm,&armval);
2357 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2358 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2359}
2360
2361static unused void emit_orrvs_imm(int rs,int imm,int rt)
2362{
2363 u_int armval;
2364 genimm_checked(imm,&armval);
2365 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2366 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2367}
2368
2369static void emit_orrne_imm(int rs,int imm,int rt)
2370{
2371 u_int armval;
2372 genimm_checked(imm,&armval);
2373 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2374 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2375}
2376
2377static void emit_andne_imm(int rs,int imm,int rt)
2378{
2379 u_int armval;
2380 genimm_checked(imm,&armval);
2381 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2382 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2383}
2384
2385static unused void emit_addpl_imm(int rs,int imm,int rt)
2386{
2387 u_int armval;
2388 genimm_checked(imm,&armval);
2389 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2390 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2391}
2392
2393static void emit_jno_unlikely(int a)
2394{
2395 //emit_jno(a);
2396 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2397 output_w32(0x72800000|rd_rn_rm(15,15,0));
2398}
2399
2400static void save_regs_all(u_int reglist)
2401{
2402 int i;
2403 if(!reglist) return;
2404 assem_debug("stmia fp,{");
2405 for(i=0;i<16;i++)
2406 if(reglist&(1<<i))
2407 assem_debug("r%d,",i);
2408 assem_debug("}\n");
2409 output_w32(0xe88b0000|reglist);
2410}
2411
2412static void restore_regs_all(u_int reglist)
2413{
2414 int i;
2415 if(!reglist) return;
2416 assem_debug("ldmia fp,{");
2417 for(i=0;i<16;i++)
2418 if(reglist&(1<<i))
2419 assem_debug("r%d,",i);
2420 assem_debug("}\n");
2421 output_w32(0xe89b0000|reglist);
2422}
2423
2424// Save registers before function call
2425static void save_regs(u_int reglist)
2426{
2427 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2428 save_regs_all(reglist);
2429}
2430
2431// Restore registers after function call
2432static void restore_regs(u_int reglist)
2433{
2434 reglist&=CALLER_SAVE_REGS;
2435 restore_regs_all(reglist);
2436}
2437
2438/* Stubs/epilogue */
2439
2440static void literal_pool(int n)
2441{
2442 if(!literalcount) return;
2443 if(n) {
2444 if((int)out-literals[0][0]<4096-n) return;
2445 }
2446 u_int *ptr;
2447 int i;
2448 for(i=0;i<literalcount;i++)
2449 {
2450 u_int l_addr=(u_int)out;
2451 int j;
2452 for(j=0;j<i;j++) {
2453 if(literals[j][1]==literals[i][1]) {
2454 //printf("dup %08x\n",literals[i][1]);
2455 l_addr=literals[j][0];
2456 break;
2457 }
2458 }
2459 ptr=(u_int *)literals[i][0];
2460 u_int offset=l_addr-(u_int)ptr-8;
2461 assert(offset<4096);
2462 assert(!(offset&3));
2463 *ptr|=offset;
2464 if(l_addr==(u_int)out) {
2465 literals[i][0]=l_addr; // remember for dupes
2466 output_w32(literals[i][1]);
2467 }
2468 }
2469 literalcount=0;
2470}
2471
2472static void literal_pool_jumpover(int n)
2473{
2474 if(!literalcount) return;
2475 if(n) {
2476 if((int)out-literals[0][0]<4096-n) return;
2477 }
2478 void *jaddr = out;
2479 emit_jmp(0);
2480 literal_pool(0);
2481 set_jump_target(jaddr, out);
2482}
2483
2484static void emit_extjump2(u_char *addr, int target, void *linker)
2485{
2486 u_char *ptr=(u_char *)addr;
2487 assert((ptr[3]&0x0e)==0xa);
2488 (void)ptr;
2489
2490 emit_loadlp(target,0);
2491 emit_loadlp((u_int)addr,1);
2492 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
2493 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2494//DEBUG >
2495#ifdef DEBUG_CYCLE_COUNT
2496 emit_readword(&last_count,ECX);
2497 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2498 emit_readword(&next_interupt,ECX);
2499 emit_writeword(HOST_CCREG,&Count);
2500 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2501 emit_writeword(ECX,&last_count);
2502#endif
2503//DEBUG <
2504 emit_jmp(linker);
2505}
2506
2507static void emit_extjump(void *addr, int target)
2508{
2509 emit_extjump2(addr, target, dyna_linker);
2510}
2511
2512static void emit_extjump_ds(void *addr, int target)
2513{
2514 emit_extjump2(addr, target, dyna_linker_ds);
2515}
2516
2517// put rt_val into rt, potentially making use of rs with value rs_val
2518static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2519{
2520 u_int armval;
2521 int diff;
2522 if(genimm(rt_val,&armval)) {
2523 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2524 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2525 return;
2526 }
2527 if(genimm(~rt_val,&armval)) {
2528 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2529 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2530 return;
2531 }
2532 diff=rt_val-rs_val;
2533 if(genimm(diff,&armval)) {
2534 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2535 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2536 return;
2537 }else if(genimm(-diff,&armval)) {
2538 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2539 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2540 return;
2541 }
2542 emit_movimm(rt_val,rt);
2543}
2544
// Return 1 if emit_movimm_from can do its job cheaply for this pair: the
// values are equal, or their difference (taken in either direction), after
// dropping trailing pairs of zero bits, is below 0x100. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  const int delta=v2-v1;
  u_int bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100?1:0;
}
2560
// Move host registers a0 and a1 into argument registers r0 and r1.
// A negative register number means "nothing to move" for that slot.
// Trashes r2 (used as scratch when the two have to be swapped).
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // a0 lives in r1 and a1 in r0: must swap through r2
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 holds a1: move it out before r0 is overwritten
      emit_mov(a1,1);
      if(a0>=0)
        emit_mov(a0,0);
      return;
    }
  }
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2577
2578static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
2579{
2580 switch(type) {
2581 case LOADB_STUB: emit_signextend8(rs,rt); break;
2582 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2583 case LOADH_STUB: emit_signextend16(rs,rt); break;
2584 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2585 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2586 default: assert(0);
2587 }
2588}
2589
2590#include "pcsxmem.h"
2591#include "pcsxmem_inline.c"
2592
2593static void do_readstub(int n)
2594{
2595 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
2596 literal_pool(256);
2597 set_jump_target(stubs[n].addr, out);
2598 enum stub_type type=stubs[n].type;
2599 int i=stubs[n].a;
2600 int rs=stubs[n].b;
2601 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2602 u_int reglist=stubs[n].e;
2603 signed char *i_regmap=i_regs->regmap;
2604 int rt;
2605 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2606 rt=get_reg(i_regmap,FTEMP);
2607 }else{
2608 rt=get_reg(i_regmap,rt1[i]);
2609 }
2610 assert(rs>=0);
2611 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2612 void *restore_jump = NULL;
2613 reglist|=(1<<rs);
2614 for(r=0;r<=12;r++) {
2615 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2616 temp=r; break;
2617 }
2618 }
2619 if(rt>=0&&rt1[i]!=0)
2620 reglist&=~(1<<rt);
2621 if(temp==-1) {
2622 save_regs(reglist);
2623 regs_saved=1;
2624 temp=(rs==0)?2:0;
2625 }
2626 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2627 temp2=1;
2628 emit_readword(&mem_rtab,temp);
2629 emit_shrimm(rs,12,temp2);
2630 emit_readword_dualindexedx4(temp,temp2,temp2);
2631 emit_lsls_imm(temp2,1,temp2);
2632 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2633 switch(type) {
2634 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2635 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2636 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2637 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2638 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2639 default: assert(0);
2640 }
2641 }
2642 if(regs_saved) {
2643 restore_jump=out;
2644 emit_jcc(0); // jump to reg restore
2645 }
2646 else
2647 emit_jcc(stubs[n].retaddr); // return address
2648
2649 if(!regs_saved)
2650 save_regs(reglist);
2651 void *handler=NULL;
2652 if(type==LOADB_STUB||type==LOADBU_STUB)
2653 handler=jump_handler_read8;
2654 if(type==LOADH_STUB||type==LOADHU_STUB)
2655 handler=jump_handler_read16;
2656 if(type==LOADW_STUB)
2657 handler=jump_handler_read32;
2658 assert(handler);
2659 pass_args(rs,temp2);
2660 int cc=get_reg(i_regmap,CCREG);
2661 if(cc<0)
2662 emit_loadreg(CCREG,2);
2663 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2664 emit_call(handler);
2665 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2666 mov_loadtype_adj(type,0,rt);
2667 }
2668 if(restore_jump)
2669 set_jump_target(restore_jump, out);
2670 restore_regs(reglist);
2671 emit_jmp(stubs[n].retaddr); // return address
2672}
2673
2674// return memhandler, or get directly accessable address and return 0
2675static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
2676{
2677 u_int l1,l2=0;
2678 l1=((u_int *)table)[addr>>12];
2679 if((l1&(1<<31))==0) {
2680 u_int v=l1<<1;
2681 *addr_host=v+addr;
2682 return NULL;
2683 }
2684 else {
2685 l1<<=1;
2686 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2687 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2688 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2689 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2690 else
2691 l2=((u_int *)l1)[(addr&0xfff)/4];
2692 if((l2&(1<<31))==0) {
2693 u_int v=l2<<1;
2694 *addr_host=v+(addr&0xfff);
2695 return NULL;
2696 }
2697 return (void *)(l2<<1);
2698 }
2699}
2700
2701static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2702{
2703 int rs=get_reg(regmap,target);
2704 int rt=get_reg(regmap,target);
2705 if(rs<0) rs=get_reg(regmap,-1);
2706 assert(rs>=0);
2707 u_int host_addr=0,is_dynamic,far_call=0;
2708 void *handler;
2709 int cc=get_reg(regmap,CCREG);
2710 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2711 return;
2712 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
2713 if (handler == NULL) {
2714 if(rt<0||rt1[i]==0)
2715 return;
2716 if(addr!=host_addr)
2717 emit_movimm_from(addr,rs,host_addr,rs);
2718 switch(type) {
2719 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2720 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2721 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2722 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2723 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2724 default: assert(0);
2725 }
2726 return;
2727 }
2728 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2729 if(is_dynamic) {
2730 if(type==LOADB_STUB||type==LOADBU_STUB)
2731 handler=jump_handler_read8;
2732 if(type==LOADH_STUB||type==LOADHU_STUB)
2733 handler=jump_handler_read16;
2734 if(type==LOADW_STUB)
2735 handler=jump_handler_read32;
2736 }
2737
2738 // call a memhandler
2739 if(rt>=0&&rt1[i]!=0)
2740 reglist&=~(1<<rt);
2741 save_regs(reglist);
2742 if(target==0)
2743 emit_movimm(addr,0);
2744 else if(rs!=0)
2745 emit_mov(rs,0);
2746 int offset=(u_char *)handler-out-8;
2747 if(offset<-33554432||offset>=33554432) {
2748 // unreachable memhandler, a plugin func perhaps
2749 emit_movimm((u_int)handler,12);
2750 far_call=1;
2751 }
2752 if(cc<0)
2753 emit_loadreg(CCREG,2);
2754 if(is_dynamic) {
2755 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2756 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2757 }
2758 else {
2759 emit_readword(&last_count,3);
2760 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2761 emit_add(2,3,2);
2762 emit_writeword(2,&Count);
2763 }
2764
2765 if(far_call)
2766 emit_callreg(12);
2767 else
2768 emit_call(handler);
2769
2770 if(rt>=0&&rt1[i]!=0) {
2771 switch(type) {
2772 case LOADB_STUB: emit_signextend8(0,rt); break;
2773 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2774 case LOADH_STUB: emit_signextend16(0,rt); break;
2775 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2776 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2777 default: assert(0);
2778 }
2779 }
2780 restore_regs(reglist);
2781}
2782
2783static void do_writestub(int n)
2784{
2785 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
2786 literal_pool(256);
2787 set_jump_target(stubs[n].addr, out);
2788 enum stub_type type=stubs[n].type;
2789 int i=stubs[n].a;
2790 int rs=stubs[n].b;
2791 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2792 u_int reglist=stubs[n].e;
2793 signed char *i_regmap=i_regs->regmap;
2794 int rt,r;
2795 if(itype[i]==C1LS||itype[i]==C2LS) {
2796 rt=get_reg(i_regmap,r=FTEMP);
2797 }else{
2798 rt=get_reg(i_regmap,r=rs2[i]);
2799 }
2800 assert(rs>=0);
2801 assert(rt>=0);
2802 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2803 void *restore_jump = NULL;
2804 int reglist2=reglist|(1<<rs)|(1<<rt);
2805 for(rtmp=0;rtmp<=12;rtmp++) {
2806 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2807 temp=rtmp; break;
2808 }
2809 }
2810 if(temp==-1) {
2811 save_regs(reglist);
2812 regs_saved=1;
2813 for(rtmp=0;rtmp<=3;rtmp++)
2814 if(rtmp!=rs&&rtmp!=rt)
2815 {temp=rtmp;break;}
2816 }
2817 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2818 temp2=3;
2819 emit_readword(&mem_wtab,temp);
2820 emit_shrimm(rs,12,temp2);
2821 emit_readword_dualindexedx4(temp,temp2,temp2);
2822 emit_lsls_imm(temp2,1,temp2);
2823 switch(type) {
2824 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2825 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2826 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2827 default: assert(0);
2828 }
2829 if(regs_saved) {
2830 restore_jump=out;
2831 emit_jcc(0); // jump to reg restore
2832 }
2833 else
2834 emit_jcc(stubs[n].retaddr); // return address (invcode check)
2835
2836 if(!regs_saved)
2837 save_regs(reglist);
2838 void *handler=NULL;
2839 switch(type) {
2840 case STOREB_STUB: handler=jump_handler_write8; break;
2841 case STOREH_STUB: handler=jump_handler_write16; break;
2842 case STOREW_STUB: handler=jump_handler_write32; break;
2843 default: assert(0);
2844 }
2845 assert(handler);
2846 pass_args(rs,rt);
2847 if(temp2!=3)
2848 emit_mov(temp2,3);
2849 int cc=get_reg(i_regmap,CCREG);
2850 if(cc<0)
2851 emit_loadreg(CCREG,2);
2852 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2853 // returns new cycle_count
2854 emit_call(handler);
2855 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2856 if(cc<0)
2857 emit_storereg(CCREG,2);
2858 if(restore_jump)
2859 set_jump_target(restore_jump, out);
2860 restore_regs(reglist);
2861 emit_jmp(stubs[n].retaddr);
2862}
2863
2864static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2865{
2866 int rs=get_reg(regmap,-1);
2867 int rt=get_reg(regmap,target);
2868 assert(rs>=0);
2869 assert(rt>=0);
2870 u_int host_addr=0;
2871 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2872 if (handler == NULL) {
2873 if(addr!=host_addr)
2874 emit_movimm_from(addr,rs,host_addr,rs);
2875 switch(type) {
2876 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2877 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2878 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2879 default: assert(0);
2880 }
2881 return;
2882 }
2883
2884 // call a memhandler
2885 save_regs(reglist);
2886 pass_args(rs,rt);
2887 int cc=get_reg(regmap,CCREG);
2888 if(cc<0)
2889 emit_loadreg(CCREG,2);
2890 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2891 emit_movimm((u_int)handler,3);
2892 // returns new cycle_count
2893 emit_call(jump_handler_write_h);
2894 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2895 if(cc<0)
2896 emit_storereg(CCREG,2);
2897 restore_regs(reglist);
2898}
2899
2900static void do_unalignedwritestub(int n)
2901{
2902 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2903 literal_pool(256);
2904 set_jump_target(stubs[n].addr, out);
2905
2906 int i=stubs[n].a;
2907 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2908 int addr=stubs[n].b;
2909 u_int reglist=stubs[n].e;
2910 signed char *i_regmap=i_regs->regmap;
2911 int temp2=get_reg(i_regmap,FTEMP);
2912 int rt;
2913 rt=get_reg(i_regmap,rs2[i]);
2914 assert(rt>=0);
2915 assert(addr>=0);
2916 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2917 reglist|=(1<<addr);
2918 reglist&=~(1<<temp2);
2919
2920#if 1
2921 // don't bother with it and call write handler
2922 save_regs(reglist);
2923 pass_args(addr,rt);
2924 int cc=get_reg(i_regmap,CCREG);
2925 if(cc<0)
2926 emit_loadreg(CCREG,2);
2927 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2928 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2929 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2930 if(cc<0)
2931 emit_storereg(CCREG,2);
2932 restore_regs(reglist);
2933 emit_jmp(stubs[n].retaddr); // return address
2934#else
2935 emit_andimm(addr,0xfffffffc,temp2);
2936 emit_writeword(temp2,&address);
2937
2938 save_regs(reglist);
2939 emit_shrimm(addr,16,1);
2940 int cc=get_reg(i_regmap,CCREG);
2941 if(cc<0) {
2942 emit_loadreg(CCREG,2);
2943 }
2944 emit_movimm((u_int)readmem,0);
2945 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2946 emit_call((int)&indirect_jump_indexed);
2947 restore_regs(reglist);
2948
2949 emit_readword(&readmem_dword,temp2);
2950 int temp=addr; //hmh
2951 emit_shlimm(addr,3,temp);
2952 emit_andimm(temp,24,temp);
2953#ifdef BIG_ENDIAN_MIPS
2954 if (opcode[i]==0x2e) // SWR
2955#else
2956 if (opcode[i]==0x2a) // SWL
2957#endif
2958 emit_xorimm(temp,24,temp);
2959 emit_movimm(-1,HOST_TEMPREG);
2960 if (opcode[i]==0x2a) { // SWL
2961 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2962 emit_orrshr(rt,temp,temp2);
2963 }else{
2964 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2965 emit_orrshl(rt,temp,temp2);
2966 }
2967 emit_readword(&address,addr);
2968 emit_writeword(temp2,&word);
2969 //save_regs(reglist); // don't need to, no state changes
2970 emit_shrimm(addr,16,1);
2971 emit_movimm((u_int)writemem,0);
2972 //emit_call((int)&indirect_jump_indexed);
2973 emit_mov(15,14);
2974 emit_readword_dualindexedx4(0,1,15);
2975 emit_readword(&Count,HOST_TEMPREG);
2976 emit_readword(&next_interupt,2);
2977 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2978 emit_writeword(2,&last_count);
2979 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2980 if(cc<0) {
2981 emit_storereg(CCREG,HOST_TEMPREG);
2982 }
2983 restore_regs(reglist);
2984 emit_jmp(stubs[n].retaddr); // return address
2985#endif
2986}
2987
2988static void do_invstub(int n)
2989{
2990 literal_pool(20);
2991 u_int reglist=stubs[n].a;
2992 set_jump_target(stubs[n].addr, out);
2993 save_regs(reglist);
2994 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2995 emit_call(&invalidate_addr);
2996 restore_regs(reglist);
2997 emit_jmp(stubs[n].retaddr); // return address
2998}
2999
3000void *do_dirty_stub(int i)
3001{
3002 assem_debug("do_dirty_stub %x\n",start+i*4);
3003 u_int addr=(u_int)source;
3004 // Careful about the code output here, verify_dirty needs to parse it.
3005 #ifndef HAVE_ARMV7
3006 emit_loadlp(addr,1);
3007 emit_loadlp((int)copy,2);
3008 emit_loadlp(slen*4,3);
3009 #else
3010 emit_movw(addr&0x0000FFFF,1);
3011 emit_movw(((u_int)copy)&0x0000FFFF,2);
3012 emit_movt(addr&0xFFFF0000,1);
3013 emit_movt(((u_int)copy)&0xFFFF0000,2);
3014 emit_movw(slen*4,3);
3015 #endif
3016 emit_movimm(start+i*4,0);
3017 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
3018 void *entry = out;
3019 load_regs_entry(i);
3020 if (entry == out)
3021 entry = instr_addr[i];
3022 emit_jmp(instr_addr[i]);
3023 return entry;
3024}
3025
3026static void do_dirty_stub_ds()
3027{
3028 // Careful about the code output here, verify_dirty needs to parse it.
3029 #ifndef HAVE_ARMV7
3030 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3031 emit_loadlp((int)copy,2);
3032 emit_loadlp(slen*4,3);
3033 #else
3034 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3035 emit_movw(((u_int)copy)&0x0000FFFF,2);
3036 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3037 emit_movt(((u_int)copy)&0xFFFF0000,2);
3038 emit_movw(slen*4,3);
3039 #endif
3040 emit_movimm(start+1,0);
3041 emit_call(&verify_code_ds);
3042}
3043
3044static void do_cop1stub(int n)
3045{
3046 literal_pool(256);
3047 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
3048 set_jump_target(stubs[n].addr, out);
3049 int i=stubs[n].a;
3050// int rs=stubs[n].b;
3051 struct regstat *i_regs=(struct regstat *)stubs[n].c;
3052 int ds=stubs[n].d;
3053 if(!ds) {
3054 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3055 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3056 }
3057 //else {printf("fp exception in delay slot\n");}
3058 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3059 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3060 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3061 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3062 emit_jmp(ds?fp_exception_ds:fp_exception);
3063}
3064
3065/* Special assem */
3066
3067static void shift_assemble_arm(int i,struct regstat *i_regs)
3068{
3069 if(rt1[i]) {
3070 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3071 {
3072 signed char s,t,shift;
3073 t=get_reg(i_regs->regmap,rt1[i]);
3074 s=get_reg(i_regs->regmap,rs1[i]);
3075 shift=get_reg(i_regs->regmap,rs2[i]);
3076 if(t>=0){
3077 if(rs1[i]==0)
3078 {
3079 emit_zeroreg(t);
3080 }
3081 else if(rs2[i]==0)
3082 {
3083 assert(s>=0);
3084 if(s!=t) emit_mov(s,t);
3085 }
3086 else
3087 {
3088 emit_andimm(shift,31,HOST_TEMPREG);
3089 if(opcode2[i]==4) // SLLV
3090 {
3091 emit_shl(s,HOST_TEMPREG,t);
3092 }
3093 if(opcode2[i]==6) // SRLV
3094 {
3095 emit_shr(s,HOST_TEMPREG,t);
3096 }
3097 if(opcode2[i]==7) // SRAV
3098 {
3099 emit_sar(s,HOST_TEMPREG,t);
3100 }
3101 }
3102 }
3103 } else { // DSLLV/DSRLV/DSRAV
3104 signed char sh,sl,th,tl,shift;
3105 th=get_reg(i_regs->regmap,rt1[i]|64);
3106 tl=get_reg(i_regs->regmap,rt1[i]);
3107 sh=get_reg(i_regs->regmap,rs1[i]|64);
3108 sl=get_reg(i_regs->regmap,rs1[i]);
3109 shift=get_reg(i_regs->regmap,rs2[i]);
3110 if(tl>=0){
3111 if(rs1[i]==0)
3112 {
3113 emit_zeroreg(tl);
3114 if(th>=0) emit_zeroreg(th);
3115 }
3116 else if(rs2[i]==0)
3117 {
3118 assert(sl>=0);
3119 if(sl!=tl) emit_mov(sl,tl);
3120 if(th>=0&&sh!=th) emit_mov(sh,th);
3121 }
3122 else
3123 {
3124 // FIXME: What if shift==tl ?
3125 assert(shift!=tl);
3126 int temp=get_reg(i_regs->regmap,-1);
3127 int real_th=th;
3128 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3129 assert(sl>=0);
3130 assert(sh>=0);
3131 emit_andimm(shift,31,HOST_TEMPREG);
3132 if(opcode2[i]==0x14) // DSLLV
3133 {
3134 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3135 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3136 emit_orrshr(sl,HOST_TEMPREG,th);
3137 emit_andimm(shift,31,HOST_TEMPREG);
3138 emit_testimm(shift,32);
3139 emit_shl(sl,HOST_TEMPREG,tl);
3140 if(th>=0) emit_cmovne_reg(tl,th);
3141 emit_cmovne_imm(0,tl);
3142 }
3143 if(opcode2[i]==0x16) // DSRLV
3144 {
3145 assert(th>=0);
3146 emit_shr(sl,HOST_TEMPREG,tl);
3147 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3148 emit_orrshl(sh,HOST_TEMPREG,tl);
3149 emit_andimm(shift,31,HOST_TEMPREG);
3150 emit_testimm(shift,32);
3151 emit_shr(sh,HOST_TEMPREG,th);
3152 emit_cmovne_reg(th,tl);
3153 if(real_th>=0) emit_cmovne_imm(0,th);
3154 }
3155 if(opcode2[i]==0x17) // DSRAV
3156 {
3157 assert(th>=0);
3158 emit_shr(sl,HOST_TEMPREG,tl);
3159 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3160 if(real_th>=0) {
3161 assert(temp>=0);
3162 emit_sarimm(th,31,temp);
3163 }
3164 emit_orrshl(sh,HOST_TEMPREG,tl);
3165 emit_andimm(shift,31,HOST_TEMPREG);
3166 emit_testimm(shift,32);
3167 emit_sar(sh,HOST_TEMPREG,th);
3168 emit_cmovne_reg(th,tl);
3169 if(real_th>=0) emit_cmovne_reg(temp,th);
3170 }
3171 }
3172 }
3173 }
3174 }
3175}
3176
3177static void speculate_mov(int rs,int rt)
3178{
3179 if(rt!=0) {
3180 smrv_strong_next|=1<<rt;
3181 smrv[rt]=smrv[rs];
3182 }
3183}
3184
3185static void speculate_mov_weak(int rs,int rt)
3186{
3187 if(rt!=0) {
3188 smrv_weak_next|=1<<rt;
3189 smrv[rt]=smrv[rs];
3190 }
3191}
3192
3193static void speculate_register_values(int i)
3194{
3195 if(i==0) {
3196 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3197 // gp,sp are likely to stay the same throughout the block
3198 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3199 smrv_weak_next=~smrv_strong_next;
3200 //printf(" llr %08x\n", smrv[4]);
3201 }
3202 smrv_strong=smrv_strong_next;
3203 smrv_weak=smrv_weak_next;
3204 switch(itype[i]) {
3205 case ALU:
3206 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3207 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3208 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3209 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3210 else {
3211 smrv_strong_next&=~(1<<rt1[i]);
3212 smrv_weak_next&=~(1<<rt1[i]);
3213 }
3214 break;
3215 case SHIFTIMM:
3216 smrv_strong_next&=~(1<<rt1[i]);
3217 smrv_weak_next&=~(1<<rt1[i]);
3218 // fallthrough
3219 case IMM16:
3220 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3221 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3222 if(hr>=0) {
3223 if(get_final_value(hr,i,&value))
3224 smrv[rt1[i]]=value;
3225 else smrv[rt1[i]]=constmap[i][hr];
3226 smrv_strong_next|=1<<rt1[i];
3227 }
3228 }
3229 else {
3230 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3231 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3232 }
3233 break;
3234 case LOAD:
3235 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3236 // special case for BIOS
3237 smrv[rt1[i]]=0xa0000000;
3238 smrv_strong_next|=1<<rt1[i];
3239 break;
3240 }
3241 // fallthrough
3242 case SHIFT:
3243 case LOADLR:
3244 case MOV:
3245 smrv_strong_next&=~(1<<rt1[i]);
3246 smrv_weak_next&=~(1<<rt1[i]);
3247 break;
3248 case COP0:
3249 case COP2:
3250 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3251 smrv_strong_next&=~(1<<rt1[i]);
3252 smrv_weak_next&=~(1<<rt1[i]);
3253 }
3254 break;
3255 case C2LS:
3256 if (opcode[i]==0x32) { // LWC2
3257 smrv_strong_next&=~(1<<rt1[i]);
3258 smrv_weak_next&=~(1<<rt1[i]);
3259 }
3260 break;
3261 }
3262#if 0
3263 int r=4;
3264 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3265 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3266#endif
3267}
3268
// Address classes produced by get_ptr_mem_type(). MTYPE_8000 must stay 0:
// emit_fastpath_cmp_jump() treats type 0 as "do the plain RAM check".
enum {
  MTYPE_8000 = 0, // everything not matched by the classes below
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3276
3277static int get_ptr_mem_type(u_int a)
3278{
3279 if(a < 0x00200000) {
3280 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3281 // return wrong, must use memhandler for BIOS self-test to pass
3282 // 007 does similar stuff from a00 mirror, weird stuff
3283 return MTYPE_8000;
3284 return MTYPE_0000;
3285 }
3286 if(0x1f800000 <= a && a < 0x1f801000)
3287 return MTYPE_1F80;
3288 if(0x80200000 <= a && a < 0x80800000)
3289 return MTYPE_8020;
3290 if(0xa0000000 <= a && a < 0xa0200000)
3291 return MTYPE_A000;
3292 return MTYPE_8000;
3293}
3294
3295static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3296{
3297 void *jaddr = NULL;
3298 int type=0;
3299 int mr=rs1[i];
3300 if(((smrv_strong|smrv_weak)>>mr)&1) {
3301 type=get_ptr_mem_type(smrv[mr]);
3302 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3303 }
3304 else {
3305 // use the mirror we are running on
3306 type=get_ptr_mem_type(start);
3307 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3308 }
3309
3310 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3311 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3312 addr=*addr_reg_override=HOST_TEMPREG;
3313 type=0;
3314 }
3315 else if(type==MTYPE_0000) { // RAM 0 mirror
3316 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3317 addr=*addr_reg_override=HOST_TEMPREG;
3318 type=0;
3319 }
3320 else if(type==MTYPE_A000) { // RAM A mirror
3321 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3322 addr=*addr_reg_override=HOST_TEMPREG;
3323 type=0;
3324 }
3325 else if(type==MTYPE_1F80) { // scratchpad
3326 if (psxH == (void *)0x1f800000) {
3327 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3328 emit_cmpimm(HOST_TEMPREG,0x1000);
3329 jaddr=out;
3330 emit_jc(0);
3331 }
3332 else {
3333 // do usual RAM check, jump will go to the right handler
3334 type=0;
3335 }
3336 }
3337
3338 if(type==0)
3339 {
3340 emit_cmpimm(addr,RAM_SIZE);
3341 jaddr=out;
3342 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3343 // Hint to branch predictor that the branch is unlikely to be taken
3344 if(rs1[i]>=28)
3345 emit_jno_unlikely(0);
3346 else
3347 #endif
3348 emit_jno(0);
3349 if(ram_offset!=0) {
3350 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3351 addr=*addr_reg_override=HOST_TEMPREG;
3352 }
3353 }
3354
3355 return jaddr;
3356}
3357
3358#define shift_assemble shift_assemble_arm
3359
3360static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3361{
3362 int s,th,tl,temp,temp2,addr,map=-1;
3363 int offset;
3364 void *jaddr=0;
3365 int memtarget=0,c=0;
3366 int fastload_reg_override=0;
3367 u_int hr,reglist=0;
3368 th=get_reg(i_regs->regmap,rt1[i]|64);
3369 tl=get_reg(i_regs->regmap,rt1[i]);
3370 s=get_reg(i_regs->regmap,rs1[i]);
3371 temp=get_reg(i_regs->regmap,-1);
3372 temp2=get_reg(i_regs->regmap,FTEMP);
3373 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3374 assert(addr<0);
3375 offset=imm[i];
3376 for(hr=0;hr<HOST_REGS;hr++) {
3377 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3378 }
3379 reglist|=1<<temp;
3380 if(offset||s<0||c) addr=temp2;
3381 else addr=s;
3382 if(s>=0) {
3383 c=(i_regs->wasconst>>s)&1;
3384 if(c) {
3385 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3386 }
3387 }
3388 if(!c) {
3389 #ifdef RAM_OFFSET
3390 map=get_reg(i_regs->regmap,ROREG);
3391 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3392 #endif
3393 emit_shlimm(addr,3,temp);
3394 if (opcode[i]==0x22||opcode[i]==0x26) {
3395 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3396 }else{
3397 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3398 }
3399 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3400 }
3401 else {
3402 if(ram_offset&&memtarget) {
3403 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3404 fastload_reg_override=HOST_TEMPREG;
3405 }
3406 if (opcode[i]==0x22||opcode[i]==0x26) {
3407 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3408 }else{
3409 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3410 }
3411 }
3412 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3413 if(!c||memtarget) {
3414 int a=temp2;
3415 if(fastload_reg_override) a=fastload_reg_override;
3416 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3417 emit_readword_indexed_tlb(0,a,map,temp2);
3418 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
3419 }
3420 else
3421 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3422 if(rt1[i]) {
3423 assert(tl>=0);
3424 emit_andimm(temp,24,temp);
3425#ifdef BIG_ENDIAN_MIPS
3426 if (opcode[i]==0x26) // LWR
3427#else
3428 if (opcode[i]==0x22) // LWL
3429#endif
3430 emit_xorimm(temp,24,temp);
3431 emit_movimm(-1,HOST_TEMPREG);
3432 if (opcode[i]==0x26) {
3433 emit_shr(temp2,temp,temp2);
3434 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3435 }else{
3436 emit_shl(temp2,temp,temp2);
3437 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3438 }
3439 emit_or(temp2,tl,tl);
3440 }
3441 //emit_storereg(rt1[i],tl); // DEBUG
3442 }
3443 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3444 // FIXME: little endian, fastload_reg_override
3445 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3446 if(!c||memtarget) {
3447 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3448 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3449 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3450 if(jaddr) add_stub_r(LOADD_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
3451 }
3452 else
3453 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3454 if(rt1[i]) {
3455 assert(th>=0);
3456 assert(tl>=0);
3457 emit_testimm(temp,32);
3458 emit_andimm(temp,24,temp);
3459 if (opcode[i]==0x1A) { // LDL
3460 emit_rsbimm(temp,32,HOST_TEMPREG);
3461 emit_shl(temp2h,temp,temp2h);
3462 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3463 emit_movimm(-1,HOST_TEMPREG);
3464 emit_shl(temp2,temp,temp2);
3465 emit_cmove_reg(temp2h,th);
3466 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3467 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3468 emit_orreq(temp2,tl,tl);
3469 emit_orrne(temp2,th,th);
3470 }
3471 if (opcode[i]==0x1B) { // LDR
3472 emit_xorimm(temp,24,temp);
3473 emit_rsbimm(temp,32,HOST_TEMPREG);
3474 emit_shr(temp2,temp,temp2);
3475 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3476 emit_movimm(-1,HOST_TEMPREG);
3477 emit_shr(temp2h,temp,temp2h);
3478 emit_cmovne_reg(temp2,tl);
3479 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3480 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3481 emit_orrne(temp2h,th,th);
3482 emit_orreq(temp2h,tl,tl);
3483 }
3484 }
3485 }
3486}
3487#define loadlr_assemble loadlr_assemble_arm
3488
3489static void cop0_assemble(int i,struct regstat *i_regs)
3490{
3491 if(opcode2[i]==0) // MFC0
3492 {
3493 signed char t=get_reg(i_regs->regmap,rt1[i]);
3494 u_int copr=(source[i]>>11)&0x1f;
3495 //assert(t>=0); // Why does this happen? OOT is weird
3496 if(t>=0&&rt1[i]!=0) {
3497 emit_readword(&reg_cop0[copr],t);
3498 }
3499 }
3500 else if(opcode2[i]==4) // MTC0
3501 {
3502 signed char s=get_reg(i_regs->regmap,rs1[i]);
3503 char copr=(source[i]>>11)&0x1f;
3504 assert(s>=0);
3505 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3506 if(copr==9||copr==11||copr==12||copr==13) {
3507 emit_readword(&last_count,HOST_TEMPREG);
3508 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3509 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3510 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3511 emit_writeword(HOST_CCREG,&Count);
3512 }
3513 // What a mess. The status register (12) can enable interrupts,
3514 // so needs a special case to handle a pending interrupt.
3515 // The interrupt must be taken immediately, because a subsequent
3516 // instruction might disable interrupts again.
3517 if(copr==12||copr==13) {
3518 if (is_delayslot) {
3519 // burn cycles to cause cc_interrupt, which will
3520 // reschedule next_interupt. Relies on CCREG from above.
3521 assem_debug("MTC0 DS %d\n", copr);
3522 emit_writeword(HOST_CCREG,&last_count);
3523 emit_movimm(0,HOST_CCREG);
3524 emit_storereg(CCREG,HOST_CCREG);
3525 emit_loadreg(rs1[i],1);
3526 emit_movimm(copr,0);
3527 emit_call(pcsx_mtc0_ds);
3528 emit_loadreg(rs1[i],s);
3529 return;
3530 }
3531 emit_movimm(start+i*4+4,HOST_TEMPREG);
3532 emit_writeword(HOST_TEMPREG,&pcaddr);
3533 emit_movimm(0,HOST_TEMPREG);
3534 emit_writeword(HOST_TEMPREG,&pending_exception);
3535 }
3536 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3537 //else
3538 if(s==HOST_CCREG)
3539 emit_loadreg(rs1[i],1);
3540 else if(s!=1)
3541 emit_mov(s,1);
3542 emit_movimm(copr,0);
3543 emit_call(pcsx_mtc0);
3544 if(copr==9||copr==11||copr==12||copr==13) {
3545 emit_readword(&Count,HOST_CCREG);
3546 emit_readword(&next_interupt,HOST_TEMPREG);
3547 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3548 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3549 emit_writeword(HOST_TEMPREG,&last_count);
3550 emit_storereg(CCREG,HOST_CCREG);
3551 }
3552 if(copr==12||copr==13) {
3553 assert(!is_delayslot);
3554 emit_readword(&pending_exception,14);
3555 emit_test(14,14);
3556 emit_jne(&do_interrupt);
3557 }
3558 emit_loadreg(rs1[i],s);
3559 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3560 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3561 cop1_usable=0;
3562 }
3563 else
3564 {
3565 assert(opcode2[i]==0x10);
3566 if((source[i]&0x3f)==0x10) // RFE
3567 {
3568 emit_readword(&Status,0);
3569 emit_andimm(0,0x3c,1);
3570 emit_andimm(0,~0xf,0);
3571 emit_orrshr_imm(1,2,0);
3572 emit_writeword(0,&Status);
3573 }
3574 }
3575}
3576
3577static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3578{
3579 switch (copr) {
3580 case 1:
3581 case 3:
3582 case 5:
3583 case 8:
3584 case 9:
3585 case 10:
3586 case 11:
3587 emit_readword(&reg_cop2d[copr],tl);
3588 emit_signextend16(tl,tl);
3589 emit_writeword(tl,&reg_cop2d[copr]); // hmh
3590 break;
3591 case 7:
3592 case 16:
3593 case 17:
3594 case 18:
3595 case 19:
3596 emit_readword(&reg_cop2d[copr],tl);
3597 emit_andimm(tl,0xffff,tl);
3598 emit_writeword(tl,&reg_cop2d[copr]);
3599 break;
3600 case 15:
3601 emit_readword(&reg_cop2d[14],tl); // SXY2
3602 emit_writeword(tl,&reg_cop2d[copr]);
3603 break;
3604 case 28:
3605 case 29:
3606 emit_readword(&reg_cop2d[9],temp);
3607 emit_testimm(temp,0x8000); // do we need this?
3608 emit_andimm(temp,0xf80,temp);
3609 emit_andne_imm(temp,0,temp);
3610 emit_shrimm(temp,7,tl);
3611 emit_readword(&reg_cop2d[10],temp);
3612 emit_testimm(temp,0x8000);
3613 emit_andimm(temp,0xf80,temp);
3614 emit_andne_imm(temp,0,temp);
3615 emit_orrshr_imm(temp,2,tl);
3616 emit_readword(&reg_cop2d[11],temp);
3617 emit_testimm(temp,0x8000);
3618 emit_andimm(temp,0xf80,temp);
3619 emit_andne_imm(temp,0,temp);
3620 emit_orrshl_imm(temp,3,tl);
3621 emit_writeword(tl,&reg_cop2d[copr]);
3622 break;
3623 default:
3624 emit_readword(&reg_cop2d[copr],tl);
3625 break;
3626 }
3627}
3628
3629static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3630{
3631 switch (copr) {
3632 case 15:
3633 emit_readword(&reg_cop2d[13],temp); // SXY1
3634 emit_writeword(sl,&reg_cop2d[copr]);
3635 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3636 emit_readword(&reg_cop2d[14],temp); // SXY2
3637 emit_writeword(sl,&reg_cop2d[14]);
3638 emit_writeword(temp,&reg_cop2d[13]); // SXY1
3639 break;
3640 case 28:
3641 emit_andimm(sl,0x001f,temp);
3642 emit_shlimm(temp,7,temp);
3643 emit_writeword(temp,&reg_cop2d[9]);
3644 emit_andimm(sl,0x03e0,temp);
3645 emit_shlimm(temp,2,temp);
3646 emit_writeword(temp,&reg_cop2d[10]);
3647 emit_andimm(sl,0x7c00,temp);
3648 emit_shrimm(temp,3,temp);
3649 emit_writeword(temp,&reg_cop2d[11]);
3650 emit_writeword(sl,&reg_cop2d[28]);
3651 break;
3652 case 30:
3653 emit_movs(sl,temp);
3654 emit_mvnmi(temp,temp);
3655#ifdef HAVE_ARMV5
3656 emit_clz(temp,temp);
3657#else
3658 emit_movs(temp,HOST_TEMPREG);
3659 emit_movimm(0,temp);
3660 emit_jeq((int)out+4*4);
3661 emit_addpl_imm(temp,1,temp);
3662 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3663 emit_jns((int)out-2*4);
3664#endif
3665 emit_writeword(sl,&reg_cop2d[30]);
3666 emit_writeword(temp,&reg_cop2d[31]);
3667 break;
3668 case 31:
3669 break;
3670 default:
3671 emit_writeword(sl,&reg_cop2d[copr]);
3672 break;
3673 }
3674}
3675
3676static void cop2_assemble(int i,struct regstat *i_regs)
3677{
3678 u_int copr=(source[i]>>11)&0x1f;
3679 signed char temp=get_reg(i_regs->regmap,-1);
3680 if (opcode2[i]==0) { // MFC2
3681 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3682 if(tl>=0&&rt1[i]!=0)
3683 cop2_get_dreg(copr,tl,temp);
3684 }
3685 else if (opcode2[i]==4) { // MTC2
3686 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3687 cop2_put_dreg(copr,sl,temp);
3688 }
3689 else if (opcode2[i]==2) // CFC2
3690 {
3691 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3692 if(tl>=0&&rt1[i]!=0)
3693 emit_readword(&reg_cop2c[copr],tl);
3694 }
3695 else if (opcode2[i]==6) // CTC2
3696 {
3697 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3698 switch(copr) {
3699 case 4:
3700 case 12:
3701 case 20:
3702 case 26:
3703 case 27:
3704 case 29:
3705 case 30:
3706 emit_signextend16(sl,temp);
3707 break;
3708 case 31:
3709 //value = value & 0x7ffff000;
3710 //if (value & 0x7f87e000) value |= 0x80000000;
3711 emit_shrimm(sl,12,temp);
3712 emit_shlimm(temp,12,temp);
3713 emit_testimm(temp,0x7f000000);
3714 emit_testeqimm(temp,0x00870000);
3715 emit_testeqimm(temp,0x0000e000);
3716 emit_orrne_imm(temp,0x80000000,temp);
3717 break;
3718 default:
3719 temp=sl;
3720 break;
3721 }
3722 emit_writeword(temp,&reg_cop2c[copr]);
3723 assert(sl>=0);
3724 }
3725}
3726
3727static void c2op_prologue(u_int op,u_int reglist)
3728{
3729 save_regs_all(reglist);
3730#ifdef PCNT
3731 emit_movimm(op,0);
3732 emit_call((int)pcnt_gte_start);
3733#endif
3734 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3735}
3736
3737static void c2op_epilogue(u_int op,u_int reglist)
3738{
3739#ifdef PCNT
3740 emit_movimm(op,0);
3741 emit_call((int)pcnt_gte_end);
3742#endif
3743 restore_regs_all(reglist);
3744}
3745
3746static void c2op_call_MACtoIR(int lm,int need_flags)
3747{
3748 if(need_flags)
3749 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
3750 else
3751 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
3752}
3753
3754static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3755{
3756 emit_call(func);
3757 // func is C code and trashes r0
3758 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3759 if(need_flags||need_ir)
3760 c2op_call_MACtoIR(lm,need_flags);
3761 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
3762}
3763
3764static void c2op_assemble(int i,struct regstat *i_regs)
3765{
3766 u_int c2op=source[i]&0x3f;
3767 u_int hr,reglist_full=0,reglist;
3768 int need_flags,need_ir;
3769 for(hr=0;hr<HOST_REGS;hr++) {
3770 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3771 }
3772 reglist=reglist_full&CALLER_SAVE_REGS;
3773
3774 if (gte_handlers[c2op]!=NULL) {
3775 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
3776 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3777 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3778 source[i],gte_unneeded[i+1],need_flags,need_ir);
3779 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3780 need_flags=0;
3781 int shift = (source[i] >> 19) & 1;
3782 int lm = (source[i] >> 10) & 1;
3783 switch(c2op) {
3784#ifndef DRC_DBG
3785 case GTE_MVMVA: {
3786#ifdef HAVE_ARMV5
3787 int v = (source[i] >> 15) & 3;
3788 int cv = (source[i] >> 13) & 3;
3789 int mx = (source[i] >> 17) & 3;
3790 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3791 c2op_prologue(c2op,reglist);
3792 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3793 if(v<3)
3794 emit_ldrd(v*8,0,4);
3795 else {
3796 emit_movzwl_indexed(9*4,0,4); // gteIR
3797 emit_movzwl_indexed(10*4,0,6);
3798 emit_movzwl_indexed(11*4,0,5);
3799 emit_orrshl_imm(6,16,4);
3800 }
3801 if(mx<3)
3802 emit_addimm(0,32*4+mx*8*4,6);
3803 else
3804 emit_readword(&zeromem_ptr,6);
3805 if(cv<3)
3806 emit_addimm(0,32*4+(cv*8+5)*4,7);
3807 else
3808 emit_readword(&zeromem_ptr,7);
3809#ifdef __ARM_NEON__
3810 emit_movimm(source[i],1); // opcode
3811 emit_call(gteMVMVA_part_neon);
3812 if(need_flags) {
3813 emit_movimm(lm,1);
3814 emit_call(gteMACtoIR_flags_neon);
3815 }
3816#else
3817 if(cv==3&&shift)
3818 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3819 else {
3820 emit_movimm(shift,1);
3821 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3822 }
3823 if(need_flags||need_ir)
3824 c2op_call_MACtoIR(lm,need_flags);
3825#endif
3826#else /* if not HAVE_ARMV5 */
3827 c2op_prologue(c2op,reglist);
3828 emit_movimm(source[i],1); // opcode
3829 emit_writeword(1,&psxRegs.code);
3830 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3831#endif
3832 break;
3833 }
3834 case GTE_OP:
3835 c2op_prologue(c2op,reglist);
3836 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
3837 if(need_flags||need_ir) {
3838 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3839 c2op_call_MACtoIR(lm,need_flags);
3840 }
3841 break;
3842 case GTE_DPCS:
3843 c2op_prologue(c2op,reglist);
3844 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3845 break;
3846 case GTE_INTPL:
3847 c2op_prologue(c2op,reglist);
3848 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3849 break;
3850 case GTE_SQR:
3851 c2op_prologue(c2op,reglist);
3852 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
3853 if(need_flags||need_ir) {
3854 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3855 c2op_call_MACtoIR(lm,need_flags);
3856 }
3857 break;
3858 case GTE_DCPL:
3859 c2op_prologue(c2op,reglist);
3860 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3861 break;
3862 case GTE_GPF:
3863 c2op_prologue(c2op,reglist);
3864 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3865 break;
3866 case GTE_GPL:
3867 c2op_prologue(c2op,reglist);
3868 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3869 break;
3870#endif
3871 default:
3872 c2op_prologue(c2op,reglist);
3873#ifdef DRC_DBG
3874 emit_movimm(source[i],1); // opcode
3875 emit_writeword(1,&psxRegs.code);
3876#endif
3877 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3878 break;
3879 }
3880 c2op_epilogue(c2op,reglist);
3881 }
3882}
3883
3884static void cop1_unusable(int i,struct regstat *i_regs)
3885{
3886 // XXX: should just just do the exception instead
3887 if(!cop1_usable) {
3888 void *jaddr=out;
3889 emit_jmp(0);
3890 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3891 cop1_usable=1;
3892 }
3893}
3894
// COP1 instructions: no code is generated for the operation itself,
// everything is delegated to cop1_unusable().
static void cop1_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3899
// Float conversion instructions: delegated to cop1_unusable().
static void fconv_assemble_arm(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
#define fconv_assemble fconv_assemble_arm
3905
// Float compare instructions: delegated to cop1_unusable().
static void fcomp_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3910
// Float arithmetic instructions: delegated to cop1_unusable().
static void float_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3915
3916static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3917{
3918 // case 0x18: MULT
3919 // case 0x19: MULTU
3920 // case 0x1A: DIV
3921 // case 0x1B: DIVU
3922 // case 0x1C: DMULT
3923 // case 0x1D: DMULTU
3924 // case 0x1E: DDIV
3925 // case 0x1F: DDIVU
3926 if(rs1[i]&&rs2[i])
3927 {
3928 if((opcode2[i]&4)==0) // 32-bit
3929 {
3930 if(opcode2[i]==0x18) // MULT
3931 {
3932 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3933 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3934 signed char hi=get_reg(i_regs->regmap,HIREG);
3935 signed char lo=get_reg(i_regs->regmap,LOREG);
3936 assert(m1>=0);
3937 assert(m2>=0);
3938 assert(hi>=0);
3939 assert(lo>=0);
3940 emit_smull(m1,m2,hi,lo);
3941 }
3942 if(opcode2[i]==0x19) // MULTU
3943 {
3944 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3945 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3946 signed char hi=get_reg(i_regs->regmap,HIREG);
3947 signed char lo=get_reg(i_regs->regmap,LOREG);
3948 assert(m1>=0);
3949 assert(m2>=0);
3950 assert(hi>=0);
3951 assert(lo>=0);
3952 emit_umull(m1,m2,hi,lo);
3953 }
3954 if(opcode2[i]==0x1A) // DIV
3955 {
3956 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3957 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3958 assert(d1>=0);
3959 assert(d2>=0);
3960 signed char quotient=get_reg(i_regs->regmap,LOREG);
3961 signed char remainder=get_reg(i_regs->regmap,HIREG);
3962 assert(quotient>=0);
3963 assert(remainder>=0);
3964 emit_movs(d1,remainder);
3965 emit_movimm(0xffffffff,quotient);
3966 emit_negmi(quotient,quotient); // .. quotient and ..
3967 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3968 emit_movs(d2,HOST_TEMPREG);
3969 emit_jeq((int)out+52); // Division by zero
3970 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3971#ifdef HAVE_ARMV5
3972 emit_clz(HOST_TEMPREG,quotient);
3973 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3974#else
3975 emit_movimm(0,quotient);
3976 emit_addpl_imm(quotient,1,quotient);
3977 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3978 emit_jns((int)out-2*4);
3979#endif
3980 emit_orimm(quotient,1<<31,quotient);
3981 emit_shr(quotient,quotient,quotient);
3982 emit_cmp(remainder,HOST_TEMPREG);
3983 emit_subcs(remainder,HOST_TEMPREG,remainder);
3984 emit_adcs(quotient,quotient,quotient);
3985 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3986 emit_jcc(out-16); // -4
3987 emit_teq(d1,d2);
3988 emit_negmi(quotient,quotient);
3989 emit_test(d1,d1);
3990 emit_negmi(remainder,remainder);
3991 }
3992 if(opcode2[i]==0x1B) // DIVU
3993 {
3994 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3995 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3996 assert(d1>=0);
3997 assert(d2>=0);
3998 signed char quotient=get_reg(i_regs->regmap,LOREG);
3999 signed char remainder=get_reg(i_regs->regmap,HIREG);
4000 assert(quotient>=0);
4001 assert(remainder>=0);
4002 emit_mov(d1,remainder);
4003 emit_movimm(0xffffffff,quotient); // div0 case
4004 emit_test(d2,d2);
4005 emit_jeq((int)out+40); // Division by zero
4006#ifdef HAVE_ARMV5
4007 emit_clz(d2,HOST_TEMPREG);
4008 emit_movimm(1<<31,quotient);
4009 emit_shl(d2,HOST_TEMPREG,d2);
4010#else
4011 emit_movimm(0,HOST_TEMPREG);
4012 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4013 emit_lslpls_imm(d2,1,d2);
4014 emit_jns((int)out-2*4);
4015 emit_movimm(1<<31,quotient);
4016#endif
4017 emit_shr(quotient,HOST_TEMPREG,quotient);
4018 emit_cmp(remainder,d2);
4019 emit_subcs(remainder,d2,remainder);
4020 emit_adcs(quotient,quotient,quotient);
4021 emit_shrcc_imm(d2,1,d2);
4022 emit_jcc(out-16); // -4
4023 }
4024 }
4025 else // 64-bit
4026 assert(0);
4027 }
4028 else
4029 {
4030 // Multiply by zero is zero.
4031 // MIPS does not have a divide by zero exception.
4032 // The result is undefined, we return zero.
4033 signed char hr=get_reg(i_regs->regmap,HIREG);
4034 signed char lr=get_reg(i_regs->regmap,LOREG);
4035 if(hr>=0) emit_zeroreg(hr);
4036 if(lr>=0) emit_zeroreg(lr);
4037 }
4038}
4039#define multdiv_assemble multdiv_assemble_arm
4040
// Intentionally empty on ARM.
// On x86 this step loads the value 0xf8 into register r; on ARM the hash
// is computed with a single instruction in do_rhash(), so there is
// nothing to preload.
static void do_preload_rhash(int r)
{
  (void)r;
}
4045
4046static void do_preload_rhtbl(int ht) {
4047 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4048}
4049
// Compute the mini hash table offset for the address in rs:
// rh = rs & 0xf8.
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
4053
4054static void do_miniht_load(int ht,int rh) {
4055 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4056 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4057}
4058
4059static void do_miniht_jump(int rs,int rh,int ht) {
4060 emit_cmp(rh,rs);
4061 emit_ldreq_indexed(ht,4,15);
4062 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4063 emit_mov(rs,7);
4064 emit_jmp(jump_vaddr_reg[7]);
4065 #else
4066 emit_jmp(jump_vaddr_reg[rs]);
4067 #endif
4068}
4069
4070static void do_miniht_insert(u_int return_address,int rt,int temp) {
4071 #ifndef HAVE_ARMV7
4072 emit_movimm(return_address,rt); // PC into link register
4073 add_to_linker(out,return_address,1);
4074 emit_pcreladdr(temp);
4075 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
4076 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
4077 #else
4078 emit_movw(return_address&0x0000FFFF,rt);
4079 add_to_linker(out,return_address,1);
4080 emit_pcreladdr(temp);
4081 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
4082 emit_movt(return_address&0xFFFF0000,rt);
4083 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
4084 #endif
4085}
4086
4087static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4088{
4089 //if(dirty_pre==dirty) return;
4090 int hr,reg;
4091 for(hr=0;hr<HOST_REGS;hr++) {
4092 if(hr!=EXCLUDE_REG) {
4093 reg=pre[hr];
4094 if(((~u)>>(reg&63))&1) {
4095 if(reg>0) {
4096 if(((dirty_pre&~dirty)>>hr)&1) {
4097 if(reg>0&&reg<34) {
4098 emit_storereg(reg,hr);
4099 if( ((is32_pre&~uu)>>reg)&1 ) {
4100 emit_sarimm(hr,31,HOST_TEMPREG);
4101 emit_storereg(reg|64,HOST_TEMPREG);
4102 }
4103 }
4104 else if(reg>=64) {
4105 emit_storereg(reg,hr);
4106 }
4107 }
4108 }
4109 }
4110 }
4111 }
4112}
4113
4114
4115/* using strd could possibly help but you'd have to allocate registers in pairs
4116static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4117{
4118 int hr;
4119 int wrote=-1;
4120 for(hr=HOST_REGS-1;hr>=0;hr--) {
4121 if(hr!=EXCLUDE_REG) {
4122 if(pre[hr]!=entry[hr]) {
4123 if(pre[hr]>=0) {
4124 if((dirty>>hr)&1) {
4125 if(get_reg(entry,pre[hr])<0) {
4126 if(pre[hr]<64) {
4127 if(!((u>>pre[hr])&1)) {
4128 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4129 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4130 emit_sarimm(hr,31,hr+1);
4131 emit_strdreg(pre[hr],hr);
4132 }
4133 else
4134 emit_storereg(pre[hr],hr);
4135 }else{
4136 emit_storereg(pre[hr],hr);
4137 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4138 emit_sarimm(hr,31,hr);
4139 emit_storereg(pre[hr]|64,hr);
4140 }
4141 }
4142 }
4143 }else{
4144 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4145 emit_storereg(pre[hr],hr);
4146 }
4147 }
4148 wrote=hr;
4149 }
4150 }
4151 }
4152 }
4153 }
4154 }
4155 for(hr=0;hr<HOST_REGS;hr++) {
4156 if(hr!=EXCLUDE_REG) {
4157 if(pre[hr]!=entry[hr]) {
4158 if(pre[hr]>=0) {
4159 int nr;
4160 if((nr=get_reg(entry,pre[hr]))>=0) {
4161 emit_mov(hr,nr);
4162 }
4163 }
4164 }
4165 }
4166 }
4167}
4168#define wb_invalidate wb_invalidate_arm
4169*/
4170
4171static void mark_clear_cache(void *target)
4172{
4173 u_long offset = (u_char *)target - translation_cache;
4174 u_int mask = 1u << ((offset >> 12) & 31);
4175 if (!(needs_clear_cache[offset >> 17] & mask)) {
4176 char *start = (char *)((u_long)target & ~4095ul);
4177 start_tcache_write(start, start + 4096);
4178 needs_clear_cache[offset >> 17] |= mask;
4179 }
4180}
4181
4182// Clearing the cache is rather slow on ARM Linux, so mark the areas
4183// that need to be cleared, and then only clear these areas once.
4184static void do_clear_cache()
4185{
4186 int i,j;
4187 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4188 {
4189 u_int bitmap=needs_clear_cache[i];
4190 if(bitmap) {
4191 u_char *start, *end;
4192 for(j=0;j<32;j++)
4193 {
4194 if(bitmap&(1<<j)) {
4195 start=translation_cache+i*131072+j*4096;
4196 end=start+4095;
4197 j++;
4198 while(j<32) {
4199 if(bitmap&(1<<j)) {
4200 end+=4096;
4201 j++;
4202 }else{
4203 end_tcache_write(start, end);
4204 break;
4205 }
4206 }
4207 }
4208 }
4209 needs_clear_cache[i]=0;
4210 }
4211 }
4212}
4213
// CPU-architecture-specific initialization.
// Nothing is required on ARM. Declared with (void) so the definition
// also acts as a prototype and stray arguments are diagnosed.
static void arch_init(void)
{
}
4217
4218// vim:shiftwidth=2:expandtab