drc: remove yet more n64 stuff
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
// Translation cache storage. With BASE_ADDR_FIXED nothing is defined here;
// with BASE_ADDR_DYNAMIC only a pointer is defined; otherwise the cache is a
// static, 4096-byte-aligned array of 1<<TARGET_SIZE_2 bytes.
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Register bitmask: 0x100f has bits 0-3 and 12 set; the __MACH__ variant
// (0x120f) additionally sets bit 9. Presumably these are the ARM registers
// treated as caller-saved (r0-r3, r12, plus r9 on Mach-O) - confirm at the
// use sites, which are not visible in this part of the file.
38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
// Shorthand for marking a definition as intentionally unused.
44#define unused __attribute__((unused))
45
// In DRC_DBG builds, silence the unused-function/variable warnings.
46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
// Entry points that are only declared here; their definitions are not in
// this part of the file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

// One jump_vaddr entry point per usable host register.
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr entry point indexed by host register number.
// Slots 11 and 13-15 have no entry point and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
95
96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
// Array of 1<<(TARGET_SIZE_2-17) 32-bit words, i.e. 1<<(TARGET_SIZE_2-12)
// bits in total. Presumably one "needs cache flush" bit per 4096-byte chunk
// of the translation cache - confirm against the code that sets/tests it.
127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
128
129/* Linker */
130
131static void set_jump_target(void *addr, void *target_)
132{
133 u_int target = (u_int)target_;
134 u_char *ptr = addr;
135 u_int *ptr2=(u_int *)ptr;
136 if(ptr[3]==0xe2) {
137 assert((target-(u_int)ptr2-8)<1024);
138 assert(((uintptr_t)addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
142 }
143 else if(ptr[3]==0x72) {
144 // generated by emit_jno_unlikely
145 if((target-(u_int)ptr2-8)<1024) {
146 assert(((uintptr_t)addr&3)==0);
147 assert((target&3)==0);
148 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
149 }
150 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
151 assert(((uintptr_t)addr&3)==0);
152 assert((target&3)==0);
153 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
154 }
155 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
160 }
161}
162
// NOTE: everything below is compiled out (#if 0) and is kept for reference only.
163// This optionally copies the instruction from the target of the branch into
164// the space before the branch. Works, but the difference in speed is
165// usually insignificant.
166#if 0
167static void set_jump_target_fillslot(int addr,u_int target,int copy)
168{
169 u_char *ptr=(u_char *)addr;
170 u_int *ptr2=(u_int *)ptr;
 // when copying, the slot before the branch must hold the 0xe28dd000 placeholder
171 assert(!copy||ptr2[-1]==0xe28dd000);
172 if(ptr[3]==0xe2) {
173 assert(!copy);
174 assert((target-(u_int)ptr2-8)<4096);
175 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
176 }
177 else {
178 assert((ptr[3]&0x0e)==0xa);
179 u_int target_insn=*(u_int *)target;
 // the following checks cancel the copy for certain target instructions
180 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
181 copy=0;
182 }
183 if((target_insn&0x0c100000)==0x04100000) { // Load
184 copy=0;
185 }
186 if(target_insn&0x08000000) {
187 copy=0;
188 }
189 if(copy) {
 // place the target's first instruction before the branch and skip it at the target
190 ptr2[-1]=target_insn;
191 target+=4;
192 }
193 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
194 }
195}
196#endif
197
198/* Literal pool */
199static void add_literal(int addr,int val)
200{
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
204 literalcount++;
205}
206
207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
209static void *find_extjump_insn(void *stub)
210{
211 int *ptr=(int *)(stub+4);
212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
213 u_int offset=*ptr&0xfff;
214 void **l_ptr=(void *)ptr+offset+8;
215 return *l_ptr;
216}
217
218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
222static void *get_pointer(void *stub)
223{
224 //printf("get_pointer(%x)\n",(int)stub);
225 int *i_ptr=find_extjump_insn(stub);
226 assert((*i_ptr&0x0f000000)==0x0a000000);
227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
228}
229
230// Find the "clean" entry point from a "dirty" entry point
231// by skipping past the call to verify_code
232static void *get_clean_addr(void *addr)
233{
234 signed int *ptr = addr;
235 #ifndef HAVE_ARMV7
236 ptr+=4;
237 #else
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
242 ptr++;
243 if((*ptr&0xFF000000)==0xea000000) {
244 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
245 }
246 return ptr;
247}
248
249static int verify_dirty(u_int *ptr)
250{
251 #ifndef HAVE_ARMV7
252 u_int offset;
253 // get from literal pool
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
283static int isclean(void *addr)
284{
285 #ifndef HAVE_ARMV7
286 u_int *ptr=((u_int *)addr)+4;
287 #else
288 u_int *ptr=((u_int *)addr)+6;
289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
298// get source that block at addr was compiled from (host pointers)
299static void get_bounds(void *addr, u_char **start, u_char **end)
300{
301 u_int *ptr = addr;
302 #ifndef HAVE_ARMV7
303 u_int offset;
304 // get from literal pool
305 assert((*ptr&0xFFFF0000)==0xe59f0000);
306 offset=*ptr&0xfff;
307 u_int source=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
310 //offset=*ptr&0xfff;
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
312 ptr++;
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
314 offset=*ptr&0xfff;
315 u_int len=*(u_int*)((void *)ptr+offset+8);
316 ptr++;
317 ptr++;
318 #else
319 // ARMv7 movw/movt
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
324 ptr+=6;
325 #endif
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
328 *start=(u_char *)source;
329 *end=(u_char *)source+len;
330}
331
332/* Register allocation */
333
// Allocate a host register for guest register `reg` at instruction index `i`,
// recording the choice in cur->regmap and clearing the chosen host register's
// dirty and isconst bits. Steps, in order:
//   1. return if reg is flagged unneeded in cur->u or is already mapped
//   2. take the preferred host register if it is free or holds an unneeded reg
//   3. drop one unneeded mapping, then take any free host register
//      (first trying ones not touched by the previous instruction)
//   4. evict a mapping chosen from the hsn[] scores filled in by lsn()
// Prints a message and exits the process if no register could be allocated.
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
336static void alloc_reg(struct regstat *cur,int i,signed char reg)
337{
338 int r,hr;
 // default preference: low three bits of the guest register number
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
342
343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
345
346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
351
352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
354
355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
363 assert(r < 64);
 // the preferred register is occupied, but by a guest register that is unneeded
364 if((cur->u>>r)&1) {
365 cur->regmap[preferred_reg]=reg;
366 cur->dirty&=~(1<<preferred_reg);
367 cur->isconst&=~(1<<preferred_reg);
368 return;
369 }
370
371 // Clear any unneeded registers
372 // We try to keep the mapping consistent, if possible, because it
373 // makes branches easier (especially loops). So we try to allocate
374 // first (see above) before removing old mappings. If this is not
375 // possible then go ahead and clear out the registers that are no
376 // longer needed.
377 for(hr=0;hr<HOST_REGS;hr++)
378 {
379 r=cur->regmap[hr];
380 if(r>=0) {
381 assert(r < 64);
 // only the first unneeded mapping found is released (note the break)
382 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
383 }
384 }
385 // Try to allocate any available register, but prefer
386 // registers that have not been used recently.
387 if(i>0) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
 // skip host registers that held an operand/result of instruction i-1
390 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 }
399 // Try to allocate any available register
400 for(hr=0;hr<HOST_REGS;hr++) {
401 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408
409 // Ok, now we have to evict someone
410 // Pick a register we hopefully won't need soon
 // hsn[] holds one score per guest register, initialised to 10 and then
 // filled in by lsn(); higher-scored registers are evicted first below.
 // (Presumably the score is the distance to the next use - confirm in lsn().)
411 u_char hsn[MAXREG+1];
412 memset(hsn,10,sizeof(hsn));
413 int j;
414 lsn(hsn,i,&preferred_reg);
415 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
416 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
417 if(i>0) {
418 // Don't evict the cycle count at entry points, otherwise the entry
419 // stub will have to write it.
420 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
421 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // first pass: scores 10 down to 3, never evicting registers used by
 // instruction i-1
422 for(j=10;j>=3;j--)
423 {
424 // Alloc preferred register if available
425 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
426 for(hr=0;hr<HOST_REGS;hr++) {
427 // Evict both parts of a 64-bit register
428 if((cur->regmap[hr]&63)==r) {
429 cur->regmap[hr]=-1;
430 cur->dirty&=~(1<<hr);
431 cur->isconst&=~(1<<hr);
432 }
433 }
434 cur->regmap[preferred_reg]=reg;
435 return;
436 }
437 for(r=1;r<=MAXREG;r++)
438 {
439 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // mappings equal to r+64 are tried before mappings equal to r
440 for(hr=0;hr<HOST_REGS;hr++) {
441 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
442 if(cur->regmap[hr]==r+64) {
443 cur->regmap[hr]=reg;
444 cur->dirty&=~(1<<hr);
445 cur->isconst&=~(1<<hr);
446 return;
447 }
448 }
449 }
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 }
461 }
462 }
463 }
 // last resort: every score from 10 down to 0, with no exclusions
464 for(j=10;j>=0;j--)
465 {
466 for(r=1;r<=MAXREG;r++)
467 {
468 if(hsn[r]==j) {
469 for(hr=0;hr<HOST_REGS;hr++) {
470 if(cur->regmap[hr]==r+64) {
471 cur->regmap[hr]=reg;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 return;
475 }
476 }
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 }
486 }
487 }
488 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
489}
490
// 64-bit allocation entry point. Only the lower 32 bits are allocated,
// so this simply forwards to alloc_reg().
static void alloc_reg64(struct regstat *cur,int i,signed char reg)
{
  alloc_reg(cur, i, reg);
}
496
// Steps, in order: return if `reg` is already mapped; take a free host
// register (searching from the highest index down); reuse a host register
// whose guest register is unneeded both now and after instruction i-1;
// finally evict a mapping chosen from the hsn[] scores filled in by lsn().
// The chosen host register's dirty and isconst bits are cleared.
// Prints a message and exits the process if nothing could be allocated.
497// Allocate a temporary register. This is done without regard to
498// dirty status or whether the register we request is on the unneeded list
499// Note: This will only allocate one register, even if called multiple times
500static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
501{
502 int r,hr;
 // no preferred host register; the variable is only passed to lsn()
503 int preferred_reg = -1;
504
505 // see if it's already allocated
506 for(hr=0;hr<HOST_REGS;hr++)
507 {
508 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
509 }
510
511 // Try to allocate any available register
512 for(hr=HOST_REGS-1;hr>=0;hr--) {
513 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
514 cur->regmap[hr]=reg;
515 cur->dirty&=~(1<<hr);
516 cur->isconst&=~(1<<hr);
517 return;
518 }
519 }
520
521 // Find an unneeded register
522 for(hr=HOST_REGS-1;hr>=0;hr--)
523 {
524 r=cur->regmap[hr];
525 if(r>=0) {
526 assert(r < 64);
527 if((cur->u>>r)&1) {
 // must also be unneeded according to unneeded_reg[i-1] (when i>0)
528 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
529 cur->regmap[hr]=reg;
530 cur->dirty&=~(1<<hr);
531 cur->isconst&=~(1<<hr);
532 return;
533 }
534 }
535 }
536 }
537
538 // Ok, now we have to evict someone
539 // Pick a register we hopefully won't need soon
540 // TODO: we might want to follow unconditional jumps here
541 // TODO: get rid of dupe code and make this into a function
 // hsn[] holds one score per guest register, initialised to 10 and then
 // filled in by lsn(); higher-scored registers are evicted first below.
542 u_char hsn[MAXREG+1];
543 memset(hsn,10,sizeof(hsn));
544 int j;
545 lsn(hsn,i,&preferred_reg);
546 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
547 if(i>0) {
548 // Don't evict the cycle count at entry points, otherwise the entry
549 // stub will have to write it.
550 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
551 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
 // first pass: scores 10 down to 3, never evicting registers used by
 // instruction i-1
552 for(j=10;j>=3;j--)
553 {
554 for(r=1;r<=MAXREG;r++)
555 {
556 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
 // HOST_CCREG is only eligible while the cycle count's score is above 2
557 for(hr=0;hr<HOST_REGS;hr++) {
558 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
559 if(cur->regmap[hr]==r+64) {
560 cur->regmap[hr]=reg;
561 cur->dirty&=~(1<<hr);
562 cur->isconst&=~(1<<hr);
563 return;
564 }
565 }
566 }
567 for(hr=0;hr<HOST_REGS;hr++) {
568 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
569 if(cur->regmap[hr]==r) {
570 cur->regmap[hr]=reg;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576 }
577 }
578 }
579 }
580 }
 // last resort: every score from 10 down to 0, with no exclusions
581 for(j=10;j>=0;j--)
582 {
583 for(r=1;r<=MAXREG;r++)
584 {
585 if(hsn[r]==j) {
586 for(hr=0;hr<HOST_REGS;hr++) {
587 if(cur->regmap[hr]==r+64) {
588 cur->regmap[hr]=reg;
589 cur->dirty&=~(1<<hr);
590 cur->isconst&=~(1<<hr);
591 return;
592 }
593 }
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(cur->regmap[hr]==r) {
596 cur->regmap[hr]=reg;
597 cur->dirty&=~(1<<hr);
598 cur->isconst&=~(1<<hr);
599 return;
600 }
601 }
602 }
603 }
604 }
605 SysPrintf("This shouldn't happen");exit(1);
606}
607
608// Allocate a specific ARM register.
609static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
610{
611 int n;
612 int dirty=0;
613
614 // see if it's already allocated (and dealloc it)
615 for(n=0;n<HOST_REGS;n++)
616 {
617 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
618 dirty=(cur->dirty>>n)&1;
619 cur->regmap[n]=-1;
620 }
621 }
622
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->dirty|=dirty<<hr;
626 cur->isconst&=~(1<<hr);
627}
628
629// Alloc cycle count into dedicated register
630static void alloc_cc(struct regstat *cur,int i)
631{
632 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
633}
634
635/* Special alloc */
636
637
638/* Assembler */
639
640static unused char regname[16][4] = {
641 "r0",
642 "r1",
643 "r2",
644 "r3",
645 "r4",
646 "r5",
647 "r6",
648 "r7",
649 "r8",
650 "r9",
651 "r10",
652 "fp",
653 "r12",
654 "sp",
655 "lr",
656 "pc"};
657
658static void output_w32(u_int word)
659{
660 *((u_int *)out)=word;
661 out+=4;
662}
663
664static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
665{
666 assert(rd<16);
667 assert(rn<16);
668 assert(rm<16);
669 return((rn<<16)|(rd<<12)|rm);
670}
671
672static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
673{
674 assert(rd<16);
675 assert(rn<16);
676 assert(imm<256);
677 assert((shift&1)==0);
678 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
679}
680
// Try to express imm as an 8-bit value rotated by an even amount.
// On success stores the 12-bit encoding (rotate field in bits 8-11, value in
// bits 0-7) in *encoded and returns 1; otherwise returns 0 with *encoded = 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
696
697static void genimm_checked(u_int imm,u_int *encoded)
698{
699 u_int ret=genimm(imm,encoded);
700 assert(ret);
701 (void)ret;
702}
703
704static u_int genjmp(u_int addr)
705{
706 int offset=addr-(int)out-8;
707 if(offset<-33554432||offset>=33554432) {
708 if (addr>2) {
709 SysPrintf("genjmp: out of range: %08x\n", offset);
710 exit(1);
711 }
712 return 0;
713 }
714 return ((u_int)offset>>2)&0xffffff;
715}
716
717static void emit_mov(int rs,int rt)
718{
719 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
720 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
721}
722
723static void emit_movs(int rs,int rt)
724{
725 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
726 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
727}
728
729static void emit_add(int rs1,int rs2,int rt)
730{
731 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
732 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
733}
734
735static void emit_adds(int rs1,int rs2,int rt)
736{
737 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
738 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
739}
740
741static void emit_adcs(int rs1,int rs2,int rt)
742{
743 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
744 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
745}
746
747static void emit_sbcs(int rs1,int rs2,int rt)
748{
749 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
750 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
751}
752
753static void emit_neg(int rs, int rt)
754{
755 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
756 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
757}
758
759static void emit_sub(int rs1,int rs2,int rt)
760{
761 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
762 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
763}
764
765static void emit_zeroreg(int rt)
766{
767 assem_debug("mov %s,#0\n",regname[rt]);
768 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
769}
770
771static void emit_loadlp(u_int imm,u_int rt)
772{
773 add_literal((int)out,imm);
774 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
775 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
776}
777
778static void emit_movw(u_int imm,u_int rt)
779{
780 assert(imm<65536);
781 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
782 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
783}
784
785static void emit_movt(u_int imm,u_int rt)
786{
787 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
788 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
789}
790
791static void emit_movimm(u_int imm,u_int rt)
792{
793 u_int armval;
794 if(genimm(imm,&armval)) {
795 assem_debug("mov %s,#%d\n",regname[rt],imm);
796 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
797 }else if(genimm(~imm,&armval)) {
798 assem_debug("mvn %s,#%d\n",regname[rt],imm);
799 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
800 }else if(imm<65536) {
801 #ifndef HAVE_ARMV7
802 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
803 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
804 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
805 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
806 #else
807 emit_movw(imm,rt);
808 #endif
809 }else{
810 #ifndef HAVE_ARMV7
811 emit_loadlp(imm,rt);
812 #else
813 emit_movw(imm&0x0000FFFF,rt);
814 emit_movt(imm&0xFFFF0000,rt);
815 #endif
816 }
817}
818
819static void emit_pcreladdr(u_int rt)
820{
821 assem_debug("add %s,pc,#?\n",regname[rt]);
822 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
823}
824
825static void emit_loadreg(int r, int hr)
826{
827 if(r&64) {
828 SysPrintf("64bit load in 32bit mode!\n");
829 assert(0);
830 return;
831 }
832 if((r&63)==0)
833 emit_zeroreg(hr);
834 else {
835 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
836 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
837 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
838 if(r==CCREG) addr=(int)&cycle_count;
839 if(r==CSREG) addr=(int)&Status;
840 if(r==INVCP) addr=(int)&invc_ptr;
841 u_int offset = addr-(u_int)&dynarec_local;
842 assert(offset<4096);
843 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
844 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
845 }
846}
847
848static void emit_storereg(int r, int hr)
849{
850 if(r&64) {
851 SysPrintf("64bit store in 32bit mode!\n");
852 assert(0);
853 return;
854 }
855 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
856 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
857 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
858 if(r==CCREG) addr=(int)&cycle_count;
859 u_int offset = addr-(u_int)&dynarec_local;
860 assert(offset<4096);
861 assem_debug("str %s,fp+%d\n",regname[hr],offset);
862 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
863}
864
865static void emit_test(int rs, int rt)
866{
867 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
868 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
869}
870
871static void emit_testimm(int rs,int imm)
872{
873 u_int armval;
874 assem_debug("tst %s,#%d\n",regname[rs],imm);
875 genimm_checked(imm,&armval);
876 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
877}
878
879static void emit_testeqimm(int rs,int imm)
880{
881 u_int armval;
882 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
883 genimm_checked(imm,&armval);
884 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
885}
886
887static void emit_not(int rs,int rt)
888{
889 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
890 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
891}
892
893static void emit_mvnmi(int rs,int rt)
894{
895 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
896 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
897}
898
899static void emit_and(u_int rs1,u_int rs2,u_int rt)
900{
901 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
903}
904
905static void emit_or(u_int rs1,u_int rs2,u_int rt)
906{
907 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
909}
910
911static void emit_or_and_set_flags(int rs1,int rs2,int rt)
912{
913 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
915}
916
917static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
918{
919 assert(rs<16);
920 assert(rt<16);
921 assert(imm<32);
922 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
923 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
924}
925
926static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
927{
928 assert(rs<16);
929 assert(rt<16);
930 assert(imm<32);
931 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
932 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
933}
934
935static void emit_xor(u_int rs1,u_int rs2,u_int rt)
936{
937 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
939}
940
941static void emit_addimm(u_int rs,int imm,u_int rt)
942{
943 assert(rs<16);
944 assert(rt<16);
945 if(imm!=0) {
946 u_int armval;
947 if(genimm(imm,&armval)) {
948 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
949 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
950 }else if(genimm(-imm,&armval)) {
951 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
952 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
953 #ifdef HAVE_ARMV7
954 }else if(rt!=rs&&(u_int)imm<65536) {
955 emit_movw(imm&0x0000ffff,rt);
956 emit_add(rs,rt,rt);
957 }else if(rt!=rs&&(u_int)-imm<65536) {
958 emit_movw(-imm&0x0000ffff,rt);
959 emit_sub(rs,rt,rt);
960 #endif
961 }else if((u_int)-imm<65536) {
962 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
963 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
964 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
965 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
966 }else {
967 do {
968 int shift = (ffs(imm) - 1) & ~1;
969 int imm8 = imm & (0xff << shift);
970 genimm_checked(imm8,&armval);
971 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
972 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
973 rs = rt;
974 imm &= ~imm8;
975 }
976 while (imm != 0);
977 }
978 }
979 else if(rs!=rt) emit_mov(rs,rt);
980}
981
982static void emit_addimm_and_set_flags(int imm,int rt)
983{
984 assert(imm>-65536&&imm<65536);
985 u_int armval;
986 if(genimm(imm,&armval)) {
987 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
988 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
989 }else if(genimm(-imm,&armval)) {
990 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
991 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
992 }else if(imm<0) {
993 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
994 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
995 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
996 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
997 }else{
998 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
999 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1000 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1001 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1002 }
1003}
1004
1005static void emit_addimm_no_flags(u_int imm,u_int rt)
1006{
1007 emit_addimm(rt,imm,rt);
1008}
1009
1010static void emit_addnop(u_int r)
1011{
1012 assert(r<16);
1013 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1014 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1015}
1016
1017static void emit_adcimm(u_int rs,int imm,u_int rt)
1018{
1019 u_int armval;
1020 genimm_checked(imm,&armval);
1021 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1022 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1023}
1024
1025static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1026{
1027 // TODO: if(genimm(imm,&armval)) ...
1028 // else
1029 emit_movimm(imm,HOST_TEMPREG);
1030 emit_adds(HOST_TEMPREG,rsl,rtl);
1031 emit_adcimm(rsh,0,rth);
1032}
1033
1034static void emit_andimm(int rs,int imm,int rt)
1035{
1036 u_int armval;
1037 if(imm==0) {
1038 emit_zeroreg(rt);
1039 }else if(genimm(imm,&armval)) {
1040 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1041 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1042 }else if(genimm(~imm,&armval)) {
1043 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1044 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1045 }else if(imm==65535) {
1046 #ifndef HAVE_ARMV6
1047 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1048 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1049 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1050 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1051 #else
1052 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1053 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1054 #endif
1055 }else{
1056 assert(imm>0&&imm<65535);
1057 #ifndef HAVE_ARMV7
1058 assem_debug("mov r14,#%d\n",imm&0xFF00);
1059 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1060 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1061 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1062 #else
1063 emit_movw(imm,HOST_TEMPREG);
1064 #endif
1065 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1066 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1067 }
1068}
1069
1070static void emit_orimm(int rs,int imm,int rt)
1071{
1072 u_int armval;
1073 if(imm==0) {
1074 if(rs!=rt) emit_mov(rs,rt);
1075 }else if(genimm(imm,&armval)) {
1076 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1077 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1078 }else{
1079 assert(imm>0&&imm<65536);
1080 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1081 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1082 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1083 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1084 }
1085}
1086
1087static void emit_xorimm(int rs,int imm,int rt)
1088{
1089 u_int armval;
1090 if(imm==0) {
1091 if(rs!=rt) emit_mov(rs,rt);
1092 }else if(genimm(imm,&armval)) {
1093 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1095 }else{
1096 assert(imm>0&&imm<65536);
1097 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1098 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1099 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1100 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1101 }
1102}
1103
1104static void emit_shlimm(int rs,u_int imm,int rt)
1105{
1106 assert(imm>0);
1107 assert(imm<32);
1108 //if(imm==1) ...
1109 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1110 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1111}
1112
1113static void emit_lsls_imm(int rs,int imm,int rt)
1114{
1115 assert(imm>0);
1116 assert(imm<32);
1117 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1118 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1119}
1120
1121static unused void emit_lslpls_imm(int rs,int imm,int rt)
1122{
1123 assert(imm>0);
1124 assert(imm<32);
1125 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1126 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1127}
1128
1129static void emit_shrimm(int rs,u_int imm,int rt)
1130{
1131 assert(imm>0);
1132 assert(imm<32);
1133 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1134 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1135}
1136
1137static void emit_sarimm(int rs,u_int imm,int rt)
1138{
1139 assert(imm>0);
1140 assert(imm<32);
1141 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1142 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1143}
1144
1145static void emit_rorimm(int rs,u_int imm,int rt)
1146{
1147 assert(imm>0);
1148 assert(imm<32);
1149 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1151}
1152
/*
 * Sign-extend the low 16 bits of rs into rt.
 * Uses a single SXTH when HAVE_ARMV6 is defined, otherwise a
 * shift-left-16 / arithmetic-shift-right-16 pair.
 */
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
1163
/*
 * Sign-extend the low 8 bits of rs into rt.
 * Uses a single SXTB when HAVE_ARMV6 is defined, otherwise a
 * shift-left-24 / arithmetic-shift-right-24 pair.
 */
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
1174
1175static void emit_shl(u_int rs,u_int shift,u_int rt)
1176{
1177 assert(rs<16);
1178 assert(rt<16);
1179 assert(shift<16);
1180 //if(imm==1) ...
1181 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1182 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1183}
1184
1185static void emit_shr(u_int rs,u_int shift,u_int rt)
1186{
1187 assert(rs<16);
1188 assert(rt<16);
1189 assert(shift<16);
1190 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1191 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1192}
1193
1194static void emit_sar(u_int rs,u_int shift,u_int rt)
1195{
1196 assert(rs<16);
1197 assert(rt<16);
1198 assert(shift<16);
1199 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1200 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1201}
1202
1203static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1204{
1205 assert(rs<16);
1206 assert(rt<16);
1207 assert(shift<16);
1208 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1209 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1210}
1211
1212static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1213{
1214 assert(rs<16);
1215 assert(rt<16);
1216 assert(shift<16);
1217 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1218 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1219}
1220
1221static void emit_cmpimm(int rs,int imm)
1222{
1223 u_int armval;
1224 if(genimm(imm,&armval)) {
1225 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1226 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1227 }else if(genimm(-imm,&armval)) {
1228 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1229 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1230 }else if(imm>0) {
1231 assert(imm<65536);
1232 emit_movimm(imm,HOST_TEMPREG);
1233 assem_debug("cmp %s,r14\n",regname[rs]);
1234 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1235 }else{
1236 assert(imm>-65536);
1237 emit_movimm(-imm,HOST_TEMPREG);
1238 assem_debug("cmn %s,r14\n",regname[rs]);
1239 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1240 }
1241}
1242
1243static void emit_cmovne_imm(int imm,int rt)
1244{
1245 assem_debug("movne %s,#%d\n",regname[rt],imm);
1246 u_int armval;
1247 genimm_checked(imm,&armval);
1248 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1249}
1250
1251static void emit_cmovl_imm(int imm,int rt)
1252{
1253 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1254 u_int armval;
1255 genimm_checked(imm,&armval);
1256 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1257}
1258
1259static void emit_cmovb_imm(int imm,int rt)
1260{
1261 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1262 u_int armval;
1263 genimm_checked(imm,&armval);
1264 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1265}
1266
1267static void emit_cmovs_imm(int imm,int rt)
1268{
1269 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1270 u_int armval;
1271 genimm_checked(imm,&armval);
1272 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1273}
1274
1275static void emit_cmovne_reg(int rs,int rt)
1276{
1277 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1278 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1279}
1280
1281static void emit_cmovl_reg(int rs,int rt)
1282{
1283 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1284 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1285}
1286
1287static void emit_cmovs_reg(int rs,int rt)
1288{
1289 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1290 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1291}
1292
/*
 * Emit a "set if less than immediate" sequence: compare rs with imm, then
 * conditionally (lt) load 1 into an otherwise zeroed rt.
 * When rs and rt are the same register, rt is cleared only after the
 * compare so the operand is still intact when it is compared.
 */
static void emit_slti32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1300
/*
 * Same sequence as the signed variant, but the final conditional load of 1
 * uses the CC condition (emit_cmovb_imm) instead of LT.
 * When rs and rt are the same register, rt is cleared only after the
 * compare so the operand is still intact when it is compared.
 */
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1308
/*
 * 64-bit signed "set if less than immediate" producing a 32-bit result.
 * rsh/rsl are the high/low halves of the source; rsh must differ from rt.
 * The low half is compared first, then the result is overridden according
 * to the high half: for imm >= 0 the high half is tested against zero, for
 * imm < 0 it is compared against -1.
 */
static void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
1326
/*
 * 64-bit unsigned "set if less than immediate" producing a 32-bit result.
 * rsh/rsl are the high/low halves of the source; rsh must differ from rt.
 * The low half is compared first; for imm >= 0 a non-zero high half forces
 * the result to 0, for imm < 0 a high half other than -1 forces it to 1.
 */
static void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1342
1343static void emit_cmp(int rs,int rt)
1344{
1345 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1346 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1347}
1348
/*
 * Emit: compare rs with 1, load 1 into rt, then replace it with 0 under the
 * LT condition.  The compare is emitted before rt is written.
 */
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
1356
/*
 * Emit: set flags from rs (copying it into rt via emit_movs when the
 * registers differ, or via emit_test when they are the same), then load 1
 * into rt under the NE condition.
 */
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
1364
/*
 * 64-bit variant of emit_set_gz32: the low half rsl provides the initial
 * result, which is then overridden from the high half rsh (1 under NE,
 * then 0 under MI).
 */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
1373
/*
 * 64-bit "set if non-zero": combine both halves into rt with
 * emit_or_and_set_flags, then load 1 into rt under the NE condition.
 */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
1380
/*
 * Emit: compare rs1 with rs2 and load 1 into a zeroed rt under the LT
 * condition.  If rt aliases either source it is cleared only after the
 * compare has been emitted.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
1389
/*
 * Emit: compare rs1 with rs2 and load 1 into a zeroed rt under the CC
 * condition (emit_cmovb_imm).  If rt aliases either source it is cleared
 * only after the compare has been emitted.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1398
1399static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1400{
1401 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1402 assert(u1!=rt);
1403 assert(u2!=rt);
1404 emit_cmp(l1,l2);
1405 emit_movimm(0,rt);
1406 emit_sbcs(u1,u2,HOST_TEMPREG);
1407 emit_cmovl_imm(1,rt);
1408}
1409
1410static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1411{
1412 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1413 assert(u1!=rt);
1414 assert(u2!=rt);
1415 emit_cmp(l1,l2);
1416 emit_movimm(0,rt);
1417 emit_sbcs(u1,u2,HOST_TEMPREG);
1418 emit_cmovb_imm(1,rt);
1419}
1420
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();

/* One table row: the function's address and its name with a leading space. */
#define FUNCNAME(f) { (intptr_t)f, " " #f }

/* Known call/jump targets, used to annotate branch lines in the debug trace. */
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

/*
 * Look up address a in function_names.
 * Returns the matching name, or an empty string when a is not listed.
 */
static const char *func_name(intptr_t a)
{
  const unsigned int count = sizeof(function_names)/sizeof(function_names[0]);
  unsigned int idx = 0;

  while (idx < count) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
/* Without DRC_DBG no names are available; always yields an empty string. */
#define func_name(x) ""
#endif
1461
1462static void emit_call(const void *a_)
1463{
1464 int a = (int)a_;
1465 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1466 u_int offset=genjmp(a);
1467 output_w32(0xeb000000|offset);
1468}
1469
1470static void emit_jmp(const void *a_)
1471{
1472 int a = (int)a_;
1473 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1474 u_int offset=genjmp(a);
1475 output_w32(0xea000000|offset);
1476}
1477
1478static void emit_jne(const void *a_)
1479{
1480 int a = (int)a_;
1481 assem_debug("bne %x\n",a);
1482 u_int offset=genjmp(a);
1483 output_w32(0x1a000000|offset);
1484}
1485
1486static void emit_jeq(int a)
1487{
1488 assem_debug("beq %x\n",a);
1489 u_int offset=genjmp(a);
1490 output_w32(0x0a000000|offset);
1491}
1492
1493static void emit_js(int a)
1494{
1495 assem_debug("bmi %x\n",a);
1496 u_int offset=genjmp(a);
1497 output_w32(0x4a000000|offset);
1498}
1499
1500static void emit_jns(int a)
1501{
1502 assem_debug("bpl %x\n",a);
1503 u_int offset=genjmp(a);
1504 output_w32(0x5a000000|offset);
1505}
1506
1507static void emit_jl(int a)
1508{
1509 assem_debug("blt %x\n",a);
1510 u_int offset=genjmp(a);
1511 output_w32(0xba000000|offset);
1512}
1513
1514static void emit_jge(int a)
1515{
1516 assem_debug("bge %x\n",a);
1517 u_int offset=genjmp(a);
1518 output_w32(0xaa000000|offset);
1519}
1520
1521static void emit_jno(int a)
1522{
1523 assem_debug("bvc %x\n",a);
1524 u_int offset=genjmp(a);
1525 output_w32(0x7a000000|offset);
1526}
1527
1528static void emit_jc(int a)
1529{
1530 assem_debug("bcs %x\n",a);
1531 u_int offset=genjmp(a);
1532 output_w32(0x2a000000|offset);
1533}
1534
1535static void emit_jcc(void *a_)
1536{
1537 int a = (int)a_;
1538 assem_debug("bcc %x\n",a);
1539 u_int offset=genjmp(a);
1540 output_w32(0x3a000000|offset);
1541}
1542
1543static void emit_callreg(u_int r)
1544{
1545 assert(r<15);
1546 assem_debug("blx %s\n",regname[r]);
1547 output_w32(0xe12fff30|r);
1548}
1549
1550static void emit_jmpreg(u_int r)
1551{
1552 assem_debug("mov pc,%s\n",regname[r]);
1553 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1554}
1555
1556static void emit_readword_indexed(int offset, int rs, int rt)
1557{
1558 assert(offset>-4096&&offset<4096);
1559 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1560 if(offset>=0) {
1561 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1562 }else{
1563 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1564 }
1565}
1566
1567static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1568{
1569 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1570 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1571}
1572
1573static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1574{
1575 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1576 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1577}
1578
1579static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1580{
1581 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1582 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1583}
1584
1585static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1586{
1587 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1588 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1589}
1590
1591static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1592{
1593 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1594 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1595}
1596
1597static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1598{
1599 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1600 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1601}
1602
1603static void emit_movsbl_indexed(int offset, int rs, int rt)
1604{
1605 assert(offset>-256&&offset<256);
1606 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1607 if(offset>=0) {
1608 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1609 }else{
1610 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1611 }
1612}
1613
1614static void emit_movswl_indexed(int offset, int rs, int rt)
1615{
1616 assert(offset>-256&&offset<256);
1617 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1618 if(offset>=0) {
1619 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1620 }else{
1621 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1622 }
1623}
1624
1625static void emit_movzbl_indexed(int offset, int rs, int rt)
1626{
1627 assert(offset>-4096&&offset<4096);
1628 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1629 if(offset>=0) {
1630 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1631 }else{
1632 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1633 }
1634}
1635
1636static void emit_movzwl_indexed(int offset, int rs, int rt)
1637{
1638 assert(offset>-256&&offset<256);
1639 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1640 if(offset>=0) {
1641 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1642 }else{
1643 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1644 }
1645}
1646
1647static void emit_ldrd(int offset, int rs, int rt)
1648{
1649 assert(offset>-256&&offset<256);
1650 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1651 if(offset>=0) {
1652 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1653 }else{
1654 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1655 }
1656}
1657
1658static void emit_readword(void *addr, int rt)
1659{
1660 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1661 assert(offset<4096);
1662 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1663 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1664}
1665
1666static void emit_writeword_indexed(int rt, int offset, int rs)
1667{
1668 assert(offset>-4096&&offset<4096);
1669 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1670 if(offset>=0) {
1671 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1672 }else{
1673 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1674 }
1675}
1676
1677static void emit_writehword_indexed(int rt, int offset, int rs)
1678{
1679 assert(offset>-256&&offset<256);
1680 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1681 if(offset>=0) {
1682 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1683 }else{
1684 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1685 }
1686}
1687
1688static void emit_writebyte_indexed(int rt, int offset, int rs)
1689{
1690 assert(offset>-4096&&offset<4096);
1691 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1692 if(offset>=0) {
1693 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1694 }else{
1695 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1696 }
1697}
1698
1699static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1700{
1701 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1702 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1703}
1704
1705static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1706{
1707 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1708 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1709}
1710
1711static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1712{
1713 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1714 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1715}
1716
1717static void emit_writeword(int rt, void *addr)
1718{
1719 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1720 assert(offset<4096);
1721 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1722 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1723}
1724
1725static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1726{
1727 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1728 assert(rs1<16);
1729 assert(rs2<16);
1730 assert(hi<16);
1731 assert(lo<16);
1732 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1733}
1734
1735static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1736{
1737 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1738 assert(rs1<16);
1739 assert(rs2<16);
1740 assert(hi<16);
1741 assert(lo<16);
1742 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1743}
1744
1745static void emit_clz(int rs,int rt)
1746{
1747 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1748 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1749}
1750
1751static void emit_subcs(int rs1,int rs2,int rt)
1752{
1753 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1754 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1755}
1756
1757static void emit_shrcc_imm(int rs,u_int imm,int rt)
1758{
1759 assert(imm>0);
1760 assert(imm<32);
1761 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1762 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1763}
1764
1765static void emit_shrne_imm(int rs,u_int imm,int rt)
1766{
1767 assert(imm>0);
1768 assert(imm<32);
1769 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1770 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1771}
1772
1773static void emit_negmi(int rs, int rt)
1774{
1775 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1776 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1777}
1778
1779static void emit_negsmi(int rs, int rt)
1780{
1781 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1782 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1783}
1784
1785static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1786{
1787 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1788 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1789}
1790
1791static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1792{
1793 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1794 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1795}
1796
1797static void emit_teq(int rs, int rt)
1798{
1799 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1800 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1801}
1802
1803static void emit_rsbimm(int rs, int imm, int rt)
1804{
1805 u_int armval;
1806 genimm_checked(imm,&armval);
1807 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1808 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1809}
1810
1811// Load 2 immediates optimizing for small code size
1812static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
1813{
1814 emit_movimm(imm1,rt1);
1815 u_int armval;
1816 if(genimm(imm2-imm1,&armval)) {
1817 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1818 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1819 }else if(genimm(imm1-imm2,&armval)) {
1820 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1821 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1822 }
1823 else emit_movimm(imm2,rt2);
1824}
1825
1826// Conditionally select one of two immediates, optimizing for small code size
1827// This will only be called if HAVE_CMOV_IMM is defined
1828static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
1829{
1830 u_int armval;
1831 if(genimm(imm2-imm1,&armval)) {
1832 emit_movimm(imm1,rt);
1833 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1834 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1835 }else if(genimm(imm1-imm2,&armval)) {
1836 emit_movimm(imm1,rt);
1837 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1838 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1839 }
1840 else {
1841 #ifndef HAVE_ARMV7
1842 emit_movimm(imm1,rt);
1843 add_literal((int)out,imm2);
1844 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1845 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1846 #else
1847 emit_movw(imm1&0x0000FFFF,rt);
1848 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1849 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1850 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1851 }
1852 emit_movt(imm1&0xFFFF0000,rt);
1853 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1854 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1855 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1856 }
1857 #endif
1858 }
1859}
1860
1861// special case for checking invalid_code
1862static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
1863{
1864 assert(imm<128&&imm>=0);
1865 assert(r>=0&&r<16);
1866 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1867 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1868 emit_cmpimm(HOST_TEMPREG,imm);
1869}
1870
1871static void emit_callne(int a)
1872{
1873 assem_debug("blne %x\n",a);
1874 u_int offset=genjmp(a);
1875 output_w32(0x1b000000|offset);
1876}
1877
1878// Used to preload hash table entries
1879static unused void emit_prefetchreg(int r)
1880{
1881 assem_debug("pld %s\n",regname[r]);
1882 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1883}
1884
1885// Special case for mini_ht
1886static void emit_ldreq_indexed(int rs, u_int offset, int rt)
1887{
1888 assert(offset<4096);
1889 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1890 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1891}
1892
1893static void emit_orrne_imm(int rs,int imm,int rt)
1894{
1895 u_int armval;
1896 genimm_checked(imm,&armval);
1897 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1898 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1899}
1900
1901static void emit_andne_imm(int rs,int imm,int rt)
1902{
1903 u_int armval;
1904 genimm_checked(imm,&armval);
1905 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1906 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1907}
1908
1909static unused void emit_addpl_imm(int rs,int imm,int rt)
1910{
1911 u_int armval;
1912 genimm_checked(imm,&armval);
1913 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1914 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1915}
1916
1917static void emit_jno_unlikely(int a)
1918{
1919 //emit_jno(a);
1920 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1921 output_w32(0x72800000|rd_rn_rm(15,15,0));
1922}
1923
1924static void save_regs_all(u_int reglist)
1925{
1926 int i;
1927 if(!reglist) return;
1928 assem_debug("stmia fp,{");
1929 for(i=0;i<16;i++)
1930 if(reglist&(1<<i))
1931 assem_debug("r%d,",i);
1932 assem_debug("}\n");
1933 output_w32(0xe88b0000|reglist);
1934}
1935
1936static void restore_regs_all(u_int reglist)
1937{
1938 int i;
1939 if(!reglist) return;
1940 assem_debug("ldmia fp,{");
1941 for(i=0;i<16;i++)
1942 if(reglist&(1<<i))
1943 assem_debug("r%d,",i);
1944 assem_debug("}\n");
1945 output_w32(0xe89b0000|reglist);
1946}
1947
1948// Save registers before function call
1949static void save_regs(u_int reglist)
1950{
1951 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
1952 save_regs_all(reglist);
1953}
1954
1955// Restore registers after function call
1956static void restore_regs(u_int reglist)
1957{
1958 reglist&=CALLER_SAVE_REGS;
1959 restore_regs_all(reglist);
1960}
1961
1962/* Stubs/epilogue */
1963
1964static void literal_pool(int n)
1965{
1966 if(!literalcount) return;
1967 if(n) {
1968 if((int)out-literals[0][0]<4096-n) return;
1969 }
1970 u_int *ptr;
1971 int i;
1972 for(i=0;i<literalcount;i++)
1973 {
1974 u_int l_addr=(u_int)out;
1975 int j;
1976 for(j=0;j<i;j++) {
1977 if(literals[j][1]==literals[i][1]) {
1978 //printf("dup %08x\n",literals[i][1]);
1979 l_addr=literals[j][0];
1980 break;
1981 }
1982 }
1983 ptr=(u_int *)literals[i][0];
1984 u_int offset=l_addr-(u_int)ptr-8;
1985 assert(offset<4096);
1986 assert(!(offset&3));
1987 *ptr|=offset;
1988 if(l_addr==(u_int)out) {
1989 literals[i][0]=l_addr; // remember for dupes
1990 output_w32(literals[i][1]);
1991 }
1992 }
1993 literalcount=0;
1994}
1995
1996static void literal_pool_jumpover(int n)
1997{
1998 if(!literalcount) return;
1999 if(n) {
2000 if((int)out-literals[0][0]<4096-n) return;
2001 }
2002 void *jaddr = out;
2003 emit_jmp(0);
2004 literal_pool(0);
2005 set_jump_target(jaddr, out);
2006}
2007
2008static void emit_extjump2(u_char *addr, int target, void *linker)
2009{
2010 u_char *ptr=(u_char *)addr;
2011 assert((ptr[3]&0x0e)==0xa);
2012 (void)ptr;
2013
2014 emit_loadlp(target,0);
2015 emit_loadlp((u_int)addr,1);
2016 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
2017 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2018//DEBUG >
2019#ifdef DEBUG_CYCLE_COUNT
2020 emit_readword(&last_count,ECX);
2021 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2022 emit_readword(&next_interupt,ECX);
2023 emit_writeword(HOST_CCREG,&Count);
2024 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2025 emit_writeword(ECX,&last_count);
2026#endif
2027//DEBUG <
2028 emit_jmp(linker);
2029}
2030
2031static void emit_extjump(void *addr, int target)
2032{
2033 emit_extjump2(addr, target, dyna_linker);
2034}
2035
2036static void emit_extjump_ds(void *addr, int target)
2037{
2038 emit_extjump2(addr, target, dyna_linker_ds);
2039}
2040
2041// put rt_val into rt, potentially making use of rs with value rs_val
2042static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2043{
2044 u_int armval;
2045 int diff;
2046 if(genimm(rt_val,&armval)) {
2047 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2048 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2049 return;
2050 }
2051 if(genimm(~rt_val,&armval)) {
2052 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2053 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2054 return;
2055 }
2056 diff=rt_val-rs_val;
2057 if(genimm(diff,&armval)) {
2058 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2059 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2060 return;
2061 }else if(genimm(-diff,&armval)) {
2062 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2063 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2064 return;
2065 }
2066 emit_movimm(rt_val,rt);
2067}
2068
/*
 * Return 1 if emit_movimm_from() can do its job cheaply for these values,
 * i.e. they are equal, or their difference (in either direction) fits in
 * 8 bits after stripping trailing pairs of zero bits.  Returns 0 otherwise.
 */
static int is_similar_value(u_int v1, u_int v2)
{
  u_int delta, bits;
  int pass;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;
  // first pass checks v2-v1, second pass checks v1-v2
  for (pass = 0; pass < 2; pass++) {
    bits = delta;
    while (bits != 0 && (bits & 3) == 0)
      bits >>= 2;
    if (bits < 0x100)
      return 1;
    delta = 0u - delta;
  }
  return 0;
}
2084
/*
 * Move the values held in host registers a0 and a1 into r0 and r1.
 * A negative register number means "no argument" and is skipped.
 * Trashes r2 when the two registers have to be swapped.
 */
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // r0 and r1 are exactly crossed: swap through r2
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // a1 lives in r0: move it out before r0 is overwritten
      emit_mov(a1, 1);
      if (a0 >= 0)
        emit_mov(a0, 0);
      return;
    }
  }
  if (a0 >= 0 && a0 != 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2101
2102static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
2103{
2104 switch(type) {
2105 case LOADB_STUB: emit_signextend8(rs,rt); break;
2106 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2107 case LOADH_STUB: emit_signextend16(rs,rt); break;
2108 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2109 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2110 default: assert(0);
2111 }
2112}
2113
2114#include "pcsxmem.h"
2115#include "pcsxmem_inline.c"
2116
2117static void do_readstub(int n)
2118{
2119 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
2120 literal_pool(256);
2121 set_jump_target(stubs[n].addr, out);
2122 enum stub_type type=stubs[n].type;
2123 int i=stubs[n].a;
2124 int rs=stubs[n].b;
2125 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2126 u_int reglist=stubs[n].e;
2127 signed char *i_regmap=i_regs->regmap;
2128 int rt;
2129 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2130 rt=get_reg(i_regmap,FTEMP);
2131 }else{
2132 rt=get_reg(i_regmap,rt1[i]);
2133 }
2134 assert(rs>=0);
2135 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2136 void *restore_jump = NULL;
2137 reglist|=(1<<rs);
2138 for(r=0;r<=12;r++) {
2139 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2140 temp=r; break;
2141 }
2142 }
2143 if(rt>=0&&rt1[i]!=0)
2144 reglist&=~(1<<rt);
2145 if(temp==-1) {
2146 save_regs(reglist);
2147 regs_saved=1;
2148 temp=(rs==0)?2:0;
2149 }
2150 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2151 temp2=1;
2152 emit_readword(&mem_rtab,temp);
2153 emit_shrimm(rs,12,temp2);
2154 emit_readword_dualindexedx4(temp,temp2,temp2);
2155 emit_lsls_imm(temp2,1,temp2);
2156 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2157 switch(type) {
2158 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2159 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2160 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2161 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2162 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2163 default: assert(0);
2164 }
2165 }
2166 if(regs_saved) {
2167 restore_jump=out;
2168 emit_jcc(0); // jump to reg restore
2169 }
2170 else
2171 emit_jcc(stubs[n].retaddr); // return address
2172
2173 if(!regs_saved)
2174 save_regs(reglist);
2175 void *handler=NULL;
2176 if(type==LOADB_STUB||type==LOADBU_STUB)
2177 handler=jump_handler_read8;
2178 if(type==LOADH_STUB||type==LOADHU_STUB)
2179 handler=jump_handler_read16;
2180 if(type==LOADW_STUB)
2181 handler=jump_handler_read32;
2182 assert(handler);
2183 pass_args(rs,temp2);
2184 int cc=get_reg(i_regmap,CCREG);
2185 if(cc<0)
2186 emit_loadreg(CCREG,2);
2187 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2188 emit_call(handler);
2189 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2190 mov_loadtype_adj(type,0,rt);
2191 }
2192 if(restore_jump)
2193 set_jump_target(restore_jump, out);
2194 restore_regs(reglist);
2195 emit_jmp(stubs[n].retaddr); // return address
2196}
2197
2198// return memhandler, or get directly accessable address and return 0
2199static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
2200{
2201 u_int l1,l2=0;
2202 l1=((u_int *)table)[addr>>12];
2203 if((l1&(1<<31))==0) {
2204 u_int v=l1<<1;
2205 *addr_host=v+addr;
2206 return NULL;
2207 }
2208 else {
2209 l1<<=1;
2210 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2211 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2212 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2213 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2214 else
2215 l2=((u_int *)l1)[(addr&0xfff)/4];
2216 if((l2&(1<<31))==0) {
2217 u_int v=l2<<1;
2218 *addr_host=v+(addr&0xfff);
2219 return NULL;
2220 }
2221 return (void *)(l2<<1);
2222 }
2223}
2224
2225static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2226{
2227 int rs=get_reg(regmap,target);
2228 int rt=get_reg(regmap,target);
2229 if(rs<0) rs=get_reg(regmap,-1);
2230 assert(rs>=0);
2231 u_int host_addr=0,is_dynamic,far_call=0;
2232 void *handler;
2233 int cc=get_reg(regmap,CCREG);
2234 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2235 return;
2236 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
2237 if (handler == NULL) {
2238 if(rt<0||rt1[i]==0)
2239 return;
2240 if(addr!=host_addr)
2241 emit_movimm_from(addr,rs,host_addr,rs);
2242 switch(type) {
2243 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2244 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2245 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2246 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2247 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2248 default: assert(0);
2249 }
2250 return;
2251 }
2252 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2253 if(is_dynamic) {
2254 if(type==LOADB_STUB||type==LOADBU_STUB)
2255 handler=jump_handler_read8;
2256 if(type==LOADH_STUB||type==LOADHU_STUB)
2257 handler=jump_handler_read16;
2258 if(type==LOADW_STUB)
2259 handler=jump_handler_read32;
2260 }
2261
2262 // call a memhandler
2263 if(rt>=0&&rt1[i]!=0)
2264 reglist&=~(1<<rt);
2265 save_regs(reglist);
2266 if(target==0)
2267 emit_movimm(addr,0);
2268 else if(rs!=0)
2269 emit_mov(rs,0);
2270 int offset=(u_char *)handler-out-8;
2271 if(offset<-33554432||offset>=33554432) {
2272 // unreachable memhandler, a plugin func perhaps
2273 emit_movimm((u_int)handler,12);
2274 far_call=1;
2275 }
2276 if(cc<0)
2277 emit_loadreg(CCREG,2);
2278 if(is_dynamic) {
2279 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2280 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2281 }
2282 else {
2283 emit_readword(&last_count,3);
2284 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2285 emit_add(2,3,2);
2286 emit_writeword(2,&Count);
2287 }
2288
2289 if(far_call)
2290 emit_callreg(12);
2291 else
2292 emit_call(handler);
2293
2294 if(rt>=0&&rt1[i]!=0) {
2295 switch(type) {
2296 case LOADB_STUB: emit_signextend8(0,rt); break;
2297 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2298 case LOADH_STUB: emit_signextend16(0,rt); break;
2299 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2300 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2301 default: assert(0);
2302 }
2303 }
2304 restore_regs(reglist);
2305}
2306
2307static void do_writestub(int n)
2308{
2309 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
2310 literal_pool(256);
2311 set_jump_target(stubs[n].addr, out);
2312 enum stub_type type=stubs[n].type;
2313 int i=stubs[n].a;
2314 int rs=stubs[n].b;
2315 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2316 u_int reglist=stubs[n].e;
2317 signed char *i_regmap=i_regs->regmap;
2318 int rt,r;
2319 if(itype[i]==C1LS||itype[i]==C2LS) {
2320 rt=get_reg(i_regmap,r=FTEMP);
2321 }else{
2322 rt=get_reg(i_regmap,r=rs2[i]);
2323 }
2324 assert(rs>=0);
2325 assert(rt>=0);
2326 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2327 void *restore_jump = NULL;
2328 int reglist2=reglist|(1<<rs)|(1<<rt);
2329 for(rtmp=0;rtmp<=12;rtmp++) {
2330 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2331 temp=rtmp; break;
2332 }
2333 }
2334 if(temp==-1) {
2335 save_regs(reglist);
2336 regs_saved=1;
2337 for(rtmp=0;rtmp<=3;rtmp++)
2338 if(rtmp!=rs&&rtmp!=rt)
2339 {temp=rtmp;break;}
2340 }
2341 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2342 temp2=3;
2343 emit_readword(&mem_wtab,temp);
2344 emit_shrimm(rs,12,temp2);
2345 emit_readword_dualindexedx4(temp,temp2,temp2);
2346 emit_lsls_imm(temp2,1,temp2);
2347 switch(type) {
2348 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2349 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2350 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2351 default: assert(0);
2352 }
2353 if(regs_saved) {
2354 restore_jump=out;
2355 emit_jcc(0); // jump to reg restore
2356 }
2357 else
2358 emit_jcc(stubs[n].retaddr); // return address (invcode check)
2359
2360 if(!regs_saved)
2361 save_regs(reglist);
2362 void *handler=NULL;
2363 switch(type) {
2364 case STOREB_STUB: handler=jump_handler_write8; break;
2365 case STOREH_STUB: handler=jump_handler_write16; break;
2366 case STOREW_STUB: handler=jump_handler_write32; break;
2367 default: assert(0);
2368 }
2369 assert(handler);
2370 pass_args(rs,rt);
2371 if(temp2!=3)
2372 emit_mov(temp2,3);
2373 int cc=get_reg(i_regmap,CCREG);
2374 if(cc<0)
2375 emit_loadreg(CCREG,2);
2376 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2377 // returns new cycle_count
2378 emit_call(handler);
2379 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2380 if(cc<0)
2381 emit_storereg(CCREG,2);
2382 if(restore_jump)
2383 set_jump_target(restore_jump, out);
2384 restore_regs(reglist);
2385 emit_jmp(stubs[n].retaddr);
2386}
2387
2388static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2389{
2390 int rs=get_reg(regmap,-1);
2391 int rt=get_reg(regmap,target);
2392 assert(rs>=0);
2393 assert(rt>=0);
2394 u_int host_addr=0;
2395 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2396 if (handler == NULL) {
2397 if(addr!=host_addr)
2398 emit_movimm_from(addr,rs,host_addr,rs);
2399 switch(type) {
2400 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2401 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2402 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2403 default: assert(0);
2404 }
2405 return;
2406 }
2407
2408 // call a memhandler
2409 save_regs(reglist);
2410 pass_args(rs,rt);
2411 int cc=get_reg(regmap,CCREG);
2412 if(cc<0)
2413 emit_loadreg(CCREG,2);
2414 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2415 emit_movimm((u_int)handler,3);
2416 // returns new cycle_count
2417 emit_call(jump_handler_write_h);
2418 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2419 if(cc<0)
2420 emit_storereg(CCREG,2);
2421 restore_regs(reglist);
2422}
2423
2424static void do_unalignedwritestub(int n)
2425{
2426 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2427 literal_pool(256);
2428 set_jump_target(stubs[n].addr, out);
2429
2430 int i=stubs[n].a;
2431 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2432 int addr=stubs[n].b;
2433 u_int reglist=stubs[n].e;
2434 signed char *i_regmap=i_regs->regmap;
2435 int temp2=get_reg(i_regmap,FTEMP);
2436 int rt;
2437 rt=get_reg(i_regmap,rs2[i]);
2438 assert(rt>=0);
2439 assert(addr>=0);
2440 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2441 reglist|=(1<<addr);
2442 reglist&=~(1<<temp2);
2443
2444#if 1
2445 // don't bother with it and call write handler
2446 save_regs(reglist);
2447 pass_args(addr,rt);
2448 int cc=get_reg(i_regmap,CCREG);
2449 if(cc<0)
2450 emit_loadreg(CCREG,2);
2451 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2452 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2453 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2454 if(cc<0)
2455 emit_storereg(CCREG,2);
2456 restore_regs(reglist);
2457 emit_jmp(stubs[n].retaddr); // return address
2458#else
2459 emit_andimm(addr,0xfffffffc,temp2);
2460 emit_writeword(temp2,&address);
2461
2462 save_regs(reglist);
2463 emit_shrimm(addr,16,1);
2464 int cc=get_reg(i_regmap,CCREG);
2465 if(cc<0) {
2466 emit_loadreg(CCREG,2);
2467 }
2468 emit_movimm((u_int)readmem,0);
2469 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2470 emit_call((int)&indirect_jump_indexed);
2471 restore_regs(reglist);
2472
2473 emit_readword(&readmem_dword,temp2);
2474 int temp=addr; //hmh
2475 emit_shlimm(addr,3,temp);
2476 emit_andimm(temp,24,temp);
2477#ifdef BIG_ENDIAN_MIPS
2478 if (opcode[i]==0x2e) // SWR
2479#else
2480 if (opcode[i]==0x2a) // SWL
2481#endif
2482 emit_xorimm(temp,24,temp);
2483 emit_movimm(-1,HOST_TEMPREG);
2484 if (opcode[i]==0x2a) { // SWL
2485 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2486 emit_orrshr(rt,temp,temp2);
2487 }else{
2488 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2489 emit_orrshl(rt,temp,temp2);
2490 }
2491 emit_readword(&address,addr);
2492 emit_writeword(temp2,&word);
2493 //save_regs(reglist); // don't need to, no state changes
2494 emit_shrimm(addr,16,1);
2495 emit_movimm((u_int)writemem,0);
2496 //emit_call((int)&indirect_jump_indexed);
2497 emit_mov(15,14);
2498 emit_readword_dualindexedx4(0,1,15);
2499 emit_readword(&Count,HOST_TEMPREG);
2500 emit_readword(&next_interupt,2);
2501 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2502 emit_writeword(2,&last_count);
2503 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2504 if(cc<0) {
2505 emit_storereg(CCREG,HOST_TEMPREG);
2506 }
2507 restore_regs(reglist);
2508 emit_jmp(stubs[n].retaddr); // return address
2509#endif
2510}
2511
2512static void do_invstub(int n)
2513{
2514 literal_pool(20);
2515 u_int reglist=stubs[n].a;
2516 set_jump_target(stubs[n].addr, out);
2517 save_regs(reglist);
2518 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2519 emit_call(&invalidate_addr);
2520 restore_regs(reglist);
2521 emit_jmp(stubs[n].retaddr); // return address
2522}
2523
2524void *do_dirty_stub(int i)
2525{
2526 assem_debug("do_dirty_stub %x\n",start+i*4);
2527 u_int addr=(u_int)source;
2528 // Careful about the code output here, verify_dirty needs to parse it.
2529 #ifndef HAVE_ARMV7
2530 emit_loadlp(addr,1);
2531 emit_loadlp((int)copy,2);
2532 emit_loadlp(slen*4,3);
2533 #else
2534 emit_movw(addr&0x0000FFFF,1);
2535 emit_movw(((u_int)copy)&0x0000FFFF,2);
2536 emit_movt(addr&0xFFFF0000,1);
2537 emit_movt(((u_int)copy)&0xFFFF0000,2);
2538 emit_movw(slen*4,3);
2539 #endif
2540 emit_movimm(start+i*4,0);
2541 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
2542 void *entry = out;
2543 load_regs_entry(i);
2544 if (entry == out)
2545 entry = instr_addr[i];
2546 emit_jmp(instr_addr[i]);
2547 return entry;
2548}
2549
2550static void do_dirty_stub_ds()
2551{
2552 // Careful about the code output here, verify_dirty needs to parse it.
2553 #ifndef HAVE_ARMV7
2554 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2555 emit_loadlp((int)copy,2);
2556 emit_loadlp(slen*4,3);
2557 #else
2558 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2559 emit_movw(((u_int)copy)&0x0000FFFF,2);
2560 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2561 emit_movt(((u_int)copy)&0xFFFF0000,2);
2562 emit_movw(slen*4,3);
2563 #endif
2564 emit_movimm(start+1,0);
2565 emit_call(&verify_code_ds);
2566}
2567
2568// FP_STUB
2569static void do_cop1stub(int n)
2570{
2571 literal_pool(256);
2572 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
2573 set_jump_target(stubs[n].addr, out);
2574 int i=stubs[n].a;
2575// int rs=stubs[n].b;
2576 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2577 int ds=stubs[n].d;
2578 if(!ds) {
2579 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2580 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2581 }
2582 //else {printf("fp exception in delay slot\n");}
2583 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2584 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2585 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2586 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2587 emit_jmp(ds?fp_exception_ds:fp_exception);
2588}
2589
2590/* Special assem */
2591
2592static void shift_assemble_arm(int i,struct regstat *i_regs)
2593{
2594 if(rt1[i]) {
2595 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2596 {
2597 signed char s,t,shift;
2598 t=get_reg(i_regs->regmap,rt1[i]);
2599 s=get_reg(i_regs->regmap,rs1[i]);
2600 shift=get_reg(i_regs->regmap,rs2[i]);
2601 if(t>=0){
2602 if(rs1[i]==0)
2603 {
2604 emit_zeroreg(t);
2605 }
2606 else if(rs2[i]==0)
2607 {
2608 assert(s>=0);
2609 if(s!=t) emit_mov(s,t);
2610 }
2611 else
2612 {
2613 emit_andimm(shift,31,HOST_TEMPREG);
2614 if(opcode2[i]==4) // SLLV
2615 {
2616 emit_shl(s,HOST_TEMPREG,t);
2617 }
2618 if(opcode2[i]==6) // SRLV
2619 {
2620 emit_shr(s,HOST_TEMPREG,t);
2621 }
2622 if(opcode2[i]==7) // SRAV
2623 {
2624 emit_sar(s,HOST_TEMPREG,t);
2625 }
2626 }
2627 }
2628 } else { // DSLLV/DSRLV/DSRAV
2629 signed char sh,sl,th,tl,shift;
2630 th=get_reg(i_regs->regmap,rt1[i]|64);
2631 tl=get_reg(i_regs->regmap,rt1[i]);
2632 sh=get_reg(i_regs->regmap,rs1[i]|64);
2633 sl=get_reg(i_regs->regmap,rs1[i]);
2634 shift=get_reg(i_regs->regmap,rs2[i]);
2635 if(tl>=0){
2636 if(rs1[i]==0)
2637 {
2638 emit_zeroreg(tl);
2639 if(th>=0) emit_zeroreg(th);
2640 }
2641 else if(rs2[i]==0)
2642 {
2643 assert(sl>=0);
2644 if(sl!=tl) emit_mov(sl,tl);
2645 if(th>=0&&sh!=th) emit_mov(sh,th);
2646 }
2647 else
2648 {
2649 // FIXME: What if shift==tl ?
2650 assert(shift!=tl);
2651 int temp=get_reg(i_regs->regmap,-1);
2652 int real_th=th;
2653 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2654 assert(sl>=0);
2655 assert(sh>=0);
2656 emit_andimm(shift,31,HOST_TEMPREG);
2657 if(opcode2[i]==0x14) // DSLLV
2658 {
2659 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2660 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2661 emit_orrshr(sl,HOST_TEMPREG,th);
2662 emit_andimm(shift,31,HOST_TEMPREG);
2663 emit_testimm(shift,32);
2664 emit_shl(sl,HOST_TEMPREG,tl);
2665 if(th>=0) emit_cmovne_reg(tl,th);
2666 emit_cmovne_imm(0,tl);
2667 }
2668 if(opcode2[i]==0x16) // DSRLV
2669 {
2670 assert(th>=0);
2671 emit_shr(sl,HOST_TEMPREG,tl);
2672 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2673 emit_orrshl(sh,HOST_TEMPREG,tl);
2674 emit_andimm(shift,31,HOST_TEMPREG);
2675 emit_testimm(shift,32);
2676 emit_shr(sh,HOST_TEMPREG,th);
2677 emit_cmovne_reg(th,tl);
2678 if(real_th>=0) emit_cmovne_imm(0,th);
2679 }
2680 if(opcode2[i]==0x17) // DSRAV
2681 {
2682 assert(th>=0);
2683 emit_shr(sl,HOST_TEMPREG,tl);
2684 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2685 if(real_th>=0) {
2686 assert(temp>=0);
2687 emit_sarimm(th,31,temp);
2688 }
2689 emit_orrshl(sh,HOST_TEMPREG,tl);
2690 emit_andimm(shift,31,HOST_TEMPREG);
2691 emit_testimm(shift,32);
2692 emit_sar(sh,HOST_TEMPREG,th);
2693 emit_cmovne_reg(th,tl);
2694 if(real_th>=0) emit_cmovne_reg(temp,th);
2695 }
2696 }
2697 }
2698 }
2699 }
2700}
2701
2702static void speculate_mov(int rs,int rt)
2703{
2704 if(rt!=0) {
2705 smrv_strong_next|=1<<rt;
2706 smrv[rt]=smrv[rs];
2707 }
2708}
2709
2710static void speculate_mov_weak(int rs,int rt)
2711{
2712 if(rt!=0) {
2713 smrv_weak_next|=1<<rt;
2714 smrv[rt]=smrv[rs];
2715 }
2716}
2717
2718static void speculate_register_values(int i)
2719{
2720 if(i==0) {
2721 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
2722 // gp,sp are likely to stay the same throughout the block
2723 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
2724 smrv_weak_next=~smrv_strong_next;
2725 //printf(" llr %08x\n", smrv[4]);
2726 }
2727 smrv_strong=smrv_strong_next;
2728 smrv_weak=smrv_weak_next;
2729 switch(itype[i]) {
2730 case ALU:
2731 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2732 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
2733 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2734 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
2735 else {
2736 smrv_strong_next&=~(1<<rt1[i]);
2737 smrv_weak_next&=~(1<<rt1[i]);
2738 }
2739 break;
2740 case SHIFTIMM:
2741 smrv_strong_next&=~(1<<rt1[i]);
2742 smrv_weak_next&=~(1<<rt1[i]);
2743 // fallthrough
2744 case IMM16:
2745 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
2746 int value,hr=get_reg(regs[i].regmap,rt1[i]);
2747 if(hr>=0) {
2748 if(get_final_value(hr,i,&value))
2749 smrv[rt1[i]]=value;
2750 else smrv[rt1[i]]=constmap[i][hr];
2751 smrv_strong_next|=1<<rt1[i];
2752 }
2753 }
2754 else {
2755 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2756 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2757 }
2758 break;
2759 case LOAD:
2760 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
2761 // special case for BIOS
2762 smrv[rt1[i]]=0xa0000000;
2763 smrv_strong_next|=1<<rt1[i];
2764 break;
2765 }
2766 // fallthrough
2767 case SHIFT:
2768 case LOADLR:
2769 case MOV:
2770 smrv_strong_next&=~(1<<rt1[i]);
2771 smrv_weak_next&=~(1<<rt1[i]);
2772 break;
2773 case COP0:
2774 case COP2:
2775 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
2776 smrv_strong_next&=~(1<<rt1[i]);
2777 smrv_weak_next&=~(1<<rt1[i]);
2778 }
2779 break;
2780 case C2LS:
2781 if (opcode[i]==0x32) { // LWC2
2782 smrv_strong_next&=~(1<<rt1[i]);
2783 smrv_weak_next&=~(1<<rt1[i]);
2784 }
2785 break;
2786 }
2787#if 0
2788 int r=4;
2789 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
2790 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
2791#endif
2792}
2793
2794enum {
2795 MTYPE_8000 = 0,
2796 MTYPE_8020,
2797 MTYPE_0000,
2798 MTYPE_A000,
2799 MTYPE_1F80,
2800};
2801
2802static int get_ptr_mem_type(u_int a)
2803{
2804 if(a < 0x00200000) {
2805 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2806 // return wrong, must use memhandler for BIOS self-test to pass
2807 // 007 does similar stuff from a00 mirror, weird stuff
2808 return MTYPE_8000;
2809 return MTYPE_0000;
2810 }
2811 if(0x1f800000 <= a && a < 0x1f801000)
2812 return MTYPE_1F80;
2813 if(0x80200000 <= a && a < 0x80800000)
2814 return MTYPE_8020;
2815 if(0xa0000000 <= a && a < 0xa0200000)
2816 return MTYPE_A000;
2817 return MTYPE_8000;
2818}
2819
2820static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
2821{
2822 void *jaddr = NULL;
2823 int type=0;
2824 int mr=rs1[i];
2825 if(((smrv_strong|smrv_weak)>>mr)&1) {
2826 type=get_ptr_mem_type(smrv[mr]);
2827 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
2828 }
2829 else {
2830 // use the mirror we are running on
2831 type=get_ptr_mem_type(start);
2832 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
2833 }
2834
2835 if(type==MTYPE_8020) { // RAM 80200000+ mirror
2836 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
2837 addr=*addr_reg_override=HOST_TEMPREG;
2838 type=0;
2839 }
2840 else if(type==MTYPE_0000) { // RAM 0 mirror
2841 emit_orimm(addr,0x80000000,HOST_TEMPREG);
2842 addr=*addr_reg_override=HOST_TEMPREG;
2843 type=0;
2844 }
2845 else if(type==MTYPE_A000) { // RAM A mirror
2846 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
2847 addr=*addr_reg_override=HOST_TEMPREG;
2848 type=0;
2849 }
2850 else if(type==MTYPE_1F80) { // scratchpad
2851 if (psxH == (void *)0x1f800000) {
2852 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
2853 emit_cmpimm(HOST_TEMPREG,0x1000);
2854 jaddr=out;
2855 emit_jc(0);
2856 }
2857 else {
2858 // do usual RAM check, jump will go to the right handler
2859 type=0;
2860 }
2861 }
2862
2863 if(type==0)
2864 {
2865 emit_cmpimm(addr,RAM_SIZE);
2866 jaddr=out;
2867 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2868 // Hint to branch predictor that the branch is unlikely to be taken
2869 if(rs1[i]>=28)
2870 emit_jno_unlikely(0);
2871 else
2872 #endif
2873 emit_jno(0);
2874 if(ram_offset!=0) {
2875 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2876 addr=*addr_reg_override=HOST_TEMPREG;
2877 }
2878 }
2879
2880 return jaddr;
2881}
2882
2883#define shift_assemble shift_assemble_arm
2884
2885static void loadlr_assemble_arm(int i,struct regstat *i_regs)
2886{
2887 int s,tl,temp,temp2,addr;
2888 int offset;
2889 void *jaddr=0;
2890 int memtarget=0,c=0;
2891 int fastload_reg_override=0;
2892 u_int hr,reglist=0;
2893 tl=get_reg(i_regs->regmap,rt1[i]);
2894 s=get_reg(i_regs->regmap,rs1[i]);
2895 temp=get_reg(i_regs->regmap,-1);
2896 temp2=get_reg(i_regs->regmap,FTEMP);
2897 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2898 assert(addr<0);
2899 offset=imm[i];
2900 for(hr=0;hr<HOST_REGS;hr++) {
2901 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2902 }
2903 reglist|=1<<temp;
2904 if(offset||s<0||c) addr=temp2;
2905 else addr=s;
2906 if(s>=0) {
2907 c=(i_regs->wasconst>>s)&1;
2908 if(c) {
2909 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2910 }
2911 }
2912 if(!c) {
2913 emit_shlimm(addr,3,temp);
2914 if (opcode[i]==0x22||opcode[i]==0x26) {
2915 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2916 }else{
2917 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
2918 }
2919 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
2920 }
2921 else {
2922 if(ram_offset&&memtarget) {
2923 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2924 fastload_reg_override=HOST_TEMPREG;
2925 }
2926 if (opcode[i]==0x22||opcode[i]==0x26) {
2927 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
2928 }else{
2929 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
2930 }
2931 }
2932 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2933 if(!c||memtarget) {
2934 int a=temp2;
2935 if(fastload_reg_override) a=fastload_reg_override;
2936 emit_readword_indexed(0,a,temp2);
2937 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
2938 }
2939 else
2940 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2941 if(rt1[i]) {
2942 assert(tl>=0);
2943 emit_andimm(temp,24,temp);
2944#ifdef BIG_ENDIAN_MIPS
2945 if (opcode[i]==0x26) // LWR
2946#else
2947 if (opcode[i]==0x22) // LWL
2948#endif
2949 emit_xorimm(temp,24,temp);
2950 emit_movimm(-1,HOST_TEMPREG);
2951 if (opcode[i]==0x26) {
2952 emit_shr(temp2,temp,temp2);
2953 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2954 }else{
2955 emit_shl(temp2,temp,temp2);
2956 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2957 }
2958 emit_or(temp2,tl,tl);
2959 }
2960 //emit_storereg(rt1[i],tl); // DEBUG
2961 }
2962 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2963 assert(0);
2964 }
2965}
2966#define loadlr_assemble loadlr_assemble_arm
2967
2968static void cop0_assemble(int i,struct regstat *i_regs)
2969{
2970 if(opcode2[i]==0) // MFC0
2971 {
2972 signed char t=get_reg(i_regs->regmap,rt1[i]);
2973 u_int copr=(source[i]>>11)&0x1f;
2974 //assert(t>=0); // Why does this happen? OOT is weird
2975 if(t>=0&&rt1[i]!=0) {
2976 emit_readword(&reg_cop0[copr],t);
2977 }
2978 }
2979 else if(opcode2[i]==4) // MTC0
2980 {
2981 signed char s=get_reg(i_regs->regmap,rs1[i]);
2982 char copr=(source[i]>>11)&0x1f;
2983 assert(s>=0);
2984 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
2985 if(copr==9||copr==11||copr==12||copr==13) {
2986 emit_readword(&last_count,HOST_TEMPREG);
2987 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
2988 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2989 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
2990 emit_writeword(HOST_CCREG,&Count);
2991 }
2992 // What a mess. The status register (12) can enable interrupts,
2993 // so needs a special case to handle a pending interrupt.
2994 // The interrupt must be taken immediately, because a subsequent
2995 // instruction might disable interrupts again.
2996 if(copr==12||copr==13) {
2997 if (is_delayslot) {
2998 // burn cycles to cause cc_interrupt, which will
2999 // reschedule next_interupt. Relies on CCREG from above.
3000 assem_debug("MTC0 DS %d\n", copr);
3001 emit_writeword(HOST_CCREG,&last_count);
3002 emit_movimm(0,HOST_CCREG);
3003 emit_storereg(CCREG,HOST_CCREG);
3004 emit_loadreg(rs1[i],1);
3005 emit_movimm(copr,0);
3006 emit_call(pcsx_mtc0_ds);
3007 emit_loadreg(rs1[i],s);
3008 return;
3009 }
3010 emit_movimm(start+i*4+4,HOST_TEMPREG);
3011 emit_writeword(HOST_TEMPREG,&pcaddr);
3012 emit_movimm(0,HOST_TEMPREG);
3013 emit_writeword(HOST_TEMPREG,&pending_exception);
3014 }
3015 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3016 //else
3017 if(s==HOST_CCREG)
3018 emit_loadreg(rs1[i],1);
3019 else if(s!=1)
3020 emit_mov(s,1);
3021 emit_movimm(copr,0);
3022 emit_call(pcsx_mtc0);
3023 if(copr==9||copr==11||copr==12||copr==13) {
3024 emit_readword(&Count,HOST_CCREG);
3025 emit_readword(&next_interupt,HOST_TEMPREG);
3026 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3027 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3028 emit_writeword(HOST_TEMPREG,&last_count);
3029 emit_storereg(CCREG,HOST_CCREG);
3030 }
3031 if(copr==12||copr==13) {
3032 assert(!is_delayslot);
3033 emit_readword(&pending_exception,14);
3034 emit_test(14,14);
3035 emit_jne(&do_interrupt);
3036 }
3037 emit_loadreg(rs1[i],s);
3038 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3039 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3040 }
3041 else
3042 {
3043 assert(opcode2[i]==0x10);
3044 //if((source[i]&0x3f)==0x10) // RFE
3045 {
3046 emit_readword(&Status,0);
3047 emit_andimm(0,0x3c,1);
3048 emit_andimm(0,~0xf,0);
3049 emit_orrshr_imm(1,2,0);
3050 emit_writeword(0,&Status);
3051 }
3052 }
3053}
3054
3055static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3056{
3057 switch (copr) {
3058 case 1:
3059 case 3:
3060 case 5:
3061 case 8:
3062 case 9:
3063 case 10:
3064 case 11:
3065 emit_readword(&reg_cop2d[copr],tl);
3066 emit_signextend16(tl,tl);
3067 emit_writeword(tl,&reg_cop2d[copr]); // hmh
3068 break;
3069 case 7:
3070 case 16:
3071 case 17:
3072 case 18:
3073 case 19:
3074 emit_readword(&reg_cop2d[copr],tl);
3075 emit_andimm(tl,0xffff,tl);
3076 emit_writeword(tl,&reg_cop2d[copr]);
3077 break;
3078 case 15:
3079 emit_readword(&reg_cop2d[14],tl); // SXY2
3080 emit_writeword(tl,&reg_cop2d[copr]);
3081 break;
3082 case 28:
3083 case 29:
3084 emit_readword(&reg_cop2d[9],temp);
3085 emit_testimm(temp,0x8000); // do we need this?
3086 emit_andimm(temp,0xf80,temp);
3087 emit_andne_imm(temp,0,temp);
3088 emit_shrimm(temp,7,tl);
3089 emit_readword(&reg_cop2d[10],temp);
3090 emit_testimm(temp,0x8000);
3091 emit_andimm(temp,0xf80,temp);
3092 emit_andne_imm(temp,0,temp);
3093 emit_orrshr_imm(temp,2,tl);
3094 emit_readword(&reg_cop2d[11],temp);
3095 emit_testimm(temp,0x8000);
3096 emit_andimm(temp,0xf80,temp);
3097 emit_andne_imm(temp,0,temp);
3098 emit_orrshl_imm(temp,3,tl);
3099 emit_writeword(tl,&reg_cop2d[copr]);
3100 break;
3101 default:
3102 emit_readword(&reg_cop2d[copr],tl);
3103 break;
3104 }
3105}
3106
3107static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3108{
3109 switch (copr) {
3110 case 15:
3111 emit_readword(&reg_cop2d[13],temp); // SXY1
3112 emit_writeword(sl,&reg_cop2d[copr]);
3113 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3114 emit_readword(&reg_cop2d[14],temp); // SXY2
3115 emit_writeword(sl,&reg_cop2d[14]);
3116 emit_writeword(temp,&reg_cop2d[13]); // SXY1
3117 break;
3118 case 28:
3119 emit_andimm(sl,0x001f,temp);
3120 emit_shlimm(temp,7,temp);
3121 emit_writeword(temp,&reg_cop2d[9]);
3122 emit_andimm(sl,0x03e0,temp);
3123 emit_shlimm(temp,2,temp);
3124 emit_writeword(temp,&reg_cop2d[10]);
3125 emit_andimm(sl,0x7c00,temp);
3126 emit_shrimm(temp,3,temp);
3127 emit_writeword(temp,&reg_cop2d[11]);
3128 emit_writeword(sl,&reg_cop2d[28]);
3129 break;
3130 case 30:
3131 emit_movs(sl,temp);
3132 emit_mvnmi(temp,temp);
3133#ifdef HAVE_ARMV5
3134 emit_clz(temp,temp);
3135#else
3136 emit_movs(temp,HOST_TEMPREG);
3137 emit_movimm(0,temp);
3138 emit_jeq((int)out+4*4);
3139 emit_addpl_imm(temp,1,temp);
3140 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3141 emit_jns((int)out-2*4);
3142#endif
3143 emit_writeword(sl,&reg_cop2d[30]);
3144 emit_writeword(temp,&reg_cop2d[31]);
3145 break;
3146 case 31:
3147 break;
3148 default:
3149 emit_writeword(sl,&reg_cop2d[copr]);
3150 break;
3151 }
3152}
3153
3154static void cop2_assemble(int i,struct regstat *i_regs)
3155{
3156 u_int copr=(source[i]>>11)&0x1f;
3157 signed char temp=get_reg(i_regs->regmap,-1);
3158 if (opcode2[i]==0) { // MFC2
3159 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3160 if(tl>=0&&rt1[i]!=0)
3161 cop2_get_dreg(copr,tl,temp);
3162 }
3163 else if (opcode2[i]==4) { // MTC2
3164 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3165 cop2_put_dreg(copr,sl,temp);
3166 }
3167 else if (opcode2[i]==2) // CFC2
3168 {
3169 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3170 if(tl>=0&&rt1[i]!=0)
3171 emit_readword(&reg_cop2c[copr],tl);
3172 }
3173 else if (opcode2[i]==6) // CTC2
3174 {
3175 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3176 switch(copr) {
3177 case 4:
3178 case 12:
3179 case 20:
3180 case 26:
3181 case 27:
3182 case 29:
3183 case 30:
3184 emit_signextend16(sl,temp);
3185 break;
3186 case 31:
3187 //value = value & 0x7ffff000;
3188 //if (value & 0x7f87e000) value |= 0x80000000;
3189 emit_shrimm(sl,12,temp);
3190 emit_shlimm(temp,12,temp);
3191 emit_testimm(temp,0x7f000000);
3192 emit_testeqimm(temp,0x00870000);
3193 emit_testeqimm(temp,0x0000e000);
3194 emit_orrne_imm(temp,0x80000000,temp);
3195 break;
3196 default:
3197 temp=sl;
3198 break;
3199 }
3200 emit_writeword(temp,&reg_cop2c[copr]);
3201 assert(sl>=0);
3202 }
3203}
3204
3205static void c2op_prologue(u_int op,u_int reglist)
3206{
3207 save_regs_all(reglist);
3208#ifdef PCNT
3209 emit_movimm(op,0);
3210 emit_call((int)pcnt_gte_start);
3211#endif
3212 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3213}
3214
3215static void c2op_epilogue(u_int op,u_int reglist)
3216{
3217#ifdef PCNT
3218 emit_movimm(op,0);
3219 emit_call((int)pcnt_gte_end);
3220#endif
3221 restore_regs_all(reglist);
3222}
3223
3224static void c2op_call_MACtoIR(int lm,int need_flags)
3225{
3226 if(need_flags)
3227 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
3228 else
3229 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
3230}
3231
3232static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3233{
3234 emit_call(func);
3235 // func is C code and trashes r0
3236 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3237 if(need_flags||need_ir)
3238 c2op_call_MACtoIR(lm,need_flags);
3239 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
3240}
3241
// Emit host code for the GTE (COP2) operation at instruction index i.
//   i      - index into source[] / gte_unneeded[] of the instruction
//   i_regs - register state; its regmap[] says which host registers are
//            currently mapped and therefore have to be preserved
// The low 6 bits of the instruction word select the operation. Nothing is
// emitted when gte_handlers[] has no entry for it. Unless DRC_DBG is
// defined, several operations get specialised call sequences; everything
// else calls the generic handler from gte_handlers[] / gte_handlers_nf[].
static void c2op_assemble(int i,struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f;
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // Collect every host register that currently has a mapping.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  // Default: only the caller-saved subset is passed to prologue/epilogue.
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    // Flags are needed unless the top bit of the liveness word is set.
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    // IR results are needed unless all three 0xe00 bits are marked unneeded
    // (presumably one bit per IR1..IR3 - confirm against the liveness pass).
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
                source[i],gte_unneeded[i+1],need_flags,need_ir);
    // The hack switch forces the flagless code paths.
    if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
      need_flags=0;
    // Option bits taken straight from the instruction word.
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        // Operand selectors encoded in the instruction word.
        int v = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        // This path uses r4-r7 as arguments, so they join the save list.
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          // v==3: build the packed vector from three halfword loads
          emit_movzwl_indexed(9*4,0,4); // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        // Selector value 3 uses the pointer stored in zeromem_ptr instead
        // of a matrix / vector inside the register file.
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword(&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword(&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_call(gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_call(gteMACtoIR_flags_neon);
        }
#else
        // Dedicated routine for the cv==3, shift==1 combination; otherwise
        // the shift is passed in r1 to the generic (flag / no-flag) routine.
        if(cv==3&&shift)
          emit_call((int)gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        // Generic handler; it gets the instruction word via psxRegs.code.
        c2op_prologue(c2op,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,reglist);
        emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
        if(need_flags||need_ir) {
          // r0 = &psxRegs.CP2D.r[0] again before the MAC->IR helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,reglist);
        emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
        if(need_flags||need_ir) {
          // r0 = &psxRegs.CP2D.r[0] again before the MAC->IR helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        // Everything else (and, with DRC_DBG, every operation) goes through
        // the generic handler tables.
        c2op_prologue(c2op,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
#endif
        emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
        break;
    }
    // Must use the same reglist that was handed to the prologue above.
    c2op_epilogue(c2op,reglist);
  }
}
3361
3362static void cop1_unusable(int i,struct regstat *i_regs)
3363{
3364 // XXX: should just just do the exception instead
3365 //if(!cop1_usable)
3366 {
3367 void *jaddr=out;
3368 emit_jmp(0);
3369 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3370 }
3371}
3372
// Assemble a COP1 instruction. Every COP1 instruction is handled the same
// way here: by emitting the "coprocessor unusable" path.
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3377
// Emit host code for the multiply / divide instruction at index i.
//   i      - instruction index (into rs1[], rs2[], opcode2[])
//   i_regs - register state used to look up the host registers holding the
//            two source operands and HI / LO
// Only the 32-bit operations (opcode2 bit 2 clear) are implemented; the
// 64-bit ones hit assert(0). When either source register number is zero,
// HI and LO are simply cleared (if they are mapped).
//
// The division sequences branch to hard-coded byte offsets relative to the
// current output position (out), so instructions must not be added to or
// removed from them without adjusting those offsets.
static void multdiv_assemble_arm(int i,struct regstat *i_regs)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  // case 0x1C: DMULT
  // case 0x1D: DMULTU
  // case 0x1E: DDIV
  // case 0x1F: DDIVU
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      if(opcode2[i]==0x18) // MULT
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        emit_smull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x19) // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        emit_umull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x1A) // DIV
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]);
        signed char d2=get_reg(i_regs->regmap,rs2[i]);
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        // Work on magnitudes: remainder starts as the dividend, HOST_TEMPREG
        // holds the divisor; the signs are put back at the end.
        emit_movs(d1,remainder);
        emit_movimm(0xffffffff,quotient);
        emit_negmi(quotient,quotient); // .. quotient and ..
        emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
        emit_movs(d2,HOST_TEMPREG);
        // NOTE(review): the fixed +52 appears to be counted against the
        // HAVE_ARMV5 sequence below (assuming one 4-byte instruction per
        // emit_* call); the non-ARMV5 sequence emits two more instructions.
        // Confirm the branch target is still correct in that configuration.
        emit_jeq((int)out+52); // Division by zero
        emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
#ifdef HAVE_ARMV5
        emit_clz(HOST_TEMPREG,quotient);
        emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
#else
        // No CLZ available: count the shift with a small loop instead
        // (the jns jumps two instructions back).
        emit_movimm(0,quotient);
        emit_addpl_imm(quotient,1,quotient);
        emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_jns((int)out-2*4);
#endif
        emit_orimm(quotient,1<<31,quotient);
        emit_shr(quotient,quotient,quotient);
        // Subtract-and-shift loop; the jcc goes back four instructions to
        // the cmp.
        emit_cmp(remainder,HOST_TEMPREG);
        emit_subcs(remainder,HOST_TEMPREG,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_jcc(out-16); // -4
        // Fix up signs: negate the quotient when the operand signs differ,
        // and the remainder when the dividend is negative.
        emit_teq(d1,d2);
        emit_negmi(quotient,quotient);
        emit_test(d1,d1);
        emit_negmi(remainder,remainder);
      }
      if(opcode2[i]==0x1B) // DIVU
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
        signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        emit_mov(d1,remainder);
        emit_movimm(0xffffffff,quotient); // div0 case
        emit_test(d2,d2);
        // NOTE(review): as in DIV, the fixed +40 appears to match the
        // HAVE_ARMV5 sequence only - confirm for the non-ARMV5 build.
        emit_jeq((int)out+40); // Division by zero
#ifdef HAVE_ARMV5
        emit_clz(d2,HOST_TEMPREG);
        emit_movimm(1<<31,quotient);
        emit_shl(d2,HOST_TEMPREG,d2);
#else
        // No CLZ available: count the shift with a small loop instead
        // (the jns jumps two instructions back).
        emit_movimm(0,HOST_TEMPREG);
        emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
        emit_lslpls_imm(d2,1,d2);
        emit_jns((int)out-2*4);
        emit_movimm(1<<31,quotient);
#endif
        emit_shr(quotient,HOST_TEMPREG,quotient);
        // Subtract-and-shift loop; the jcc goes back four instructions to
        // the cmp. Note that d2 itself is shifted in place here.
        emit_cmp(remainder,d2);
        emit_subcs(remainder,d2,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrcc_imm(d2,1,d2);
        emit_jcc(out-16); // -4
      }
    }
    else // 64-bit
      assert(0);
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if(hr>=0) emit_zeroreg(hr);
    if(lr>=0) emit_zeroreg(lr);
  }
}
// Name alias under which this implementation is referenced.
#define multdiv_assemble multdiv_assemble_arm
3502
// Intentionally empty on ARM; r is unused.
static void do_preload_rhash(int r) {
  // Don't need this for ARM. On x86, this puts the value 0xf8 into the
  // register. On ARM the hash can be done with a single instruction (below)
}
3507
// Emit code that loads host register ht with the address of mini_ht,
// computed as FP plus the offset of mini_ht from dynarec_local.
static void do_preload_rhtbl(int ht) {
  emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
}
3511
// Emit the mini hash table hash: rh = rs & 0xf8. This is the same slot
// that do_miniht_insert selects with (addr&0xFF)>>3, expressed as a byte
// offset assuming 8-byte table entries.
static void do_rhash(int rs,int rh) {
  emit_andimm(rs,0xf8,rh);
}
3515
// Emit "ldr rh,[ht,rh]!": load rh from address ht+rh, with writeback of
// that address into ht (the '!' in the logged mnemonic).
static void do_miniht_load(int ht,int rh) {
  assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
  output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
}
3520
3521static void do_miniht_jump(int rs,int rh,int ht) {
3522 emit_cmp(rh,rs);
3523 emit_ldreq_indexed(ht,4,15);
3524 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3525 emit_mov(rs,7);
3526 emit_jmp(jump_vaddr_reg[7]);
3527 #else
3528 emit_jmp(jump_vaddr_reg[rs]);
3529 #endif
3530}
3531
3532static void do_miniht_insert(u_int return_address,int rt,int temp) {
3533 #ifndef HAVE_ARMV7
3534 emit_movimm(return_address,rt); // PC into link register
3535 add_to_linker(out,return_address,1);
3536 emit_pcreladdr(temp);
3537 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3538 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
3539 #else
3540 emit_movw(return_address&0x0000FFFF,rt);
3541 add_to_linker(out,return_address,1);
3542 emit_pcreladdr(temp);
3543 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
3544 emit_movt(return_address&0xFFFF0000,rt);
3545 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3546 #endif
3547}
3548
3549static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u)
3550{
3551 //if(dirty_pre==dirty) return;
3552 int hr,reg;
3553 for(hr=0;hr<HOST_REGS;hr++) {
3554 if(hr!=EXCLUDE_REG) {
3555 reg=pre[hr];
3556 if(((~u)>>(reg&63))&1) {
3557 if(reg>0) {
3558 if(((dirty_pre&~dirty)>>hr)&1) {
3559 if(reg>0&&reg<34) {
3560 emit_storereg(reg,hr);
3561 }
3562 else if(reg>=64) {
3563 assert(0);
3564 }
3565 }
3566 }
3567 }
3568 }
3569 }
3570}
3571
3572static void mark_clear_cache(void *target)
3573{
3574 u_long offset = (u_char *)target - translation_cache;
3575 u_int mask = 1u << ((offset >> 12) & 31);
3576 if (!(needs_clear_cache[offset >> 17] & mask)) {
3577 char *start = (char *)((u_long)target & ~4095ul);
3578 start_tcache_write(start, start + 4096);
3579 needs_clear_cache[offset >> 17] |= mask;
3580 }
3581}
3582
3583// Clearing the cache is rather slow on ARM Linux, so mark the areas
3584// that need to be cleared, and then only clear these areas once.
3585static void do_clear_cache()
3586{
3587 int i,j;
3588 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
3589 {
3590 u_int bitmap=needs_clear_cache[i];
3591 if(bitmap) {
3592 u_char *start, *end;
3593 for(j=0;j<32;j++)
3594 {
3595 if(bitmap&(1<<j)) {
3596 start=translation_cache+i*131072+j*4096;
3597 end=start+4095;
3598 j++;
3599 while(j<32) {
3600 if(bitmap&(1<<j)) {
3601 end+=4096;
3602 j++;
3603 }else{
3604 end_tcache_write(start, end);
3605 break;
3606 }
3607 }
3608 }
3609 }
3610 needs_clear_cache[i]=0;
3611 }
3612 }
3613}
3614
// CPU-architecture-specific initialization
// This backend has nothing to set up, so the body is intentionally empty.
static void arch_init() {
}
3618
3619// vim:shiftwidth=2:expandtab