psxmem: use rounding that's more likely to work
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
665f33e1 31#include "arm_features.h"
054175e9 32
a327ad27 33#if !BASE_ADDR_FIXED
bdeade46 34char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
35#endif
36
4d646738 37#ifndef __MACH__
38#define CALLER_SAVE_REGS 0x100f
39#else
40#define CALLER_SAVE_REGS 0x120f
41#endif
42
57871462 43extern int cycle_count;
44extern int last_count;
45extern int pcaddr;
46extern int pending_exception;
47extern int branch_target;
48extern uint64_t readmem_dword;
3d624f89 49#ifdef MUPEN64
57871462 50extern precomp_instr fake_pc;
3d624f89 51#endif
57871462 52extern void *dynarec_local;
53extern u_int memory_map[1048576];
54extern u_int mini_ht[32][2];
55extern u_int rounding_modes[4];
56
57void indirect_jump_indexed();
58void indirect_jump();
59void do_interrupt();
60void jump_vaddr_r0();
61void jump_vaddr_r1();
62void jump_vaddr_r2();
63void jump_vaddr_r3();
64void jump_vaddr_r4();
65void jump_vaddr_r5();
66void jump_vaddr_r6();
67void jump_vaddr_r7();
68void jump_vaddr_r8();
69void jump_vaddr_r9();
70void jump_vaddr_r10();
71void jump_vaddr_r12();
72
73const u_int jump_vaddr_reg[16] = {
74 (int)jump_vaddr_r0,
75 (int)jump_vaddr_r1,
76 (int)jump_vaddr_r2,
77 (int)jump_vaddr_r3,
78 (int)jump_vaddr_r4,
79 (int)jump_vaddr_r5,
80 (int)jump_vaddr_r6,
81 (int)jump_vaddr_r7,
82 (int)jump_vaddr_r8,
83 (int)jump_vaddr_r9,
84 (int)jump_vaddr_r10,
85 0,
86 (int)jump_vaddr_r12,
87 0,
88 0,
89 0};
90
0bbd1454 91void invalidate_addr_r0();
92void invalidate_addr_r1();
93void invalidate_addr_r2();
94void invalidate_addr_r3();
95void invalidate_addr_r4();
96void invalidate_addr_r5();
97void invalidate_addr_r6();
98void invalidate_addr_r7();
99void invalidate_addr_r8();
100void invalidate_addr_r9();
101void invalidate_addr_r10();
102void invalidate_addr_r12();
103
104const u_int invalidate_addr_reg[16] = {
105 (int)invalidate_addr_r0,
106 (int)invalidate_addr_r1,
107 (int)invalidate_addr_r2,
108 (int)invalidate_addr_r3,
109 (int)invalidate_addr_r4,
110 (int)invalidate_addr_r5,
111 (int)invalidate_addr_r6,
112 (int)invalidate_addr_r7,
113 (int)invalidate_addr_r8,
114 (int)invalidate_addr_r9,
115 (int)invalidate_addr_r10,
116 0,
117 (int)invalidate_addr_r12,
118 0,
119 0,
120 0};
121
57871462 122#include "fpu.h"
123
dd3a91a1 124unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
125
57871462 126/* Linker */
127
128void set_jump_target(int addr,u_int target)
129{
130 u_char *ptr=(u_char *)addr;
131 u_int *ptr2=(u_int *)ptr;
132 if(ptr[3]==0xe2) {
133 assert((target-(u_int)ptr2-8)<1024);
134 assert((addr&3)==0);
135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
137 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
138 }
139 else if(ptr[3]==0x72) {
140 // generated by emit_jno_unlikely
141 if((target-(u_int)ptr2-8)<1024) {
142 assert((addr&3)==0);
143 assert((target&3)==0);
144 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
145 }
146 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
147 assert((addr&3)==0);
148 assert((target&3)==0);
149 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
150 }
151 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
152 }
153 else {
154 assert((ptr[3]&0x0e)==0xa);
155 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157}
158
159// This optionally copies the instruction from the target of the branch into
160// the space before the branch. Works, but the difference in speed is
161// usually insignificant.
162void set_jump_target_fillslot(int addr,u_int target,int copy)
163{
164 u_char *ptr=(u_char *)addr;
165 u_int *ptr2=(u_int *)ptr;
166 assert(!copy||ptr2[-1]==0xe28dd000);
167 if(ptr[3]==0xe2) {
168 assert(!copy);
169 assert((target-(u_int)ptr2-8)<4096);
170 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
171 }
172 else {
173 assert((ptr[3]&0x0e)==0xa);
174 u_int target_insn=*(u_int *)target;
175 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
176 copy=0;
177 }
178 if((target_insn&0x0c100000)==0x04100000) { // Load
179 copy=0;
180 }
181 if(target_insn&0x08000000) {
182 copy=0;
183 }
184 if(copy) {
185 ptr2[-1]=target_insn;
186 target+=4;
187 }
188 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
189 }
190}
191
192/* Literal pool */
193add_literal(int addr,int val)
194{
15776b68 195 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 196 literals[literalcount][0]=addr;
197 literals[literalcount][1]=val;
198 literalcount++;
199}
200
f76eeef9 201void *kill_pointer(void *stub)
57871462 202{
203 int *ptr=(int *)(stub+4);
204 assert((*ptr&0x0ff00000)==0x05900000);
205 u_int offset=*ptr&0xfff;
206 int **l_ptr=(void *)ptr+offset+8;
207 int *i_ptr=*l_ptr;
208 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 209 return i_ptr;
57871462 210}
211
f968d35d 212// find where external branch is liked to using addr of it's stub:
213// get address that insn one after stub loads (dyna_linker arg1),
214// treat it as a pointer to branch insn,
215// return addr where that branch jumps to
57871462 216int get_pointer(void *stub)
217{
218 //printf("get_pointer(%x)\n",(int)stub);
219 int *ptr=(int *)(stub+4);
f968d35d 220 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 221 u_int offset=*ptr&0xfff;
222 int **l_ptr=(void *)ptr+offset+8;
223 int *i_ptr=*l_ptr;
224 assert((*i_ptr&0x0f000000)==0x0a000000);
225 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
226}
227
228// Find the "clean" entry point from a "dirty" entry point
229// by skipping past the call to verify_code
230u_int get_clean_addr(int addr)
231{
232 int *ptr=(int *)addr;
665f33e1 233 #ifndef HAVE_ARMV7
57871462 234 ptr+=4;
235 #else
236 ptr+=6;
237 #endif
238 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
239 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
240 ptr++;
241 if((*ptr&0xFF000000)==0xea000000) {
242 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
243 }
244 return (u_int)ptr;
245}
246
247int verify_dirty(int addr)
248{
249 u_int *ptr=(u_int *)addr;
665f33e1 250 #ifndef HAVE_ARMV7
57871462 251 // get from literal pool
15776b68 252 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 253 u_int offset=*ptr&0xfff;
254 u_int *l_ptr=(void *)ptr+offset+8;
255 u_int source=l_ptr[0];
256 u_int copy=l_ptr[1];
257 u_int len=l_ptr[2];
258 ptr+=4;
259 #else
260 // ARMv7 movw/movt
261 assert((*ptr&0xFFF00000)==0xe3000000);
262 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
263 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
264 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
265 ptr+=6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 269#ifndef DISABLE_TLB
cfcba99a 270 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 271 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
272 unsigned int page=source>>12;
273 unsigned int map_value=memory_map[page];
274 if(map_value>=0x80000000) return 0;
275 while(page<((source+len-1)>>12)) {
276 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
277 }
278 source = source+(map_value<<2);
279 }
63cb0298 280#endif
57871462 281 //printf("verify_dirty: %x %x %x\n",source,copy,len);
282 return !memcmp((void *)source,(void *)copy,len);
283}
284
285// This doesn't necessarily find all clean entry points, just
286// guarantees that it's not dirty
287int isclean(int addr)
288{
665f33e1 289 #ifndef HAVE_ARMV7
57871462 290 int *ptr=((u_int *)addr)+4;
291 #else
292 int *ptr=((u_int *)addr)+6;
293 #endif
294 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
295 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
296 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
297 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
298 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
299 return 1;
300}
301
4a35de07 302// get source that block at addr was compiled from (host pointers)
57871462 303void get_bounds(int addr,u_int *start,u_int *end)
304{
305 u_int *ptr=(u_int *)addr;
665f33e1 306 #ifndef HAVE_ARMV7
57871462 307 // get from literal pool
15776b68 308 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 309 u_int offset=*ptr&0xfff;
310 u_int *l_ptr=(void *)ptr+offset+8;
311 u_int source=l_ptr[0];
312 //u_int copy=l_ptr[1];
313 u_int len=l_ptr[2];
314 ptr+=4;
315 #else
316 // ARMv7 movw/movt
317 assert((*ptr&0xFFF00000)==0xe3000000);
318 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
319 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
320 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
321 ptr+=6;
322 #endif
323 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
324 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 325#ifndef DISABLE_TLB
cfcba99a 326 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 327 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
328 if(memory_map[source>>12]>=0x80000000) source = 0;
329 else source = source+(memory_map[source>>12]<<2);
330 }
63cb0298 331#endif
57871462 332 *start=source;
333 *end=source+len;
334}
335
336/* Register allocation */
337
338// Note: registers are allocated clean (unmodified state)
339// if you intend to modify the register, you must call dirty_reg().
340void alloc_reg(struct regstat *cur,int i,signed char reg)
341{
342 int r,hr;
343 int preferred_reg = (reg&7);
344 if(reg==CCREG) preferred_reg=HOST_CCREG;
345 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
346
347 // Don't allocate unused registers
348 if((cur->u>>reg)&1) return;
349
350 // see if it's already allocated
351 for(hr=0;hr<HOST_REGS;hr++)
352 {
353 if(cur->regmap[hr]==reg) return;
354 }
355
356 // Keep the same mapping if the register was already allocated in a loop
357 preferred_reg = loop_reg(i,reg,preferred_reg);
358
359 // Try to allocate the preferred register
360 if(cur->regmap[preferred_reg]==-1) {
361 cur->regmap[preferred_reg]=reg;
362 cur->dirty&=~(1<<preferred_reg);
363 cur->isconst&=~(1<<preferred_reg);
364 return;
365 }
366 r=cur->regmap[preferred_reg];
367 if(r<64&&((cur->u>>r)&1)) {
368 cur->regmap[preferred_reg]=reg;
369 cur->dirty&=~(1<<preferred_reg);
370 cur->isconst&=~(1<<preferred_reg);
371 return;
372 }
373 if(r>=64&&((cur->uu>>(r&63))&1)) {
374 cur->regmap[preferred_reg]=reg;
375 cur->dirty&=~(1<<preferred_reg);
376 cur->isconst&=~(1<<preferred_reg);
377 return;
378 }
379
380 // Clear any unneeded registers
381 // We try to keep the mapping consistent, if possible, because it
382 // makes branches easier (especially loops). So we try to allocate
383 // first (see above) before removing old mappings. If this is not
384 // possible then go ahead and clear out the registers that are no
385 // longer needed.
386 for(hr=0;hr<HOST_REGS;hr++)
387 {
388 r=cur->regmap[hr];
389 if(r>=0) {
390 if(r<64) {
391 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
392 }
393 else
394 {
395 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
396 }
397 }
398 }
399 // Try to allocate any available register, but prefer
400 // registers that have not been used recently.
401 if(i>0) {
402 for(hr=0;hr<HOST_REGS;hr++) {
403 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
404 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
405 cur->regmap[hr]=reg;
406 cur->dirty&=~(1<<hr);
407 cur->isconst&=~(1<<hr);
408 return;
409 }
410 }
411 }
412 }
413 // Try to allocate any available register
414 for(hr=0;hr<HOST_REGS;hr++) {
415 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
416 cur->regmap[hr]=reg;
417 cur->dirty&=~(1<<hr);
418 cur->isconst&=~(1<<hr);
419 return;
420 }
421 }
422
423 // Ok, now we have to evict someone
424 // Pick a register we hopefully won't need soon
425 u_char hsn[MAXREG+1];
426 memset(hsn,10,sizeof(hsn));
427 int j;
428 lsn(hsn,i,&preferred_reg);
429 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
430 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
431 if(i>0) {
432 // Don't evict the cycle count at entry points, otherwise the entry
433 // stub will have to write it.
434 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
435 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
436 for(j=10;j>=3;j--)
437 {
438 // Alloc preferred register if available
439 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
440 for(hr=0;hr<HOST_REGS;hr++) {
441 // Evict both parts of a 64-bit register
442 if((cur->regmap[hr]&63)==r) {
443 cur->regmap[hr]=-1;
444 cur->dirty&=~(1<<hr);
445 cur->isconst&=~(1<<hr);
446 }
447 }
448 cur->regmap[preferred_reg]=reg;
449 return;
450 }
451 for(r=1;r<=MAXREG;r++)
452 {
453 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
454 for(hr=0;hr<HOST_REGS;hr++) {
455 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
456 if(cur->regmap[hr]==r+64) {
457 cur->regmap[hr]=reg;
458 cur->dirty&=~(1<<hr);
459 cur->isconst&=~(1<<hr);
460 return;
461 }
462 }
463 }
464 for(hr=0;hr<HOST_REGS;hr++) {
465 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
476 }
477 }
478 for(j=10;j>=0;j--)
479 {
480 for(r=1;r<=MAXREG;r++)
481 {
482 if(hsn[r]==j) {
483 for(hr=0;hr<HOST_REGS;hr++) {
484 if(cur->regmap[hr]==r+64) {
485 cur->regmap[hr]=reg;
486 cur->dirty&=~(1<<hr);
487 cur->isconst&=~(1<<hr);
488 return;
489 }
490 }
491 for(hr=0;hr<HOST_REGS;hr++) {
492 if(cur->regmap[hr]==r) {
493 cur->regmap[hr]=reg;
494 cur->dirty&=~(1<<hr);
495 cur->isconst&=~(1<<hr);
496 return;
497 }
498 }
499 }
500 }
501 }
c43b5311 502 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 503}
504
505void alloc_reg64(struct regstat *cur,int i,signed char reg)
506{
507 int preferred_reg = 8+(reg&1);
508 int r,hr;
509
510 // allocate the lower 32 bits
511 alloc_reg(cur,i,reg);
512
513 // Don't allocate unused registers
514 if((cur->uu>>reg)&1) return;
515
516 // see if the upper half is already allocated
517 for(hr=0;hr<HOST_REGS;hr++)
518 {
519 if(cur->regmap[hr]==reg+64) return;
520 }
521
522 // Keep the same mapping if the register was already allocated in a loop
523 preferred_reg = loop_reg(i,reg,preferred_reg);
524
525 // Try to allocate the preferred register
526 if(cur->regmap[preferred_reg]==-1) {
527 cur->regmap[preferred_reg]=reg|64;
528 cur->dirty&=~(1<<preferred_reg);
529 cur->isconst&=~(1<<preferred_reg);
530 return;
531 }
532 r=cur->regmap[preferred_reg];
533 if(r<64&&((cur->u>>r)&1)) {
534 cur->regmap[preferred_reg]=reg|64;
535 cur->dirty&=~(1<<preferred_reg);
536 cur->isconst&=~(1<<preferred_reg);
537 return;
538 }
539 if(r>=64&&((cur->uu>>(r&63))&1)) {
540 cur->regmap[preferred_reg]=reg|64;
541 cur->dirty&=~(1<<preferred_reg);
542 cur->isconst&=~(1<<preferred_reg);
543 return;
544 }
545
546 // Clear any unneeded registers
547 // We try to keep the mapping consistent, if possible, because it
548 // makes branches easier (especially loops). So we try to allocate
549 // first (see above) before removing old mappings. If this is not
550 // possible then go ahead and clear out the registers that are no
551 // longer needed.
552 for(hr=HOST_REGS-1;hr>=0;hr--)
553 {
554 r=cur->regmap[hr];
555 if(r>=0) {
556 if(r<64) {
557 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
558 }
559 else
560 {
561 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
562 }
563 }
564 }
565 // Try to allocate any available register, but prefer
566 // registers that have not been used recently.
567 if(i>0) {
568 for(hr=0;hr<HOST_REGS;hr++) {
569 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
570 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
571 cur->regmap[hr]=reg|64;
572 cur->dirty&=~(1<<hr);
573 cur->isconst&=~(1<<hr);
574 return;
575 }
576 }
577 }
578 }
579 // Try to allocate any available register
580 for(hr=0;hr<HOST_REGS;hr++) {
581 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
582 cur->regmap[hr]=reg|64;
583 cur->dirty&=~(1<<hr);
584 cur->isconst&=~(1<<hr);
585 return;
586 }
587 }
588
589 // Ok, now we have to evict someone
590 // Pick a register we hopefully won't need soon
591 u_char hsn[MAXREG+1];
592 memset(hsn,10,sizeof(hsn));
593 int j;
594 lsn(hsn,i,&preferred_reg);
595 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
596 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
597 if(i>0) {
598 // Don't evict the cycle count at entry points, otherwise the entry
599 // stub will have to write it.
600 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
601 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
602 for(j=10;j>=3;j--)
603 {
604 // Alloc preferred register if available
605 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
606 for(hr=0;hr<HOST_REGS;hr++) {
607 // Evict both parts of a 64-bit register
608 if((cur->regmap[hr]&63)==r) {
609 cur->regmap[hr]=-1;
610 cur->dirty&=~(1<<hr);
611 cur->isconst&=~(1<<hr);
612 }
613 }
614 cur->regmap[preferred_reg]=reg|64;
615 return;
616 }
617 for(r=1;r<=MAXREG;r++)
618 {
619 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
620 for(hr=0;hr<HOST_REGS;hr++) {
621 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
622 if(cur->regmap[hr]==r+64) {
623 cur->regmap[hr]=reg|64;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629 }
630 for(hr=0;hr<HOST_REGS;hr++) {
631 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
642 }
643 }
644 for(j=10;j>=0;j--)
645 {
646 for(r=1;r<=MAXREG;r++)
647 {
648 if(hsn[r]==j) {
649 for(hr=0;hr<HOST_REGS;hr++) {
650 if(cur->regmap[hr]==r+64) {
651 cur->regmap[hr]=reg|64;
652 cur->dirty&=~(1<<hr);
653 cur->isconst&=~(1<<hr);
654 return;
655 }
656 }
657 for(hr=0;hr<HOST_REGS;hr++) {
658 if(cur->regmap[hr]==r) {
659 cur->regmap[hr]=reg|64;
660 cur->dirty&=~(1<<hr);
661 cur->isconst&=~(1<<hr);
662 return;
663 }
664 }
665 }
666 }
667 }
c43b5311 668 SysPrintf("This shouldn't happen");exit(1);
57871462 669}
670
671// Allocate a temporary register. This is done without regard to
672// dirty status or whether the register we request is on the unneeded list
673// Note: This will only allocate one register, even if called multiple times
674void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
675{
676 int r,hr;
677 int preferred_reg = -1;
678
679 // see if it's already allocated
680 for(hr=0;hr<HOST_REGS;hr++)
681 {
682 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
683 }
684
685 // Try to allocate any available register
686 for(hr=HOST_REGS-1;hr>=0;hr--) {
687 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694
695 // Find an unneeded register
696 for(hr=HOST_REGS-1;hr>=0;hr--)
697 {
698 r=cur->regmap[hr];
699 if(r>=0) {
700 if(r<64) {
701 if((cur->u>>r)&1) {
702 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
703 cur->regmap[hr]=reg;
704 cur->dirty&=~(1<<hr);
705 cur->isconst&=~(1<<hr);
706 return;
707 }
708 }
709 }
710 else
711 {
712 if((cur->uu>>(r&63))&1) {
713 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 }
722 }
723
724 // Ok, now we have to evict someone
725 // Pick a register we hopefully won't need soon
726 // TODO: we might want to follow unconditional jumps here
727 // TODO: get rid of dupe code and make this into a function
728 u_char hsn[MAXREG+1];
729 memset(hsn,10,sizeof(hsn));
730 int j;
731 lsn(hsn,i,&preferred_reg);
732 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
733 if(i>0) {
734 // Don't evict the cycle count at entry points, otherwise the entry
735 // stub will have to write it.
736 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
737 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
738 for(j=10;j>=3;j--)
739 {
740 for(r=1;r<=MAXREG;r++)
741 {
742 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
743 for(hr=0;hr<HOST_REGS;hr++) {
744 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
745 if(cur->regmap[hr]==r+64) {
746 cur->regmap[hr]=reg;
747 cur->dirty&=~(1<<hr);
748 cur->isconst&=~(1<<hr);
749 return;
750 }
751 }
752 }
753 for(hr=0;hr<HOST_REGS;hr++) {
754 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
765 }
766 }
767 for(j=10;j>=0;j--)
768 {
769 for(r=1;r<=MAXREG;r++)
770 {
771 if(hsn[r]==j) {
772 for(hr=0;hr<HOST_REGS;hr++) {
773 if(cur->regmap[hr]==r+64) {
774 cur->regmap[hr]=reg;
775 cur->dirty&=~(1<<hr);
776 cur->isconst&=~(1<<hr);
777 return;
778 }
779 }
780 for(hr=0;hr<HOST_REGS;hr++) {
781 if(cur->regmap[hr]==r) {
782 cur->regmap[hr]=reg;
783 cur->dirty&=~(1<<hr);
784 cur->isconst&=~(1<<hr);
785 return;
786 }
787 }
788 }
789 }
790 }
c43b5311 791 SysPrintf("This shouldn't happen");exit(1);
57871462 792}
793// Allocate a specific ARM register.
794void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
795{
796 int n;
f776eb14 797 int dirty=0;
57871462 798
799 // see if it's already allocated (and dealloc it)
800 for(n=0;n<HOST_REGS;n++)
801 {
f776eb14 802 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
803 dirty=(cur->dirty>>n)&1;
804 cur->regmap[n]=-1;
805 }
57871462 806 }
807
808 cur->regmap[hr]=reg;
809 cur->dirty&=~(1<<hr);
f776eb14 810 cur->dirty|=dirty<<hr;
57871462 811 cur->isconst&=~(1<<hr);
812}
813
814// Alloc cycle count into dedicated register
815alloc_cc(struct regstat *cur,int i)
816{
817 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
818}
819
820/* Special alloc */
821
822
823/* Assembler */
824
/* Printable names of the 16 host registers, indexed by register number
 * (used by the assem_debug traces in the emitters). */
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
842
843void output_byte(u_char byte)
844{
845 *(out++)=byte;
846}
847void output_modrm(u_char mod,u_char rm,u_char ext)
848{
849 assert(mod<4);
850 assert(rm<8);
851 assert(ext<8);
852 u_char byte=(mod<<6)|(ext<<3)|rm;
853 *(out++)=byte;
854}
855void output_sib(u_char scale,u_char index,u_char base)
856{
857 assert(scale<4);
858 assert(index<8);
859 assert(base<8);
860 u_char byte=(scale<<6)|(index<<3)|base;
861 *(out++)=byte;
862}
863void output_w32(u_int word)
864{
865 *((u_int *)out)=word;
866 out+=4;
867}
868u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
869{
870 assert(rd<16);
871 assert(rn<16);
872 assert(rm<16);
873 return((rn<<16)|(rd<<12)|rm);
874}
875u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
876{
877 assert(rd<16);
878 assert(rn<16);
879 assert(imm<256);
880 assert((shift&1)==0);
881 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
882}
// Try to express imm as an 8-bit value with an even rotation.
// On success stores the 12-bit rotate/immediate field in *encoded and
// returns 1.  If no rotation leaves a value below 256, returns 0 with
// *encoded set to 0.  Zero encodes as 0.
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  // rot counts down from 32 in steps of 2 while imm is rotated right by 2,
  // so (rot&30) is the right-rotation that restores the original value
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 898void genimm_checked(u_int imm,u_int *encoded)
899{
900 u_int ret=genimm(imm,encoded);
901 assert(ret);
902}
57871462 903u_int genjmp(u_int addr)
904{
905 int offset=addr-(int)out-8;
e80343e2 906 if(offset<-33554432||offset>=33554432) {
907 if (addr>2) {
c43b5311 908 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 909 exit(1);
910 }
911 return 0;
912 }
57871462 913 return ((u_int)offset>>2)&0xffffff;
914}
915
916void emit_mov(int rs,int rt)
917{
918 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
919 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
920}
921
922void emit_movs(int rs,int rt)
923{
924 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
925 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
926}
927
928void emit_add(int rs1,int rs2,int rt)
929{
930 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_adds(int rs1,int rs2,int rt)
935{
936 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
938}
939
940void emit_adcs(int rs1,int rs2,int rt)
941{
942 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
943 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
944}
945
946void emit_sbc(int rs1,int rs2,int rt)
947{
948 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
949 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
950}
951
952void emit_sbcs(int rs1,int rs2,int rt)
953{
954 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
955 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
956}
957
958void emit_neg(int rs, int rt)
959{
960 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
961 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
962}
963
964void emit_negs(int rs, int rt)
965{
966 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
967 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
968}
969
970void emit_sub(int rs1,int rs2,int rt)
971{
972 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
973 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
974}
975
976void emit_subs(int rs1,int rs2,int rt)
977{
978 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
979 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
980}
981
982void emit_zeroreg(int rt)
983{
984 assem_debug("mov %s,#0\n",regname[rt]);
985 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
986}
987
790ee18e 988void emit_loadlp(u_int imm,u_int rt)
989{
990 add_literal((int)out,imm);
991 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
992 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
993}
994void emit_movw(u_int imm,u_int rt)
995{
996 assert(imm<65536);
997 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
998 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
999}
1000void emit_movt(u_int imm,u_int rt)
1001{
1002 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1003 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1004}
1005void emit_movimm(u_int imm,u_int rt)
1006{
1007 u_int armval;
1008 if(genimm(imm,&armval)) {
1009 assem_debug("mov %s,#%d\n",regname[rt],imm);
1010 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1011 }else if(genimm(~imm,&armval)) {
1012 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1013 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1014 }else if(imm<65536) {
665f33e1 1015 #ifndef HAVE_ARMV7
790ee18e 1016 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1017 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1018 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1019 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1020 #else
1021 emit_movw(imm,rt);
1022 #endif
1023 }else{
665f33e1 1024 #ifndef HAVE_ARMV7
790ee18e 1025 emit_loadlp(imm,rt);
1026 #else
1027 emit_movw(imm&0x0000FFFF,rt);
1028 emit_movt(imm&0xFFFF0000,rt);
1029 #endif
1030 }
1031}
1032void emit_pcreladdr(u_int rt)
1033{
1034 assem_debug("add %s,pc,#?\n",regname[rt]);
1035 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1036}
1037
57871462 1038void emit_loadreg(int r, int hr)
1039{
3d624f89 1040#ifdef FORCE32
1041 if(r&64) {
c43b5311 1042 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1043 assert(0);
1044 return;
3d624f89 1045 }
1046#endif
57871462 1047 if((r&63)==0)
1048 emit_zeroreg(hr);
1049 else {
3d624f89 1050 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1051 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1052 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1053 if(r==CCREG) addr=(int)&cycle_count;
1054 if(r==CSREG) addr=(int)&Status;
1055 if(r==FSREG) addr=(int)&FCR31;
1056 if(r==INVCP) addr=(int)&invc_ptr;
1057 u_int offset = addr-(u_int)&dynarec_local;
1058 assert(offset<4096);
1059 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1060 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1061 }
1062}
1063void emit_storereg(int r, int hr)
1064{
3d624f89 1065#ifdef FORCE32
1066 if(r&64) {
c43b5311 1067 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1068 assert(0);
1069 return;
3d624f89 1070 }
1071#endif
1072 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1073 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1074 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1075 if(r==CCREG) addr=(int)&cycle_count;
1076 if(r==FSREG) addr=(int)&FCR31;
1077 u_int offset = addr-(u_int)&dynarec_local;
1078 assert(offset<4096);
1079 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1080 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1081}
1082
1083void emit_test(int rs, int rt)
1084{
1085 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1086 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1087}
1088
1089void emit_testimm(int rs,int imm)
1090{
1091 u_int armval;
5a05d80c 1092 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1093 genimm_checked(imm,&armval);
57871462 1094 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1095}
1096
b9b61529 1097void emit_testeqimm(int rs,int imm)
1098{
1099 u_int armval;
1100 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1101 genimm_checked(imm,&armval);
b9b61529 1102 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1103}
1104
57871462 1105void emit_not(int rs,int rt)
1106{
1107 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1108 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1109}
1110
b9b61529 1111void emit_mvnmi(int rs,int rt)
1112{
1113 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1114 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1115}
1116
57871462 1117void emit_and(u_int rs1,u_int rs2,u_int rt)
1118{
1119 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1120 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1121}
1122
1123void emit_or(u_int rs1,u_int rs2,u_int rt)
1124{
1125 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1126 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1127}
1128void emit_or_and_set_flags(int rs1,int rs2,int rt)
1129{
1130 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1131 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1132}
1133
f70d384d 1134void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1135{
1136 assert(rs<16);
1137 assert(rt<16);
1138 assert(imm<32);
1139 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1140 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1141}
1142
576bbd8f 1143void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 assert(imm<32);
1148 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1149 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1150}
1151
57871462 1152void emit_xor(u_int rs1,u_int rs2,u_int rt)
1153{
1154 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1155 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1156}
1157
57871462 1158void emit_addimm(u_int rs,int imm,u_int rt)
1159{
1160 assert(rs<16);
1161 assert(rt<16);
1162 if(imm!=0) {
57871462 1163 u_int armval;
1164 if(genimm(imm,&armval)) {
1165 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1166 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1167 }else if(genimm(-imm,&armval)) {
8a0a8423 1168 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1169 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1170 #ifdef HAVE_ARMV7
1171 }else if(rt!=rs&&(u_int)imm<65536) {
1172 emit_movw(imm&0x0000ffff,rt);
1173 emit_add(rs,rt,rt);
1174 }else if(rt!=rs&&(u_int)-imm<65536) {
1175 emit_movw(-imm&0x0000ffff,rt);
1176 emit_sub(rs,rt,rt);
1177 #endif
1178 }else if((u_int)-imm<65536) {
57871462 1179 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1180 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1181 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1182 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1183 }else {
1184 do {
1185 int shift = (ffs(imm) - 1) & ~1;
1186 int imm8 = imm & (0xff << shift);
1187 genimm_checked(imm8,&armval);
1188 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1189 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1190 rs = rt;
1191 imm &= ~imm8;
1192 }
1193 while (imm != 0);
57871462 1194 }
1195 }
1196 else if(rs!=rt) emit_mov(rs,rt);
1197}
1198
1199void emit_addimm_and_set_flags(int imm,int rt)
1200{
1201 assert(imm>-65536&&imm<65536);
1202 u_int armval;
1203 if(genimm(imm,&armval)) {
1204 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1205 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1206 }else if(genimm(-imm,&armval)) {
1207 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1208 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1209 }else if(imm<0) {
1210 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1211 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1212 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1213 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1214 }else{
1215 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1216 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1217 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1218 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1219 }
1220}
1221void emit_addimm_no_flags(u_int imm,u_int rt)
1222{
1223 emit_addimm(rt,imm,rt);
1224}
1225
1226void emit_addnop(u_int r)
1227{
1228 assert(r<16);
1229 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1230 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1231}
1232
1233void emit_adcimm(u_int rs,int imm,u_int rt)
1234{
1235 u_int armval;
cfbd3c6e 1236 genimm_checked(imm,&armval);
57871462 1237 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1238 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1239}
1240/*void emit_sbcimm(int imm,u_int rt)
1241{
1242 u_int armval;
cfbd3c6e 1243 genimm_checked(imm,&armval);
57871462 1244 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1245 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1246}*/
1247void emit_sbbimm(int imm,u_int rt)
1248{
1249 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1250 assert(rt<8);
1251 if(imm<128&&imm>=-128) {
1252 output_byte(0x83);
1253 output_modrm(3,rt,3);
1254 output_byte(imm);
1255 }
1256 else
1257 {
1258 output_byte(0x81);
1259 output_modrm(3,rt,3);
1260 output_w32(imm);
1261 }
1262}
1263void emit_rscimm(int rs,int imm,u_int rt)
1264{
1265 assert(0);
1266 u_int armval;
cfbd3c6e 1267 genimm_checked(imm,&armval);
57871462 1268 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1269 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1270}
1271
1272void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1273{
1274 // TODO: if(genimm(imm,&armval)) ...
1275 // else
1276 emit_movimm(imm,HOST_TEMPREG);
1277 emit_adds(HOST_TEMPREG,rsl,rtl);
1278 emit_adcimm(rsh,0,rth);
1279}
1280
1281void emit_sbb(int rs1,int rs2)
1282{
1283 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1284 output_byte(0x19);
1285 output_modrm(3,rs1,rs2);
1286}
1287
1288void emit_andimm(int rs,int imm,int rt)
1289{
1290 u_int armval;
790ee18e 1291 if(imm==0) {
1292 emit_zeroreg(rt);
1293 }else if(genimm(imm,&armval)) {
57871462 1294 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1296 }else if(genimm(~imm,&armval)) {
1297 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1298 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1299 }else if(imm==65535) {
332a4533 1300 #ifndef HAVE_ARMV6
57871462 1301 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1302 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1303 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1304 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1305 #else
1306 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1307 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1308 #endif
1309 }else{
1310 assert(imm>0&&imm<65535);
665f33e1 1311 #ifndef HAVE_ARMV7
57871462 1312 assem_debug("mov r14,#%d\n",imm&0xFF00);
1313 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1314 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1315 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1316 #else
1317 emit_movw(imm,HOST_TEMPREG);
1318 #endif
1319 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1320 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1321 }
1322}
1323
1324void emit_orimm(int rs,int imm,int rt)
1325{
1326 u_int armval;
790ee18e 1327 if(imm==0) {
1328 if(rs!=rt) emit_mov(rs,rt);
1329 }else if(genimm(imm,&armval)) {
57871462 1330 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1331 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1332 }else{
1333 assert(imm>0&&imm<65536);
1334 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1335 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1336 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1337 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1338 }
1339}
1340
1341void emit_xorimm(int rs,int imm,int rt)
1342{
57871462 1343 u_int armval;
790ee18e 1344 if(imm==0) {
1345 if(rs!=rt) emit_mov(rs,rt);
1346 }else if(genimm(imm,&armval)) {
57871462 1347 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1349 }else{
514ed0d9 1350 assert(imm>0&&imm<65536);
57871462 1351 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1352 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1353 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1354 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1355 }
1356}
1357
1358void emit_shlimm(int rs,u_int imm,int rt)
1359{
1360 assert(imm>0);
1361 assert(imm<32);
1362 //if(imm==1) ...
1363 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1364 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1365}
1366
c6c3b1b3 1367void emit_lsls_imm(int rs,int imm,int rt)
1368{
1369 assert(imm>0);
1370 assert(imm<32);
1371 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1373}
1374
665f33e1 1375void emit_lslpls_imm(int rs,int imm,int rt)
1376{
1377 assert(imm>0);
1378 assert(imm<32);
1379 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1380 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1381}
1382
57871462 1383void emit_shrimm(int rs,u_int imm,int rt)
1384{
1385 assert(imm>0);
1386 assert(imm<32);
1387 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1388 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1389}
1390
1391void emit_sarimm(int rs,u_int imm,int rt)
1392{
1393 assert(imm>0);
1394 assert(imm<32);
1395 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1397}
1398
1399void emit_rorimm(int rs,u_int imm,int rt)
1400{
1401 assert(imm>0);
1402 assert(imm<32);
1403 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1404 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1405}
1406
1407void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1408{
1409 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1410 assert(imm>0);
1411 assert(imm<32);
1412 //if(imm==1) ...
1413 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1414 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1415 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1416 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1417}
1418
1419void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1420{
1421 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1422 assert(imm>0);
1423 assert(imm<32);
1424 //if(imm==1) ...
1425 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1426 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1427 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1428 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1429}
1430
// Sign-extend the low 16 bits of rs into rt.
// With HAVE_ARMV6 a single SXTH is emitted; otherwise a shift-left by 16
// followed by an arithmetic shift-right by 16.
void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
1441
// Sign-extend the low 8 bits of rs into rt.
// With HAVE_ARMV6 a single SXTB is emitted; otherwise a shift-left by 24
// followed by an arithmetic shift-right by 24.
void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
1452
57871462 1453void emit_shl(u_int rs,u_int shift,u_int rt)
1454{
1455 assert(rs<16);
1456 assert(rt<16);
1457 assert(shift<16);
1458 //if(imm==1) ...
1459 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1460 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1461}
1462void emit_shr(u_int rs,u_int shift,u_int rt)
1463{
1464 assert(rs<16);
1465 assert(rt<16);
1466 assert(shift<16);
1467 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1468 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1469}
1470void emit_sar(u_int rs,u_int shift,u_int rt)
1471{
1472 assert(rs<16);
1473 assert(rt<16);
1474 assert(shift<16);
1475 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1476 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1477}
1478void emit_shlcl(int r)
1479{
1480 assem_debug("shl %%%s,%%cl\n",regname[r]);
1481 assert(0);
1482}
1483void emit_shrcl(int r)
1484{
1485 assem_debug("shr %%%s,%%cl\n",regname[r]);
1486 assert(0);
1487}
1488void emit_sarcl(int r)
1489{
1490 assem_debug("sar %%%s,%%cl\n",regname[r]);
1491 assert(0);
1492}
1493
1494void emit_shldcl(int r1,int r2)
1495{
1496 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1497 assert(0);
1498}
1499void emit_shrdcl(int r1,int r2)
1500{
1501 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1502 assert(0);
1503}
1504void emit_orrshl(u_int rs,u_int shift,u_int rt)
1505{
1506 assert(rs<16);
1507 assert(rt<16);
1508 assert(shift<16);
1509 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1510 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1511}
1512void emit_orrshr(u_int rs,u_int shift,u_int rt)
1513{
1514 assert(rs<16);
1515 assert(rt<16);
1516 assert(shift<16);
1517 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1518 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1519}
1520
1521void emit_cmpimm(int rs,int imm)
1522{
1523 u_int armval;
1524 if(genimm(imm,&armval)) {
5a05d80c 1525 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1526 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1527 }else if(genimm(-imm,&armval)) {
5a05d80c 1528 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1529 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1530 }else if(imm>0) {
1531 assert(imm<65536);
57871462 1532 emit_movimm(imm,HOST_TEMPREG);
57871462 1533 assem_debug("cmp %s,r14\n",regname[rs]);
1534 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1535 }else{
1536 assert(imm>-65536);
57871462 1537 emit_movimm(-imm,HOST_TEMPREG);
57871462 1538 assem_debug("cmn %s,r14\n",regname[rs]);
1539 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1540 }
1541}
1542
1543void emit_cmovne(u_int *addr,int rt)
1544{
1545 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1546 assert(0);
1547}
1548void emit_cmovl(u_int *addr,int rt)
1549{
1550 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1551 assert(0);
1552}
1553void emit_cmovs(u_int *addr,int rt)
1554{
1555 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1556 assert(0);
1557}
1558void emit_cmovne_imm(int imm,int rt)
1559{
1560 assem_debug("movne %s,#%d\n",regname[rt],imm);
1561 u_int armval;
cfbd3c6e 1562 genimm_checked(imm,&armval);
57871462 1563 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1564}
1565void emit_cmovl_imm(int imm,int rt)
1566{
1567 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1568 u_int armval;
cfbd3c6e 1569 genimm_checked(imm,&armval);
57871462 1570 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1571}
1572void emit_cmovb_imm(int imm,int rt)
1573{
1574 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1575 u_int armval;
cfbd3c6e 1576 genimm_checked(imm,&armval);
57871462 1577 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1578}
1579void emit_cmovs_imm(int imm,int rt)
1580{
1581 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1582 u_int armval;
cfbd3c6e 1583 genimm_checked(imm,&armval);
57871462 1584 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1585}
1586void emit_cmove_reg(int rs,int rt)
1587{
1588 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1589 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1590}
1591void emit_cmovne_reg(int rs,int rt)
1592{
1593 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1594 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1595}
1596void emit_cmovl_reg(int rs,int rt)
1597{
1598 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1599 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1600}
1601void emit_cmovs_reg(int rs,int rt)
1602{
1603 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1604 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1605}
1606
// Set rt to 1 if rs < imm (signed, via the LT condition), else 0.
// When rt aliases rs the zeroing has to wait until after the compare,
// because clearing rt first would destroy the value being compared.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if rs < imm (unsigned, via the CC condition), else 0.
// When rt aliases rs the zeroing has to wait until after the compare,
// because clearing rt first would destroy the value being compared.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// 64-bit signed "set if less than immediate": rt = ((rsh:rsl) < imm).
// The low words are compared first; the result is then corrected from
// the high word, which is checked against the sign extension of imm
// (0 for non-negative imm, -1 for negative imm).
// rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if(imm<0)
  {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  }
  else
  {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
// 64-bit unsigned "set if less than immediate": rt = ((rsh:rsl) < imm).
// The low words are compared first; the result is then overridden when
// the high word differs from the sign extension of imm: forced to 0 for
// non-negative imm, forced to 1 for negative imm.
// rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if(imm<0)
  {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  }
  else
  {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1653
1654void emit_cmp(int rs,int rt)
1655{
1656 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1657 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1658}
// Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, load 1,
// then replace it with 0 on the LT condition.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);      // default result
  emit_cmovl_imm(0, rt);   // rs < 1
}
// Set rt to 1 if rs is non-zero; a zero input leaves rt at 0.
// Flags come from a flag-setting move when the registers differ, or from
// a TST of the register against itself when rt aliases rs.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// 64-bit "greater than zero": rt = ((rsh:rsl) > 0).
// Starts from the 32-bit result for the low word, then lets the high
// word override it: non-zero high word -> 1, negative high word -> 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);  // high word non-zero
  emit_cmovs_imm(0, rt);   // high word negative
}
// 64-bit "non-zero": ORRS the two halves into rt, then force rt to 1
// whenever the OR result was non-zero.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (signed, LT condition), else 0.
// If rt aliases either source it can only be cleared after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt || rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (unsigned, CC condition), else 0.
// If rt aliases either source it can only be cleared after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt || rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1703void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1704{
1705 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1706 assert(u1!=rt);
1707 assert(u2!=rt);
1708 emit_cmp(l1,l2);
1709 emit_movimm(0,rt);
1710 emit_sbcs(u1,u2,HOST_TEMPREG);
1711 emit_cmovl_imm(1,rt);
1712}
1713void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1714{
1715 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1716 assert(u1!=rt);
1717 assert(u2!=rt);
1718 emit_cmp(l1,l2);
1719 emit_movimm(0,rt);
1720 emit_sbcs(u1,u2,HOST_TEMPREG);
1721 emit_cmovb_imm(1,rt);
1722}
1723
1724void emit_call(int a)
1725{
1726 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1727 u_int offset=genjmp(a);
1728 output_w32(0xeb000000|offset);
1729}
1730void emit_jmp(int a)
1731{
1732 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1733 u_int offset=genjmp(a);
1734 output_w32(0xea000000|offset);
1735}
// Emit "bne a": branch to address a when the NE condition holds.
void emit_jne(int a)
{
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
// Emit "beq a": branch to address a when the EQ condition holds.
void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
// Emit "bmi a": branch to address a when the MI (negative) condition holds.
void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
// Emit "bpl a": branch to address a when the PL (non-negative) condition holds.
void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
// Emit "blt a": branch to address a when the LT condition holds.
void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
// Emit "bge a": branch to address a when the GE condition holds.
void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
// Emit "bvc a": branch to address a when the VC (no overflow) condition holds.
void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
// Emit "bcs a": branch to address a when the CS (carry set) condition holds.
void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
// Emit "bcc a": branch to address a when the CC (carry clear) condition holds.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n", a);
  output_w32(0x3a000000 | genjmp(a));
}
1790
// Placeholder with no ARM encoding: traces the request, then assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);  // not implemented in this backend
}
// Placeholder with no ARM encoding: traces the request, then assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);  // not implemented in this backend
}
// Placeholder with no ARM encoding: traces the request, then assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);  // not implemented in this backend
}
1806void emit_pushreg(u_int r)
1807{
1808 assem_debug("push %%%s\n",regname[r]);
1809 assert(0);
1810}
1811void emit_popreg(u_int r)
1812{
1813 assem_debug("pop %%%s\n",regname[r]);
1814 assert(0);
1815}
1816void emit_callreg(u_int r)
1817{
c6c3b1b3 1818 assert(r<15);
1819 assem_debug("blx %s\n",regname[r]);
1820 output_w32(0xe12fff30|r);
57871462 1821}
1822void emit_jmpreg(u_int r)
1823{
1824 assem_debug("mov pc,%s\n",regname[r]);
1825 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1826}
1827
1828void emit_readword_indexed(int offset, int rs, int rt)
1829{
1830 assert(offset>-4096&&offset<4096);
1831 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1832 if(offset>=0) {
1833 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1834 }else{
1835 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1836 }
1837}
1838void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1839{
1840 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1841 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1842}
c6c3b1b3 1843void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1844{
1845 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1846 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1847}
1848void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1849{
1850 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1851 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1852}
1853void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1854{
1855 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1856 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1857}
1858void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1859{
1860 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1861 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1862}
1863void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1864{
1865 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1866 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1867}
// Word load with an optional map register.
// map < 0: plain "ldr rt,[rs,#addr]".
// map >= 0: addr must be 0 and the load uses rs + map*4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words: rh from addr, rl from addr+4.
// A negative rh skips the first word in both paths.
// With a map register (map >= 0) the second word is reached by
// incrementing map by 1 (scaled by 4 in the load), which modifies map;
// rh must not be rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1888void emit_movsbl_indexed(int offset, int rs, int rt)
1889{
1890 assert(offset>-256&&offset<256);
1891 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1892 if(offset>=0) {
1893 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1894 }else{
1895 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1896 }
1897}
1898void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1899{
1900 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1901 else {
1902 if(addr==0) {
1903 emit_shlimm(map,2,map);
1904 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1905 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1906 }else{
1907 assert(addr>-256&&addr<256);
1908 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1909 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1910 emit_movsbl_indexed(addr, rt, rt);
1911 }
1912 }
1913}
1914void emit_movswl_indexed(int offset, int rs, int rt)
1915{
1916 assert(offset>-256&&offset<256);
1917 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1918 if(offset>=0) {
1919 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1920 }else{
1921 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1922 }
1923}
1924void emit_movzbl_indexed(int offset, int rs, int rt)
1925{
1926 assert(offset>-4096&&offset<4096);
1927 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1928 if(offset>=0) {
1929 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1930 }else{
1931 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1932 }
1933}
1934void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1935{
1936 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1937 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1938}
// Unsigned byte load with an optional map register.
//   map < 0              : plain "ldrb rt,[rs,#addr]".
//   map >= 0, addr == 0  : load from rs + map*4.
//   map >= 0, addr != 0  : rt = rs + addr first, then load from rt + map*4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1951void emit_movzwl_indexed(int offset, int rs, int rt)
1952{
1953 assert(offset>-256&&offset<256);
1954 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1955 if(offset>=0) {
1956 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1957 }else{
1958 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1959 }
1960}
054175e9 1961static void emit_ldrd(int offset, int rs, int rt)
1962{
1963 assert(offset>-256&&offset<256);
1964 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1967 }else{
1968 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1969 }
1970}
57871462 1971void emit_readword(int addr, int rt)
1972{
1973 u_int offset = addr-(u_int)&dynarec_local;
1974 assert(offset<4096);
1975 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1976 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1977}
1978void emit_movsbl(int addr, int rt)
1979{
1980 u_int offset = addr-(u_int)&dynarec_local;
1981 assert(offset<256);
1982 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1983 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1984}
1985void emit_movswl(int addr, int rt)
1986{
1987 u_int offset = addr-(u_int)&dynarec_local;
1988 assert(offset<256);
1989 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1990 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1991}
1992void emit_movzbl(int addr, int rt)
1993{
1994 u_int offset = addr-(u_int)&dynarec_local;
1995 assert(offset<4096);
1996 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1997 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1998}
1999void emit_movzwl(int addr, int rt)
2000{
2001 u_int offset = addr-(u_int)&dynarec_local;
2002 assert(offset<256);
2003 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
2004 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2005}
2006void emit_movzwl_reg(int rs, int rt)
2007{
2008 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
2009 assert(0);
2010}
2011
2012void emit_xchg(int rs, int rt)
2013{
2014 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
2015 assert(0);
2016}
2017void emit_writeword_indexed(int rt, int offset, int rs)
2018{
2019 assert(offset>-4096&&offset<4096);
2020 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2021 if(offset>=0) {
2022 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2023 }else{
2024 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2025 }
2026}
2027void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2028{
2029 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2030 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2031}
// Word store with an optional map register.
// map < 0: plain "str rt,[rs,#addr]".
// map >= 0: addr must be 0 and the store goes to rs + map*4.
// The temp argument is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value as two words: rh at addr, rl at addr+4.
//   map < 0 : two plain indexed stores; a negative rh skips the first.
//   map >= 0: rh must be valid. If a scratch register distinct from rs is
//             available, temp = map + 1 addresses the second word;
//             otherwise rs itself is advanced by 4 after the first store
//             (rs is modified in that case).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_scratch = (temp!=rs);
  if(have_scratch) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_scratch) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2056void emit_writehword_indexed(int rt, int offset, int rs)
2057{
2058 assert(offset>-256&&offset<256);
2059 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2060 if(offset>=0) {
2061 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2062 }else{
2063 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2064 }
2065}
2066void emit_writebyte_indexed(int rt, int offset, int rs)
2067{
2068 assert(offset>-4096&&offset<4096);
2069 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2070 if(offset>=0) {
2071 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2072 }else{
2073 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2074 }
2075}
2076void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2077{
2078 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2079 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2080}
// Byte store with an optional map register.
//   map < 0              : plain "strb rt,[rs,#addr]".
//   map >= 0, addr == 0  : store to rs + map*4.
//   map >= 0, addr != 0  : temp = rs + addr first, then store to
//                          temp + map*4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2093void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2094{
2095 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2096 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2097}
2098void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2099{
2100 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2101 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2102}
2103void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2104{
2105 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2106 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2107}
57871462 2108void emit_writeword(int rt, int addr)
2109{
2110 u_int offset = addr-(u_int)&dynarec_local;
2111 assert(offset<4096);
2112 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2113 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2114}
2115void emit_writehword(int rt, int addr)
2116{
2117 u_int offset = addr-(u_int)&dynarec_local;
2118 assert(offset<256);
2119 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2120 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2121}
2122void emit_writebyte(int rt, int addr)
2123{
2124 u_int offset = addr-(u_int)&dynarec_local;
2125 assert(offset<4096);
74426039 2126 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2127 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2128}
// Placeholder with no ARM encoding: traces the request, then assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);  // not implemented in this backend
}
// Not implemented in this backend: logs the request and always asserts.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);  // unsupported
}
2139
2140void emit_mul(int rs)
2141{
2142 assem_debug("mul %%%s\n",regname[rs]);
2143 assert(0);
2144}
2145void emit_imul(int rs)
2146{
2147 assem_debug("imul %%%s\n",regname[rs]);
2148 assert(0);
2149}
2150void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2151{
2152 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2153 assert(rs1<16);
2154 assert(rs2<16);
2155 assert(hi<16);
2156 assert(lo<16);
2157 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2158}
2159void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2160{
2161 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2162 assert(rs1<16);
2163 assert(rs2<16);
2164 assert(hi<16);
2165 assert(lo<16);
2166 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2167}
2168
2169void emit_div(int rs)
2170{
2171 assem_debug("div %%%s\n",regname[rs]);
2172 assert(0);
2173}
2174void emit_idiv(int rs)
2175{
2176 assem_debug("idiv %%%s\n",regname[rs]);
2177 assert(0);
2178}
// Not implemented in this backend: logs the request and always asserts.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);  // unsupported
}
2184
2185void emit_clz(int rs,int rt)
2186{
2187 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2188 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2189}
2190
2191void emit_subcs(int rs1,int rs2,int rt)
2192{
2193 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2194 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2195}
2196
2197void emit_shrcc_imm(int rs,u_int imm,int rt)
2198{
2199 assert(imm>0);
2200 assert(imm<32);
2201 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2202 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2203}
2204
b1be1eee 2205void emit_shrne_imm(int rs,u_int imm,int rt)
2206{
2207 assert(imm>0);
2208 assert(imm<32);
2209 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2210 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2211}
2212
57871462 2213void emit_negmi(int rs, int rt)
2214{
2215 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2216 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2217}
2218
2219void emit_negsmi(int rs, int rt)
2220{
2221 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2222 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2223}
2224
2225void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2226{
2227 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2228 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2229}
2230
2231void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2232{
2233 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2234 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2235}
2236
2237void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2241}
2242
2243void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2244{
2245 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2246 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2247}
2248
2249void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2250{
2251 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2252 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2253}
2254
2255void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2256{
2257 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2258 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2259}
2260
2261void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2262{
2263 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2264 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2265}
2266
2267void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2268{
2269 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2270 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2271}
2272
2273void emit_teq(int rs, int rt)
2274{
2275 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2276 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2277}
2278
2279void emit_rsbimm(int rs, int imm, int rt)
2280{
2281 u_int armval;
cfbd3c6e 2282 genimm_checked(imm,&armval);
57871462 2283 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2284 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2285}
2286
2287// Load 2 immediates optimizing for small code size
2288void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2289{
2290 emit_movimm(imm1,rt1);
2291 u_int armval;
2292 if(genimm(imm2-imm1,&armval)) {
2293 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2294 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2295 }else if(genimm(imm1-imm2,&armval)) {
2296 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2297 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2298 }
2299 else emit_movimm(imm2,rt2);
2300}
2301
2302// Conditionally select one of two immediates, optimizing for small code size
2303// This will only be called if HAVE_CMOV_IMM is defined
2304void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2305{
2306 u_int armval;
2307 if(genimm(imm2-imm1,&armval)) {
2308 emit_movimm(imm1,rt);
2309 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2310 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2311 }else if(genimm(imm1-imm2,&armval)) {
2312 emit_movimm(imm1,rt);
2313 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2314 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2315 }
2316 else {
665f33e1 2317 #ifndef HAVE_ARMV7
57871462 2318 emit_movimm(imm1,rt);
2319 add_literal((int)out,imm2);
2320 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2321 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2322 #else
2323 emit_movw(imm1&0x0000FFFF,rt);
2324 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2325 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2326 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2327 }
2328 emit_movt(imm1&0xFFFF0000,rt);
2329 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2330 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2331 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2332 }
2333 #endif
2334 }
2335}
2336
// Special case for checking invalid_code (absolute-address form).
// Not implemented in this backend: always asserts.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);  // unsupported
}
2342
2343// special case for checking invalid_code
2344void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2345{
2346 assert(imm<128&&imm>=0);
2347 assert(r>=0&&r<16);
2348 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2349 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2350 emit_cmpimm(HOST_TEMPREG,imm);
2351}
2352
2353// special case for tlb mapping
2354void emit_addsr12(int rs1,int rs2,int rt)
2355{
2356 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2357 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2358}
2359
0bbd1454 2360void emit_callne(int a)
2361{
2362 assem_debug("blne %x\n",a);
2363 u_int offset=genjmp(a);
2364 output_w32(0x1b000000|offset);
2365}
2366
// Used to preload hash table entries.
// NOTE(review): the byte sequence emitted here (0x0F 0x18 + modrm + 32-bit
// address) looks like an x86 prefetch encoding rather than an ARM
// instruction -- confirm this function is never called in the ARM build.
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2376void emit_prefetchreg(int r)
2377{
2378 assem_debug("pld %s\n",regname[r]);
2379 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2380}
2381
2382// Special case for mini_ht
2383void emit_ldreq_indexed(int rs, u_int offset, int rt)
2384{
2385 assert(offset<4096);
2386 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2387 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2388}
2389
2390void emit_flds(int r,int sr)
2391{
2392 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2393 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2394}
2395
2396void emit_vldr(int r,int vr)
2397{
2398 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2399 output_w32(0xed900b00|(vr<<12)|(r<<16));
2400}
2401
2402void emit_fsts(int sr,int r)
2403{
2404 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2405 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2406}
2407
2408void emit_vstr(int vr,int r)
2409{
2410 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2411 output_w32(0xed800b00|(vr<<12)|(r<<16));
2412}
2413
2414void emit_ftosizs(int s,int d)
2415{
2416 assem_debug("ftosizs s%d,s%d\n",d,s);
2417 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2418}
2419
2420void emit_ftosizd(int s,int d)
2421{
2422 assem_debug("ftosizd s%d,d%d\n",d,s);
2423 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2424}
2425
2426void emit_fsitos(int s,int d)
2427{
2428 assem_debug("fsitos s%d,s%d\n",d,s);
2429 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2430}
2431
2432void emit_fsitod(int s,int d)
2433{
2434 assem_debug("fsitod d%d,s%d\n",d,s);
2435 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2436}
2437
2438void emit_fcvtds(int s,int d)
2439{
2440 assem_debug("fcvtds d%d,s%d\n",d,s);
2441 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2442}
2443
2444void emit_fcvtsd(int s,int d)
2445{
2446 assem_debug("fcvtsd s%d,d%d\n",d,s);
2447 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2448}
2449
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands are encoded as s-registers, so the debug text names both
// as s-registers.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2455
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands are encoded as d-registers, so the debug text names both
// as d-registers.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2461
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands are encoded as s-registers, so the debug text names both
// as s-registers.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2467
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands are encoded as d-registers, so the debug text names both
// as d-registers.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2473
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands are encoded as s-registers, so the debug text names both
// as s-registers.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2479
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands are encoded as d-registers, so the debug text names both
// as d-registers.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2485
2486void emit_fadds(int s1,int s2,int d)
2487{
2488 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2489 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2490}
2491
2492void emit_faddd(int s1,int s2,int d)
2493{
2494 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2495 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2496}
2497
2498void emit_fsubs(int s1,int s2,int d)
2499{
2500 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2501 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2502}
2503
2504void emit_fsubd(int s1,int s2,int d)
2505{
2506 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2507 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2508}
2509
2510void emit_fmuls(int s1,int s2,int d)
2511{
2512 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2513 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2514}
2515
2516void emit_fmuld(int s1,int s2,int d)
2517{
2518 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2519 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2520}
2521
2522void emit_fdivs(int s1,int s2,int d)
2523{
2524 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2525 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2526}
2527
2528void emit_fdivd(int s1,int s2,int d)
2529{
2530 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2531 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2532}
2533
2534void emit_fcmps(int x,int y)
2535{
2536 assem_debug("fcmps s14, s15\n");
2537 output_w32(0xeeb47a67);
2538}
2539
2540void emit_fcmpd(int x,int y)
2541{
2542 assem_debug("fcmpd d6, d7\n");
2543 output_w32(0xeeb46b47);
2544}
2545
2546void emit_fmstat()
2547{
2548 assem_debug("fmstat\n");
2549 output_w32(0xeef1fa10);
2550}
2551
2552void emit_bicne_imm(int rs,int imm,int rt)
2553{
2554 u_int armval;
cfbd3c6e 2555 genimm_checked(imm,&armval);
57871462 2556 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2557 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2558}
2559
2560void emit_biccs_imm(int rs,int imm,int rt)
2561{
2562 u_int armval;
cfbd3c6e 2563 genimm_checked(imm,&armval);
57871462 2564 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2565 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2566}
2567
2568void emit_bicvc_imm(int rs,int imm,int rt)
2569{
2570 u_int armval;
cfbd3c6e 2571 genimm_checked(imm,&armval);
57871462 2572 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2573 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2574}
2575
2576void emit_bichi_imm(int rs,int imm,int rt)
2577{
2578 u_int armval;
cfbd3c6e 2579 genimm_checked(imm,&armval);
57871462 2580 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2581 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2582}
2583
2584void emit_orrvs_imm(int rs,int imm,int rt)
2585{
2586 u_int armval;
cfbd3c6e 2587 genimm_checked(imm,&armval);
57871462 2588 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2589 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2590}
2591
b9b61529 2592void emit_orrne_imm(int rs,int imm,int rt)
2593{
2594 u_int armval;
cfbd3c6e 2595 genimm_checked(imm,&armval);
b9b61529 2596 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2597 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2598}
2599
2600void emit_andne_imm(int rs,int imm,int rt)
2601{
2602 u_int armval;
cfbd3c6e 2603 genimm_checked(imm,&armval);
b9b61529 2604 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2605 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2606}
2607
665f33e1 2608void emit_addpl_imm(int rs,int imm,int rt)
2609{
2610 u_int armval;
2611 genimm_checked(imm,&armval);
2612 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2613 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2614}
2615
57871462 2616void emit_jno_unlikely(int a)
2617{
2618 //emit_jno(a);
2619 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2620 output_w32(0x72800000|rd_rn_rm(15,15,0));
2621}
2622
054175e9 2623static void save_regs_all(u_int reglist)
57871462 2624{
054175e9 2625 int i;
57871462 2626 if(!reglist) return;
2627 assem_debug("stmia fp,{");
054175e9 2628 for(i=0;i<16;i++)
2629 if(reglist&(1<<i))
2630 assem_debug("r%d,",i);
57871462 2631 assem_debug("}\n");
2632 output_w32(0xe88b0000|reglist);
2633}
054175e9 2634static void restore_regs_all(u_int reglist)
57871462 2635{
054175e9 2636 int i;
57871462 2637 if(!reglist) return;
2638 assem_debug("ldmia fp,{");
054175e9 2639 for(i=0;i<16;i++)
2640 if(reglist&(1<<i))
2641 assem_debug("r%d,",i);
57871462 2642 assem_debug("}\n");
2643 output_w32(0xe89b0000|reglist);
2644}
054175e9 2645// Save registers before function call
2646static void save_regs(u_int reglist)
2647{
4d646738 2648 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2649 save_regs_all(reglist);
2650}
2651// Restore registers after function call
2652static void restore_regs(u_int reglist)
2653{
4d646738 2654 reglist&=CALLER_SAVE_REGS;
054175e9 2655 restore_regs_all(reglist);
2656}
57871462 2657
2658// Write back consts using r14 so we don't disturb the other registers
2659void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2660{
2661 int hr;
2662 for(hr=0;hr<HOST_REGS;hr++) {
2663 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2664 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2665 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2666 int value=constmap[i][hr];
2667 if(value==0) {
2668 emit_zeroreg(HOST_TEMPREG);
2669 }
2670 else {
2671 emit_movimm(value,HOST_TEMPREG);
2672 }
2673 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2674#ifndef FORCE32
57871462 2675 if((i_is32>>i_regmap[hr])&1) {
2676 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2677 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2678 }
24385cae 2679#endif
57871462 2680 }
2681 }
2682 }
2683 }
2684}
2685
2686/* Stubs/epilogue */
2687
2688void literal_pool(int n)
2689{
2690 if(!literalcount) return;
2691 if(n) {
2692 if((int)out-literals[0][0]<4096-n) return;
2693 }
2694 u_int *ptr;
2695 int i;
2696 for(i=0;i<literalcount;i++)
2697 {
77750690 2698 u_int l_addr=(u_int)out;
2699 int j;
2700 for(j=0;j<i;j++) {
2701 if(literals[j][1]==literals[i][1]) {
2702 //printf("dup %08x\n",literals[i][1]);
2703 l_addr=literals[j][0];
2704 break;
2705 }
2706 }
57871462 2707 ptr=(u_int *)literals[i][0];
77750690 2708 u_int offset=l_addr-(u_int)ptr-8;
57871462 2709 assert(offset<4096);
2710 assert(!(offset&3));
2711 *ptr|=offset;
77750690 2712 if(l_addr==(u_int)out) {
2713 literals[i][0]=l_addr; // remember for dupes
2714 output_w32(literals[i][1]);
2715 }
57871462 2716 }
2717 literalcount=0;
2718}
2719
2720void literal_pool_jumpover(int n)
2721{
2722 if(!literalcount) return;
2723 if(n) {
2724 if((int)out-literals[0][0]<4096-n) return;
2725 }
2726 int jaddr=(int)out;
2727 emit_jmp(0);
2728 literal_pool(0);
2729 set_jump_target(jaddr,(int)out);
2730}
2731
c67af2ac 2732emit_extjump2(u_int addr, int target, int linker)
57871462 2733{
2734 u_char *ptr=(u_char *)addr;
2735 assert((ptr[3]&0x0e)==0xa);
2736 emit_loadlp(target,0);
2737 emit_loadlp(addr,1);
24385cae 2738 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2739 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2740//DEBUG >
2741#ifdef DEBUG_CYCLE_COUNT
2742 emit_readword((int)&last_count,ECX);
2743 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2744 emit_readword((int)&next_interupt,ECX);
2745 emit_writeword(HOST_CCREG,(int)&Count);
2746 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2747 emit_writeword(ECX,(int)&last_count);
2748#endif
2749//DEBUG <
2750 emit_jmp(linker);
2751}
2752
2753emit_extjump(int addr, int target)
2754{
2755 emit_extjump2(addr, target, (int)dyna_linker);
2756}
2757emit_extjump_ds(int addr, int target)
2758{
2759 emit_extjump2(addr, target, (int)dyna_linker_ds);
2760}
2761
13e35c04 2762// put rt_val into rt, potentially making use of rs with value rs_val
2763static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2764{
8575a877 2765 u_int armval;
2766 int diff;
2767 if(genimm(rt_val,&armval)) {
2768 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2769 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2770 return;
2771 }
2772 if(genimm(~rt_val,&armval)) {
2773 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2774 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2775 return;
2776 }
2777 diff=rt_val-rs_val;
2778 if(genimm(diff,&armval)) {
2779 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2780 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2781 return;
2782 }else if(genimm(-diff,&armval)) {
2783 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2784 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2785 return;
2786 }
2787 emit_movimm(rt_val,rt);
2788}
2789
// Return 1 if emit_movimm_from() can do its job cheaply for v1 -> v2,
// 0 otherwise. The values count as similar when they are equal, or when
// their difference (or its negation), after dropping trailing pairs of
// zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;
  if(v1==v2) return 1;
  delta=v2-v1;
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
cbbab9cd 2805
// Move host registers a0 and a1 into r0 and r1 (call arguments).
// A negative a0 or a1 means "leave that argument register alone".
// May trash r2, which is used as scratch when the two must be swapped.
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0==1) {
    // arguments sit in each other's slot: swap through r2
    emit_mov(a0,2);
    emit_mov(a1,1);
    emit_mov(2,0);
    return;
  }
  if(a1==0&&a0!=0) {
    // a1 lives in r0: move it out before r0 is overwritten
    emit_mov(a1,1);
    if(a0>=0) emit_mov(a0,0);
    return;
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2822
b1be1eee 2823static void mov_loadtype_adj(int type,int rs,int rt)
2824{
2825 switch(type) {
2826 case LOADB_STUB: emit_signextend8(rs,rt); break;
2827 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2828 case LOADH_STUB: emit_signextend16(rs,rt); break;
2829 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2830 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2831 default: assert(0);
2832 }
2833}
2834
2835#ifdef PCSX
2836#include "pcsxmem.h"
2837#include "pcsxmem_inline.c"
2838#endif
2839
57871462 2840do_readstub(int n)
2841{
2842 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2843 literal_pool(256);
2844 set_jump_target(stubs[n][1],(int)out);
2845 int type=stubs[n][0];
2846 int i=stubs[n][3];
2847 int rs=stubs[n][4];
2848 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2849 u_int reglist=stubs[n][7];
2850 signed char *i_regmap=i_regs->regmap;
2851 int addr=get_reg(i_regmap,AGEN1+(i&1));
2852 int rth,rt;
2853 int ds;
b9b61529 2854 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2855 rth=get_reg(i_regmap,FTEMP|64);
2856 rt=get_reg(i_regmap,FTEMP);
2857 }else{
2858 rth=get_reg(i_regmap,rt1[i]|64);
2859 rt=get_reg(i_regmap,rt1[i]);
2860 }
2861 assert(rs>=0);
c6c3b1b3 2862#ifdef PCSX
2863 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2864 reglist|=(1<<rs);
2865 for(r=0;r<=12;r++) {
2866 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2867 temp=r; break;
2868 }
2869 }
db829eeb 2870 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2871 reglist&=~(1<<rt);
2872 if(temp==-1) {
2873 save_regs(reglist);
2874 regs_saved=1;
2875 temp=(rs==0)?2:0;
2876 }
2877 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2878 temp2=1;
2879 emit_readword((int)&mem_rtab,temp);
2880 emit_shrimm(rs,12,temp2);
2881 emit_readword_dualindexedx4(temp,temp2,temp2);
2882 emit_lsls_imm(temp2,1,temp2);
2883 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2884 switch(type) {
2885 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2886 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2887 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2888 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2889 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2890 }
2891 }
2892 if(regs_saved) {
2893 restore_jump=(int)out;
2894 emit_jcc(0); // jump to reg restore
2895 }
2896 else
2897 emit_jcc(stubs[n][2]); // return address
2898
2899 if(!regs_saved)
2900 save_regs(reglist);
2901 int handler=0;
2902 if(type==LOADB_STUB||type==LOADBU_STUB)
2903 handler=(int)jump_handler_read8;
2904 if(type==LOADH_STUB||type==LOADHU_STUB)
2905 handler=(int)jump_handler_read16;
2906 if(type==LOADW_STUB)
2907 handler=(int)jump_handler_read32;
2908 assert(handler!=0);
b96d3df7 2909 pass_args(rs,temp2);
c6c3b1b3 2910 int cc=get_reg(i_regmap,CCREG);
2911 if(cc<0)
2912 emit_loadreg(CCREG,2);
2573466a 2913 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2914 emit_call(handler);
2915 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2916 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2917 }
2918 if(restore_jump)
2919 set_jump_target(restore_jump,(int)out);
2920 restore_regs(reglist);
2921 emit_jmp(stubs[n][2]); // return address
2922#else // !PCSX
57871462 2923 if(addr<0) addr=rt;
535d208a 2924 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2925 assert(addr>=0);
2926 int ftable=0;
2927 if(type==LOADB_STUB||type==LOADBU_STUB)
2928 ftable=(int)readmemb;
2929 if(type==LOADH_STUB||type==LOADHU_STUB)
2930 ftable=(int)readmemh;
2931 if(type==LOADW_STUB)
2932 ftable=(int)readmem;
24385cae 2933#ifndef FORCE32
57871462 2934 if(type==LOADD_STUB)
2935 ftable=(int)readmemd;
24385cae 2936#endif
2937 assert(ftable!=0);
57871462 2938 emit_writeword(rs,(int)&address);
2939 //emit_pusha();
2940 save_regs(reglist);
97a238a6 2941#ifndef PCSX
57871462 2942 ds=i_regs!=&regs[i];
2943 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2944 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2945 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2946 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2947 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2948#endif
57871462 2949 emit_shrimm(rs,16,1);
2950 int cc=get_reg(i_regmap,CCREG);
2951 if(cc<0) {
2952 emit_loadreg(CCREG,2);
2953 }
2954 emit_movimm(ftable,0);
2955 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2956#ifndef PCSX
57871462 2957 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2958#endif
57871462 2959 //emit_readword((int)&last_count,temp);
2960 //emit_add(cc,temp,cc);
2961 //emit_writeword(cc,(int)&Count);
2962 //emit_mov(15,14);
2963 emit_call((int)&indirect_jump_indexed);
2964 //emit_callreg(rs);
2965 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2966#ifndef PCSX
57871462 2967 // We really shouldn't need to update the count here,
2968 // but not doing so causes random crashes...
2969 emit_readword((int)&Count,HOST_TEMPREG);
2970 emit_readword((int)&next_interupt,2);
2971 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2972 emit_writeword(2,(int)&last_count);
2973 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2974 if(cc<0) {
2975 emit_storereg(CCREG,HOST_TEMPREG);
2976 }
f51dc36c 2977#endif
57871462 2978 //emit_popa();
2979 restore_regs(reglist);
2980 //if((cc=get_reg(regmap,CCREG))>=0) {
2981 // emit_loadreg(CCREG,cc);
2982 //}
f18c0f46 2983 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2984 assert(rt>=0);
2985 if(type==LOADB_STUB)
2986 emit_movsbl((int)&readmem_dword,rt);
2987 if(type==LOADBU_STUB)
2988 emit_movzbl((int)&readmem_dword,rt);
2989 if(type==LOADH_STUB)
2990 emit_movswl((int)&readmem_dword,rt);
2991 if(type==LOADHU_STUB)
2992 emit_movzwl((int)&readmem_dword,rt);
2993 if(type==LOADW_STUB)
2994 emit_readword((int)&readmem_dword,rt);
2995 if(type==LOADD_STUB) {
2996 emit_readword((int)&readmem_dword,rt);
2997 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2998 }
57871462 2999 }
3000 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 3001#endif // !PCSX
57871462 3002}
3003
c6c3b1b3 3004#ifdef PCSX
3005// return memhandler, or get directly accessable address and return 0
3006u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
3007{
3008 u_int l1,l2=0;
3009 l1=((u_int *)table)[addr>>12];
3010 if((l1&(1<<31))==0) {
3011 u_int v=l1<<1;
3012 *addr_host=v+addr;
3013 return 0;
3014 }
3015 else {
3016 l1<<=1;
3017 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
3018 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 3019 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 3020 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
3021 else
3022 l2=((u_int *)l1)[(addr&0xfff)/4];
3023 if((l2&(1<<31))==0) {
3024 u_int v=l2<<1;
3025 *addr_host=v+(addr&0xfff);
3026 return 0;
3027 }
3028 return l2<<1;
3029 }
3030}
3031#endif
3032
57871462 3033inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3034{
3035 int rs=get_reg(regmap,target);
3036 int rth=get_reg(regmap,target|64);
3037 int rt=get_reg(regmap,target);
535d208a 3038 if(rs<0) rs=get_reg(regmap,-1);
57871462 3039 assert(rs>=0);
c6c3b1b3 3040#ifdef PCSX
b1be1eee 3041 u_int handler,host_addr=0,is_dynamic,far_call=0;
3042 int cc=get_reg(regmap,CCREG);
3043 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
3044 return;
c6c3b1b3 3045 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
3046 if (handler==0) {
db829eeb 3047 if(rt<0||rt1[i]==0)
c6c3b1b3 3048 return;
13e35c04 3049 if(addr!=host_addr)
3050 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 3051 switch(type) {
3052 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
3053 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
3054 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
3055 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
3056 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
3057 default: assert(0);
3058 }
3059 return;
3060 }
b1be1eee 3061 is_dynamic=pcsxmem_is_handler_dynamic(addr);
3062 if(is_dynamic) {
3063 if(type==LOADB_STUB||type==LOADBU_STUB)
3064 handler=(int)jump_handler_read8;
3065 if(type==LOADH_STUB||type==LOADHU_STUB)
3066 handler=(int)jump_handler_read16;
3067 if(type==LOADW_STUB)
3068 handler=(int)jump_handler_read32;
3069 }
c6c3b1b3 3070
3071 // call a memhandler
db829eeb 3072 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 3073 reglist&=~(1<<rt);
3074 save_regs(reglist);
3075 if(target==0)
3076 emit_movimm(addr,0);
3077 else if(rs!=0)
3078 emit_mov(rs,0);
c6c3b1b3 3079 int offset=(int)handler-(int)out-8;
3080 if(offset<-33554432||offset>=33554432) {
3081 // unreachable memhandler, a plugin func perhaps
b1be1eee 3082 emit_movimm(handler,12);
3083 far_call=1;
3084 }
3085 if(cc<0)
3086 emit_loadreg(CCREG,2);
3087 if(is_dynamic) {
3088 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3089 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3090 }
b1be1eee 3091 else {
3092 emit_readword((int)&last_count,3);
3093 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3094 emit_add(2,3,2);
3095 emit_writeword(2,(int)&Count);
3096 }
3097
3098 if(far_call)
3099 emit_callreg(12);
c6c3b1b3 3100 else
3101 emit_call(handler);
b1be1eee 3102
db829eeb 3103 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 3104 switch(type) {
3105 case LOADB_STUB: emit_signextend8(0,rt); break;
3106 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3107 case LOADH_STUB: emit_signextend16(0,rt); break;
3108 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3109 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3110 default: assert(0);
3111 }
3112 }
3113 restore_regs(reglist);
3114#else // if !PCSX
57871462 3115 int ftable=0;
3116 if(type==LOADB_STUB||type==LOADBU_STUB)
3117 ftable=(int)readmemb;
3118 if(type==LOADH_STUB||type==LOADHU_STUB)
3119 ftable=(int)readmemh;
3120 if(type==LOADW_STUB)
3121 ftable=(int)readmem;
24385cae 3122#ifndef FORCE32
57871462 3123 if(type==LOADD_STUB)
3124 ftable=(int)readmemd;
24385cae 3125#endif
3126 assert(ftable!=0);
fd99c415 3127 if(target==0)
3128 emit_movimm(addr,rs);
57871462 3129 emit_writeword(rs,(int)&address);
3130 //emit_pusha();
3131 save_regs(reglist);
0c1fe38b 3132#ifndef PCSX
3133 if((signed int)addr>=(signed int)0xC0000000) {
3134 // Theoretically we can have a pagefault here, if the TLB has never
3135 // been enabled and the address is outside the range 80000000..BFFFFFFF
3136 // Write out the registers so the pagefault can be handled. This is
3137 // a very rare case and likely represents a bug.
3138 int ds=regmap!=regs[i].regmap;
3139 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3140 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3141 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3142 }
3143#endif
57871462 3144 //emit_shrimm(rs,16,1);
3145 int cc=get_reg(regmap,CCREG);
3146 if(cc<0) {
3147 emit_loadreg(CCREG,2);
3148 }
3149 //emit_movimm(ftable,0);
3150 emit_movimm(((u_int *)ftable)[addr>>16],0);
3151 //emit_readword((int)&last_count,12);
2573466a 3152 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3153#ifndef PCSX
57871462 3154 if((signed int)addr>=(signed int)0xC0000000) {
3155 // Pagefault address
3156 int ds=regmap!=regs[i].regmap;
3157 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3158 }
f51dc36c 3159#endif
57871462 3160 //emit_add(12,2,2);
3161 //emit_writeword(2,(int)&Count);
3162 //emit_call(((u_int *)ftable)[addr>>16]);
3163 emit_call((int)&indirect_jump);
f51dc36c 3164#ifndef PCSX
57871462 3165 // We really shouldn't need to update the count here,
3166 // but not doing so causes random crashes...
3167 emit_readword((int)&Count,HOST_TEMPREG);
3168 emit_readword((int)&next_interupt,2);
2573466a 3169 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3170 emit_writeword(2,(int)&last_count);
3171 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3172 if(cc<0) {
3173 emit_storereg(CCREG,HOST_TEMPREG);
3174 }
f51dc36c 3175#endif
57871462 3176 //emit_popa();
3177 restore_regs(reglist);
fd99c415 3178 if(rt>=0) {
3179 if(type==LOADB_STUB)
3180 emit_movsbl((int)&readmem_dword,rt);
3181 if(type==LOADBU_STUB)
3182 emit_movzbl((int)&readmem_dword,rt);
3183 if(type==LOADH_STUB)
3184 emit_movswl((int)&readmem_dword,rt);
3185 if(type==LOADHU_STUB)
3186 emit_movzwl((int)&readmem_dword,rt);
3187 if(type==LOADW_STUB)
3188 emit_readword((int)&readmem_dword,rt);
3189 if(type==LOADD_STUB) {
3190 emit_readword((int)&readmem_dword,rt);
3191 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3192 }
57871462 3193 }
c6c3b1b3 3194#endif // !PCSX
57871462 3195}
3196
3197do_writestub(int n)
3198{
3199 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3200 literal_pool(256);
3201 set_jump_target(stubs[n][1],(int)out);
3202 int type=stubs[n][0];
3203 int i=stubs[n][3];
3204 int rs=stubs[n][4];
3205 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3206 u_int reglist=stubs[n][7];
3207 signed char *i_regmap=i_regs->regmap;
3208 int addr=get_reg(i_regmap,AGEN1+(i&1));
3209 int rth,rt,r;
3210 int ds;
b9b61529 3211 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3212 rth=get_reg(i_regmap,FTEMP|64);
3213 rt=get_reg(i_regmap,r=FTEMP);
3214 }else{
3215 rth=get_reg(i_regmap,rs2[i]|64);
3216 rt=get_reg(i_regmap,r=rs2[i]);
3217 }
3218 assert(rs>=0);
3219 assert(rt>=0);
b96d3df7 3220#ifdef PCSX
3221 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3222 int reglist2=reglist|(1<<rs)|(1<<rt);
3223 for(rtmp=0;rtmp<=12;rtmp++) {
3224 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3225 temp=rtmp; break;
3226 }
3227 }
3228 if(temp==-1) {
3229 save_regs(reglist);
3230 regs_saved=1;
3231 for(rtmp=0;rtmp<=3;rtmp++)
3232 if(rtmp!=rs&&rtmp!=rt)
3233 {temp=rtmp;break;}
3234 }
3235 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3236 temp2=3;
3237 emit_readword((int)&mem_wtab,temp);
3238 emit_shrimm(rs,12,temp2);
3239 emit_readword_dualindexedx4(temp,temp2,temp2);
3240 emit_lsls_imm(temp2,1,temp2);
3241 switch(type) {
3242 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3243 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3244 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3245 default: assert(0);
3246 }
3247 if(regs_saved) {
3248 restore_jump=(int)out;
3249 emit_jcc(0); // jump to reg restore
3250 }
3251 else
3252 emit_jcc(stubs[n][2]); // return address (invcode check)
3253
3254 if(!regs_saved)
3255 save_regs(reglist);
3256 int handler=0;
3257 switch(type) {
3258 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3259 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3260 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3261 }
3262 assert(handler!=0);
3263 pass_args(rs,rt);
3264 if(temp2!=3)
3265 emit_mov(temp2,3);
3266 int cc=get_reg(i_regmap,CCREG);
3267 if(cc<0)
3268 emit_loadreg(CCREG,2);
2573466a 3269 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3270 // returns new cycle_count
3271 emit_call(handler);
2573466a 3272 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3273 if(cc<0)
3274 emit_storereg(CCREG,2);
3275 if(restore_jump)
3276 set_jump_target(restore_jump,(int)out);
3277 restore_regs(reglist);
3278 ra=stubs[n][2];
b96d3df7 3279 emit_jmp(ra);
3280#else // if !PCSX
57871462 3281 if(addr<0) addr=get_reg(i_regmap,-1);
3282 assert(addr>=0);
3283 int ftable=0;
3284 if(type==STOREB_STUB)
3285 ftable=(int)writememb;
3286 if(type==STOREH_STUB)
3287 ftable=(int)writememh;
3288 if(type==STOREW_STUB)
3289 ftable=(int)writemem;
24385cae 3290#ifndef FORCE32
57871462 3291 if(type==STORED_STUB)
3292 ftable=(int)writememd;
24385cae 3293#endif
3294 assert(ftable!=0);
57871462 3295 emit_writeword(rs,(int)&address);
3296 //emit_shrimm(rs,16,rs);
3297 //emit_movmem_indexedx4(ftable,rs,rs);
3298 if(type==STOREB_STUB)
3299 emit_writebyte(rt,(int)&byte);
3300 if(type==STOREH_STUB)
3301 emit_writehword(rt,(int)&hword);
3302 if(type==STOREW_STUB)
3303 emit_writeword(rt,(int)&word);
3304 if(type==STORED_STUB) {
3d624f89 3305#ifndef FORCE32
57871462 3306 emit_writeword(rt,(int)&dword);
3307 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3308#else
c43b5311 3309 SysPrintf("STORED_STUB\n");
3d624f89 3310#endif
57871462 3311 }
3312 //emit_pusha();
3313 save_regs(reglist);
97a238a6 3314#ifndef PCSX
57871462 3315 ds=i_regs!=&regs[i];
3316 int real_rs=get_reg(i_regmap,rs1[i]);
3317 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3318 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3319 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3320 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3321#endif
57871462 3322 emit_shrimm(rs,16,1);
3323 int cc=get_reg(i_regmap,CCREG);
3324 if(cc<0) {
3325 emit_loadreg(CCREG,2);
3326 }
3327 emit_movimm(ftable,0);
3328 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3329#ifndef PCSX
57871462 3330 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3331#endif
57871462 3332 //emit_readword((int)&last_count,temp);
3333 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3334 //emit_add(cc,temp,cc);
3335 //emit_writeword(cc,(int)&Count);
3336 emit_call((int)&indirect_jump_indexed);
3337 //emit_callreg(rs);
3338 emit_readword((int)&Count,HOST_TEMPREG);
3339 emit_readword((int)&next_interupt,2);
3340 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3341 emit_writeword(2,(int)&last_count);
3342 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3343 if(cc<0) {
3344 emit_storereg(CCREG,HOST_TEMPREG);
3345 }
3346 //emit_popa();
3347 restore_regs(reglist);
3348 //if((cc=get_reg(regmap,CCREG))>=0) {
3349 // emit_loadreg(CCREG,cc);
3350 //}
3351 emit_jmp(stubs[n][2]); // return address
b96d3df7 3352#endif // !PCSX
57871462 3353}
3354
3355inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3356{
3357 int rs=get_reg(regmap,-1);
3358 int rth=get_reg(regmap,target|64);
3359 int rt=get_reg(regmap,target);
3360 assert(rs>=0);
3361 assert(rt>=0);
cbbab9cd 3362#ifdef PCSX
b96d3df7 3363 u_int handler,host_addr=0;
b96d3df7 3364 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3365 if (handler==0) {
13e35c04 3366 if(addr!=host_addr)
3367 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3368 switch(type) {
3369 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3370 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3371 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3372 default: assert(0);
3373 }
3374 return;
3375 }
3376
3377 // call a memhandler
3378 save_regs(reglist);
13e35c04 3379 pass_args(rs,rt);
b96d3df7 3380 int cc=get_reg(regmap,CCREG);
3381 if(cc<0)
3382 emit_loadreg(CCREG,2);
2573466a 3383 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3384 emit_movimm(handler,3);
3385 // returns new cycle_count
3386 emit_call((int)jump_handler_write_h);
2573466a 3387 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3388 if(cc<0)
3389 emit_storereg(CCREG,2);
3390 restore_regs(reglist);
3391#else // if !pcsx
57871462 3392 int ftable=0;
3393 if(type==STOREB_STUB)
3394 ftable=(int)writememb;
3395 if(type==STOREH_STUB)
3396 ftable=(int)writememh;
3397 if(type==STOREW_STUB)
3398 ftable=(int)writemem;
24385cae 3399#ifndef FORCE32
57871462 3400 if(type==STORED_STUB)
3401 ftable=(int)writememd;
24385cae 3402#endif
3403 assert(ftable!=0);
57871462 3404 emit_writeword(rs,(int)&address);
3405 //emit_shrimm(rs,16,rs);
3406 //emit_movmem_indexedx4(ftable,rs,rs);
3407 if(type==STOREB_STUB)
3408 emit_writebyte(rt,(int)&byte);
3409 if(type==STOREH_STUB)
3410 emit_writehword(rt,(int)&hword);
3411 if(type==STOREW_STUB)
3412 emit_writeword(rt,(int)&word);
3413 if(type==STORED_STUB) {
3d624f89 3414#ifndef FORCE32
57871462 3415 emit_writeword(rt,(int)&dword);
3416 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3417#else
c43b5311 3418 SysPrintf("STORED_STUB\n");
3d624f89 3419#endif
57871462 3420 }
3421 //emit_pusha();
3422 save_regs(reglist);
0c1fe38b 3423#ifndef PCSX
3424 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3425 if((signed int)addr>=(signed int)0xC0000000) {
3426 // Theoretically we can have a pagefault here, if the TLB has never
3427 // been enabled and the address is outside the range 80000000..BFFFFFFF
3428 // Write out the registers so the pagefault can be handled. This is
3429 // a very rare case and likely represents a bug.
3430 int ds=regmap!=regs[i].regmap;
3431 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3432 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3433 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3434 }
3435#endif
57871462 3436 //emit_shrimm(rs,16,1);
3437 int cc=get_reg(regmap,CCREG);
3438 if(cc<0) {
3439 emit_loadreg(CCREG,2);
3440 }
3441 //emit_movimm(ftable,0);
3442 emit_movimm(((u_int *)ftable)[addr>>16],0);
3443 //emit_readword((int)&last_count,12);
2573466a 3444 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3445#ifndef PCSX
57871462 3446 if((signed int)addr>=(signed int)0xC0000000) {
3447 // Pagefault address
3448 int ds=regmap!=regs[i].regmap;
3449 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3450 }
f51dc36c 3451#endif
57871462 3452 //emit_add(12,2,2);
3453 //emit_writeword(2,(int)&Count);
3454 //emit_call(((u_int *)ftable)[addr>>16]);
3455 emit_call((int)&indirect_jump);
3456 emit_readword((int)&Count,HOST_TEMPREG);
3457 emit_readword((int)&next_interupt,2);
2573466a 3458 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3459 emit_writeword(2,(int)&last_count);
3460 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3461 if(cc<0) {
3462 emit_storereg(CCREG,HOST_TEMPREG);
3463 }
3464 //emit_popa();
3465 restore_regs(reglist);
b96d3df7 3466#endif
57871462 3467}
3468
3469do_unalignedwritestub(int n)
3470{
b7918751 3471 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3472 literal_pool(256);
57871462 3473 set_jump_target(stubs[n][1],(int)out);
b7918751 3474
3475 int i=stubs[n][3];
3476 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3477 int addr=stubs[n][5];
3478 u_int reglist=stubs[n][7];
3479 signed char *i_regmap=i_regs->regmap;
3480 int temp2=get_reg(i_regmap,FTEMP);
3481 int rt;
3482 int ds, real_rs;
3483 rt=get_reg(i_regmap,rs2[i]);
3484 assert(rt>=0);
3485 assert(addr>=0);
3486 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3487 reglist|=(1<<addr);
3488 reglist&=~(1<<temp2);
3489
b96d3df7 3490#if 1
3491 // don't bother with it and call write handler
3492 save_regs(reglist);
3493 pass_args(addr,rt);
3494 int cc=get_reg(i_regmap,CCREG);
3495 if(cc<0)
3496 emit_loadreg(CCREG,2);
2573466a 3497 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3498 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3499 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3500 if(cc<0)
3501 emit_storereg(CCREG,2);
3502 restore_regs(reglist);
3503 emit_jmp(stubs[n][2]); // return address
3504#else
b7918751 3505 emit_andimm(addr,0xfffffffc,temp2);
3506 emit_writeword(temp2,(int)&address);
3507
3508 save_regs(reglist);
97a238a6 3509#ifndef PCSX
b7918751 3510 ds=i_regs!=&regs[i];
3511 real_rs=get_reg(i_regmap,rs1[i]);
3512 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3513 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3514 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3515 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3516#endif
b7918751 3517 emit_shrimm(addr,16,1);
3518 int cc=get_reg(i_regmap,CCREG);
3519 if(cc<0) {
3520 emit_loadreg(CCREG,2);
3521 }
3522 emit_movimm((u_int)readmem,0);
3523 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3524#ifndef PCSX
3525 // pagefault address
3526 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3527#endif
b7918751 3528 emit_call((int)&indirect_jump_indexed);
3529 restore_regs(reglist);
3530
3531 emit_readword((int)&readmem_dword,temp2);
3532 int temp=addr; //hmh
3533 emit_shlimm(addr,3,temp);
3534 emit_andimm(temp,24,temp);
3535#ifdef BIG_ENDIAN_MIPS
3536 if (opcode[i]==0x2e) // SWR
3537#else
3538 if (opcode[i]==0x2a) // SWL
3539#endif
3540 emit_xorimm(temp,24,temp);
3541 emit_movimm(-1,HOST_TEMPREG);
55439448 3542 if (opcode[i]==0x2a) { // SWL
b7918751 3543 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3544 emit_orrshr(rt,temp,temp2);
3545 }else{
3546 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3547 emit_orrshl(rt,temp,temp2);
3548 }
3549 emit_readword((int)&address,addr);
3550 emit_writeword(temp2,(int)&word);
3551 //save_regs(reglist); // don't need to, no state changes
3552 emit_shrimm(addr,16,1);
3553 emit_movimm((u_int)writemem,0);
3554 //emit_call((int)&indirect_jump_indexed);
3555 emit_mov(15,14);
3556 emit_readword_dualindexedx4(0,1,15);
3557 emit_readword((int)&Count,HOST_TEMPREG);
3558 emit_readword((int)&next_interupt,2);
3559 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3560 emit_writeword(2,(int)&last_count);
3561 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3562 if(cc<0) {
3563 emit_storereg(CCREG,HOST_TEMPREG);
3564 }
3565 restore_regs(reglist);
57871462 3566 emit_jmp(stubs[n][2]); // return address
b96d3df7 3567#endif
57871462 3568}
3569
/* Debug helper: dump eight register values in hex, followed by the word
 * located just below the first argument in memory. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  // NOTE(review): this reads outside the `edi` object; it is only meaningful
  // for the stack layout the helper was originally written for.
  int below_first_arg=(&edi)[-1];

  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,below_first_arg);
}
3574
3575do_invstub(int n)
3576{
3577 literal_pool(20);
3578 u_int reglist=stubs[n][3];
3579 set_jump_target(stubs[n][1],(int)out);
3580 save_regs(reglist);
3581 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3582 emit_call((int)&invalidate_addr);
3583 restore_regs(reglist);
3584 emit_jmp(stubs[n][2]); // return address
3585}
3586
3587int do_dirty_stub(int i)
3588{
3589 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3590 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3591 #ifdef PCSX
3592 addr=(u_int)source;
3593 #endif
57871462 3594 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3595 #ifndef HAVE_ARMV7
ac545b3a 3596 emit_loadlp(addr,1);
57871462 3597 emit_loadlp((int)copy,2);
3598 emit_loadlp(slen*4,3);
3599 #else
ac545b3a 3600 emit_movw(addr&0x0000FFFF,1);
57871462 3601 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3602 emit_movt(addr&0xFFFF0000,1);
57871462 3603 emit_movt(((u_int)copy)&0xFFFF0000,2);
3604 emit_movw(slen*4,3);
3605 #endif
3606 emit_movimm(start+i*4,0);
3607 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3608 int entry=(int)out;
3609 load_regs_entry(i);
3610 if(entry==(int)out) entry=instr_addr[i];
3611 emit_jmp(instr_addr[i]);
3612 return entry;
3613}
3614
3615void do_dirty_stub_ds()
3616{
3617 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3618 #ifndef HAVE_ARMV7
57871462 3619 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3620 emit_loadlp((int)copy,2);
3621 emit_loadlp(slen*4,3);
3622 #else
3623 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3624 emit_movw(((u_int)copy)&0x0000FFFF,2);
3625 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3626 emit_movt(((u_int)copy)&0xFFFF0000,2);
3627 emit_movw(slen*4,3);
3628 #endif
3629 emit_movimm(start+1,0);
3630 emit_call((int)&verify_code_ds);
3631}
3632
3633do_cop1stub(int n)
3634{
3635 literal_pool(256);
3636 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3637 set_jump_target(stubs[n][1],(int)out);
3638 int i=stubs[n][3];
3d624f89 3639// int rs=stubs[n][4];
57871462 3640 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3641 int ds=stubs[n][6];
3642 if(!ds) {
3643 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3644 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3645 }
3646 //else {printf("fp exception in delay slot\n");}
3647 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3648 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3649 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3650 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3651 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3652}
3653
63cb0298 3654#ifndef DISABLE_TLB
3655
57871462 3656/* TLB */
3657
3658int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3659{
3660 if(c) {
3661 if((signed int)addr>=(signed int)0xC0000000) {
3662 // address_generation already loaded the const
3663 emit_readword_dualindexedx4(FP,map,map);
3664 }
3665 else
3666 return -1; // No mapping
3667 }
3668 else {
3669 assert(s!=map);
3670 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3671 emit_addsr12(map,s,map);
3672 // Schedule this while we wait on the load
3673 //if(x) emit_xorimm(s,x,ar);
3674 if(shift>=0) emit_shlimm(s,3,shift);
3675 if(~a) emit_andimm(s,a,ar);
3676 emit_readword_dualindexedx4(FP,map,map);
3677 }
3678 return map;
3679}
3680int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3681{
3682 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3683 emit_test(map,map);
3684 *jaddr=(int)out;
3685 emit_js(0);
3686 }
3687 return map;
3688}
3689
3690int gen_tlb_addr_r(int ar, int map) {
3691 if(map>=0) {
3692 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3693 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3694 }
3695}
3696
3697int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3698{
3699 if(c) {
3700 if(addr<0x80800000||addr>=0xC0000000) {
3701 // address_generation already loaded the const
3702 emit_readword_dualindexedx4(FP,map,map);
3703 }
3704 else
3705 return -1; // No mapping
3706 }
3707 else {
3708 assert(s!=map);
3709 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3710 emit_addsr12(map,s,map);
3711 // Schedule this while we wait on the load
3712 //if(x) emit_xorimm(s,x,ar);
3713 emit_readword_dualindexedx4(FP,map,map);
3714 }
3715 return map;
3716}
3717int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3718{
3719 if(!c||addr<0x80800000||addr>=0xC0000000) {
3720 emit_testimm(map,0x40000000);
3721 *jaddr=(int)out;
3722 emit_jne(0);
3723 }
3724}
3725
3726int gen_tlb_addr_w(int ar, int map) {
3727 if(map>=0) {
3728 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3729 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3730 }
3731}
3732
3733// Generate the address of the memory_map entry, relative to dynarec_local
3734generate_map_const(u_int addr,int reg) {
3735 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3736 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3737}
3738
63cb0298 3739#else
3740
/* No-op replacements used when the TLB is compiled out: each accepts any
 * argument list, ignores it and returns 0. */
static int do_tlb_r(int a, ...)
{
  (void)a;
  return 0;
}

static int do_tlb_r_branch(int a, ...)
{
  (void)a;
  return 0;
}

static int gen_tlb_addr_r(int a, ...)
{
  (void)a;
  return 0;
}

static int do_tlb_w(int a, ...)
{
  (void)a;
  return 0;
}

static int do_tlb_w_branch(int a, ...)
{
  (void)a;
  return 0;
}

static int gen_tlb_addr_w(int a, ...)
{
  (void)a;
  return 0;
}
63cb0298 3747
3748#endif // DISABLE_TLB
3749
57871462 3750/* Special assem */
3751
3752void shift_assemble_arm(int i,struct regstat *i_regs)
3753{
3754 if(rt1[i]) {
3755 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3756 {
3757 signed char s,t,shift;
3758 t=get_reg(i_regs->regmap,rt1[i]);
3759 s=get_reg(i_regs->regmap,rs1[i]);
3760 shift=get_reg(i_regs->regmap,rs2[i]);
3761 if(t>=0){
3762 if(rs1[i]==0)
3763 {
3764 emit_zeroreg(t);
3765 }
3766 else if(rs2[i]==0)
3767 {
3768 assert(s>=0);
3769 if(s!=t) emit_mov(s,t);
3770 }
3771 else
3772 {
3773 emit_andimm(shift,31,HOST_TEMPREG);
3774 if(opcode2[i]==4) // SLLV
3775 {
3776 emit_shl(s,HOST_TEMPREG,t);
3777 }
3778 if(opcode2[i]==6) // SRLV
3779 {
3780 emit_shr(s,HOST_TEMPREG,t);
3781 }
3782 if(opcode2[i]==7) // SRAV
3783 {
3784 emit_sar(s,HOST_TEMPREG,t);
3785 }
3786 }
3787 }
3788 } else { // DSLLV/DSRLV/DSRAV
3789 signed char sh,sl,th,tl,shift;
3790 th=get_reg(i_regs->regmap,rt1[i]|64);
3791 tl=get_reg(i_regs->regmap,rt1[i]);
3792 sh=get_reg(i_regs->regmap,rs1[i]|64);
3793 sl=get_reg(i_regs->regmap,rs1[i]);
3794 shift=get_reg(i_regs->regmap,rs2[i]);
3795 if(tl>=0){
3796 if(rs1[i]==0)
3797 {
3798 emit_zeroreg(tl);
3799 if(th>=0) emit_zeroreg(th);
3800 }
3801 else if(rs2[i]==0)
3802 {
3803 assert(sl>=0);
3804 if(sl!=tl) emit_mov(sl,tl);
3805 if(th>=0&&sh!=th) emit_mov(sh,th);
3806 }
3807 else
3808 {
3809 // FIXME: What if shift==tl ?
3810 assert(shift!=tl);
3811 int temp=get_reg(i_regs->regmap,-1);
3812 int real_th=th;
3813 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3814 assert(sl>=0);
3815 assert(sh>=0);
3816 emit_andimm(shift,31,HOST_TEMPREG);
3817 if(opcode2[i]==0x14) // DSLLV
3818 {
3819 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3820 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3821 emit_orrshr(sl,HOST_TEMPREG,th);
3822 emit_andimm(shift,31,HOST_TEMPREG);
3823 emit_testimm(shift,32);
3824 emit_shl(sl,HOST_TEMPREG,tl);
3825 if(th>=0) emit_cmovne_reg(tl,th);
3826 emit_cmovne_imm(0,tl);
3827 }
3828 if(opcode2[i]==0x16) // DSRLV
3829 {
3830 assert(th>=0);
3831 emit_shr(sl,HOST_TEMPREG,tl);
3832 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3833 emit_orrshl(sh,HOST_TEMPREG,tl);
3834 emit_andimm(shift,31,HOST_TEMPREG);
3835 emit_testimm(shift,32);
3836 emit_shr(sh,HOST_TEMPREG,th);
3837 emit_cmovne_reg(th,tl);
3838 if(real_th>=0) emit_cmovne_imm(0,th);
3839 }
3840 if(opcode2[i]==0x17) // DSRAV
3841 {
3842 assert(th>=0);
3843 emit_shr(sl,HOST_TEMPREG,tl);
3844 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3845 if(real_th>=0) {
3846 assert(temp>=0);
3847 emit_sarimm(th,31,temp);
3848 }
3849 emit_orrshl(sh,HOST_TEMPREG,tl);
3850 emit_andimm(shift,31,HOST_TEMPREG);
3851 emit_testimm(shift,32);
3852 emit_sar(sh,HOST_TEMPREG,th);
3853 emit_cmovne_reg(th,tl);
3854 if(real_th>=0) emit_cmovne_reg(temp,th);
3855 }
3856 }
3857 }
3858 }
3859 }
3860}
ffb0b9e0 3861
3862#ifdef PCSX
3863static void speculate_mov(int rs,int rt)
3864{
3865 if(rt!=0) {
3866 smrv_strong_next|=1<<rt;
3867 smrv[rt]=smrv[rs];
3868 }
3869}
3870
3871static void speculate_mov_weak(int rs,int rt)
3872{
3873 if(rt!=0) {
3874 smrv_weak_next|=1<<rt;
3875 smrv[rt]=smrv[rs];
3876 }
3877}
3878
3879static void speculate_register_values(int i)
3880{
3881 if(i==0) {
3882 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3883 // gp,sp are likely to stay the same throughout the block
3884 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3885 smrv_weak_next=~smrv_strong_next;
3886 //printf(" llr %08x\n", smrv[4]);
3887 }
3888 smrv_strong=smrv_strong_next;
3889 smrv_weak=smrv_weak_next;
3890 switch(itype[i]) {
3891 case ALU:
3892 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3893 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3894 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3895 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3896 else {
3897 smrv_strong_next&=~(1<<rt1[i]);
3898 smrv_weak_next&=~(1<<rt1[i]);
3899 }
3900 break;
3901 case SHIFTIMM:
3902 smrv_strong_next&=~(1<<rt1[i]);
3903 smrv_weak_next&=~(1<<rt1[i]);
3904 // fallthrough
3905 case IMM16:
3906 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3907 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3908 if(hr>=0) {
3909 if(get_final_value(hr,i,&value))
3910 smrv[rt1[i]]=value;
3911 else smrv[rt1[i]]=constmap[i][hr];
3912 smrv_strong_next|=1<<rt1[i];
3913 }
3914 }
3915 else {
3916 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3917 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3918 }
3919 break;
3920 case LOAD:
3921 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3922 // special case for BIOS
3923 smrv[rt1[i]]=0xa0000000;
3924 smrv_strong_next|=1<<rt1[i];
3925 break;
3926 }
3927 // fallthrough
3928 case SHIFT:
3929 case LOADLR:
3930 case MOV:
3931 smrv_strong_next&=~(1<<rt1[i]);
3932 smrv_weak_next&=~(1<<rt1[i]);
3933 break;
3934 case COP0:
3935 case COP2:
3936 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3937 smrv_strong_next&=~(1<<rt1[i]);
3938 smrv_weak_next&=~(1<<rt1[i]);
3939 }
3940 break;
3941 case C2LS:
3942 if (opcode[i]==0x32) { // LWC2
3943 smrv_strong_next&=~(1<<rt1[i]);
3944 smrv_weak_next&=~(1<<rt1[i]);
3945 }
3946 break;
3947 }
3948#if 0
3949 int r=4;
3950 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3951 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3952#endif
3953}
3954
/* Memory classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default: checked directly against RAM_SIZE */
  MTYPE_8020 = 1, /* RAM mirror, 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* RAM mirror, below 0x00200000 */
  MTYPE_A000 = 3, /* RAM mirror, 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4  /* scratchpad, 0x1f800000..0x1f800fff */
};
3962
3963static int get_ptr_mem_type(u_int a)
3964{
3965 if(a < 0x00200000) {
3966 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3967 // return wrong, must use memhandler for BIOS self-test to pass
3968 // 007 does similar stuff from a00 mirror, weird stuff
3969 return MTYPE_8000;
3970 return MTYPE_0000;
3971 }
3972 if(0x1f800000 <= a && a < 0x1f801000)
3973 return MTYPE_1F80;
3974 if(0x80200000 <= a && a < 0x80800000)
3975 return MTYPE_8020;
3976 if(0xa0000000 <= a && a < 0xa0200000)
3977 return MTYPE_A000;
3978 return MTYPE_8000;
3979}
3980#endif
3981
3982static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3983{
3984 int jaddr,type=0;
3985
3986#ifdef PCSX
3987 int mr=rs1[i];
3988 if(((smrv_strong|smrv_weak)>>mr)&1) {
3989 type=get_ptr_mem_type(smrv[mr]);
3990 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3991 }
3992 else {
3993 // use the mirror we are running on
3994 type=get_ptr_mem_type(start);
3995 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3996 }
3997
3998 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3999 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
4000 addr=*addr_reg_override=HOST_TEMPREG;
4001 type=0;
4002 }
4003 else if(type==MTYPE_0000) { // RAM 0 mirror
4004 emit_orimm(addr,0x80000000,HOST_TEMPREG);
4005 addr=*addr_reg_override=HOST_TEMPREG;
4006 type=0;
4007 }
4008 else if(type==MTYPE_A000) { // RAM A mirror
4009 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
4010 addr=*addr_reg_override=HOST_TEMPREG;
4011 type=0;
4012 }
4013 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 4014 if (psxH == (void *)0x1f800000) {
4015 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
4016 emit_cmpimm(HOST_TEMPREG,0x1000);
4017 jaddr=(int)out;
4018 emit_jc(0);
4019 }
4020 else {
4021 // do usual RAM check, jump will go to the right handler
4022 type=0;
4023 }
ffb0b9e0 4024 }
4025#endif
4026
4027 if(type==0)
4028 {
4029 emit_cmpimm(addr,RAM_SIZE);
4030 jaddr=(int)out;
4031 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4032 // Hint to branch predictor that the branch is unlikely to be taken
4033 if(rs1[i]>=28)
4034 emit_jno_unlikely(0);
4035 else
4036 #endif
4037 emit_jno(0);
a327ad27 4038 if(ram_offset!=0) {
4039 emit_addimm(addr,ram_offset,HOST_TEMPREG);
4040 addr=*addr_reg_override=HOST_TEMPREG;
4041 }
ffb0b9e0 4042 }
4043
4044 return jaddr;
4045}
4046
57871462 4047#define shift_assemble shift_assemble_arm
4048
4049void loadlr_assemble_arm(int i,struct regstat *i_regs)
4050{
4051 int s,th,tl,temp,temp2,addr,map=-1;
4052 int offset;
4053 int jaddr=0;
af4ee1fe 4054 int memtarget=0,c=0;
ffb0b9e0 4055 int fastload_reg_override=0;
57871462 4056 u_int hr,reglist=0;
4057 th=get_reg(i_regs->regmap,rt1[i]|64);
4058 tl=get_reg(i_regs->regmap,rt1[i]);
4059 s=get_reg(i_regs->regmap,rs1[i]);
4060 temp=get_reg(i_regs->regmap,-1);
4061 temp2=get_reg(i_regs->regmap,FTEMP);
4062 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4063 assert(addr<0);
4064 offset=imm[i];
4065 for(hr=0;hr<HOST_REGS;hr++) {
4066 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4067 }
4068 reglist|=1<<temp;
4069 if(offset||s<0||c) addr=temp2;
4070 else addr=s;
4071 if(s>=0) {
4072 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4073 if(c) {
4074 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4075 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4076 }
57871462 4077 }
535d208a 4078 if(!using_tlb) {
4079 if(!c) {
4080 #ifdef RAM_OFFSET
4081 map=get_reg(i_regs->regmap,ROREG);
4082 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4083 #endif
4084 emit_shlimm(addr,3,temp);
4085 if (opcode[i]==0x22||opcode[i]==0x26) {
4086 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4087 }else{
535d208a 4088 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4089 }
ffb0b9e0 4090 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4091 }
4092 else {
a327ad27 4093 if(ram_offset&&memtarget) {
4094 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
4095 fastload_reg_override=HOST_TEMPREG;
4096 }
535d208a 4097 if (opcode[i]==0x22||opcode[i]==0x26) {
4098 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4099 }else{
4100 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4101 }
57871462 4102 }
535d208a 4103 }else{ // using tlb
4104 int a;
4105 if(c) {
4106 a=-1;
4107 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4108 a=0xFFFFFFFC; // LWL/LWR
4109 }else{
4110 a=0xFFFFFFF8; // LDL/LDR
4111 }
4112 map=get_reg(i_regs->regmap,TLREG);
4113 assert(map>=0);
ea3d2e6e 4114 reglist&=~(1<<map);
535d208a 4115 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4116 if(c) {
4117 if (opcode[i]==0x22||opcode[i]==0x26) {
4118 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4119 }else{
4120 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4121 }
535d208a 4122 }
4123 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4124 }
4125 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4126 if(!c||memtarget) {
ffb0b9e0 4127 int a=temp2;
4128 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4129 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4130 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4131 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4132 }
4133 else
4134 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4135 if(rt1[i]) {
4136 assert(tl>=0);
57871462 4137 emit_andimm(temp,24,temp);
2002a1db 4138#ifdef BIG_ENDIAN_MIPS
4139 if (opcode[i]==0x26) // LWR
4140#else
4141 if (opcode[i]==0x22) // LWL
4142#endif
4143 emit_xorimm(temp,24,temp);
57871462 4144 emit_movimm(-1,HOST_TEMPREG);
4145 if (opcode[i]==0x26) {
4146 emit_shr(temp2,temp,temp2);
4147 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4148 }else{
4149 emit_shl(temp2,temp,temp2);
4150 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4151 }
4152 emit_or(temp2,tl,tl);
57871462 4153 }
535d208a 4154 //emit_storereg(rt1[i],tl); // DEBUG
4155 }
4156 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4157 // FIXME: little endian, fastload_reg_override
535d208a 4158 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4159 if(!c||memtarget) {
4160 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4161 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4162 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4163 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4164 }
4165 else
4166 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4167 if(rt1[i]) {
4168 assert(th>=0);
4169 assert(tl>=0);
57871462 4170 emit_testimm(temp,32);
4171 emit_andimm(temp,24,temp);
4172 if (opcode[i]==0x1A) { // LDL
4173 emit_rsbimm(temp,32,HOST_TEMPREG);
4174 emit_shl(temp2h,temp,temp2h);
4175 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4176 emit_movimm(-1,HOST_TEMPREG);
4177 emit_shl(temp2,temp,temp2);
4178 emit_cmove_reg(temp2h,th);
4179 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4180 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4181 emit_orreq(temp2,tl,tl);
4182 emit_orrne(temp2,th,th);
4183 }
4184 if (opcode[i]==0x1B) { // LDR
4185 emit_xorimm(temp,24,temp);
4186 emit_rsbimm(temp,32,HOST_TEMPREG);
4187 emit_shr(temp2,temp,temp2);
4188 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4189 emit_movimm(-1,HOST_TEMPREG);
4190 emit_shr(temp2h,temp,temp2h);
4191 emit_cmovne_reg(temp2,tl);
4192 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4193 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4194 emit_orrne(temp2h,th,th);
4195 emit_orreq(temp2h,tl,tl);
4196 }
4197 }
4198 }
4199}
4200#define loadlr_assemble loadlr_assemble_arm
4201
4202void cop0_assemble(int i,struct regstat *i_regs)
4203{
4204 if(opcode2[i]==0) // MFC0
4205 {
4206 signed char t=get_reg(i_regs->regmap,rt1[i]);
4207 char copr=(source[i]>>11)&0x1f;
4208 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4209 if(t>=0&&rt1[i]!=0) {
7139f3c8 4210#ifdef MUPEN64
57871462 4211 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4212 emit_movimm((source[i]>>11)&0x1f,1);
4213 emit_writeword(0,(int)&PC);
4214 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4215 if(copr==9) {
4216 emit_readword((int)&last_count,ECX);
4217 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4218 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4219 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4220 emit_writeword(HOST_CCREG,(int)&Count);
4221 }
4222 emit_call((int)MFC0);
4223 emit_readword((int)&readmem_dword,t);
7139f3c8 4224#else
4225 emit_readword((int)&reg_cop0+copr*4,t);
4226#endif
57871462 4227 }
4228 }
4229 else if(opcode2[i]==4) // MTC0
4230 {
4231 signed char s=get_reg(i_regs->regmap,rs1[i]);
4232 char copr=(source[i]>>11)&0x1f;
4233 assert(s>=0);
63cb0298 4234#ifdef MUPEN64
57871462 4235 emit_writeword(s,(int)&readmem_dword);
4236 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4237 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4238 emit_movimm((source[i]>>11)&0x1f,1);
4239 emit_writeword(0,(int)&PC);
4240 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4241#else
4242 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4243#endif
4244 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4245 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4246 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4247 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4248 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4249 emit_writeword(HOST_CCREG,(int)&Count);
4250 }
4251 // What a mess. The status register (12) can enable interrupts,
4252 // so needs a special case to handle a pending interrupt.
4253 // The interrupt must be taken immediately, because a subsequent
4254 // instruction might disable interrupts again.
7139f3c8 4255 if(copr==12||copr==13) {
fca1aef2 4256#ifdef PCSX
4257 if (is_delayslot) {
4258 // burn cycles to cause cc_interrupt, which will
4259 // reschedule next_interupt. Relies on CCREG from above.
4260 assem_debug("MTC0 DS %d\n", copr);
4261 emit_writeword(HOST_CCREG,(int)&last_count);
4262 emit_movimm(0,HOST_CCREG);
4263 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4264 emit_loadreg(rs1[i],1);
fca1aef2 4265 emit_movimm(copr,0);
4266 emit_call((int)pcsx_mtc0_ds);
042c7287 4267 emit_loadreg(rs1[i],s);
fca1aef2 4268 return;
4269 }
4270#endif
63cb0298 4271 emit_movimm(start+i*4+4,HOST_TEMPREG);
4272 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4273 emit_movimm(0,HOST_TEMPREG);
4274 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4275 }
4276 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4277 //else
fca1aef2 4278#ifdef PCSX
caeefe31 4279 if(s==HOST_CCREG)
4280 emit_loadreg(rs1[i],1);
4281 else if(s!=1)
63cb0298 4282 emit_mov(s,1);
fca1aef2 4283 emit_movimm(copr,0);
4284 emit_call((int)pcsx_mtc0);
4285#else
57871462 4286 emit_call((int)MTC0);
fca1aef2 4287#endif
7139f3c8 4288 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4289 emit_readword((int)&Count,HOST_CCREG);
042c7287 4290 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4291 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4292 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4293 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4294 emit_storereg(CCREG,HOST_CCREG);
4295 }
7139f3c8 4296 if(copr==12||copr==13) {
57871462 4297 assert(!is_delayslot);
4298 emit_readword((int)&pending_exception,14);
042c7287 4299 emit_test(14,14);
4300 emit_jne((int)&do_interrupt);
57871462 4301 }
4302 emit_loadreg(rs1[i],s);
4303 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4304 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 4305 cop1_usable=0;
4306 }
4307 else
4308 {
4309 assert(opcode2[i]==0x10);
3d624f89 4310#ifndef DISABLE_TLB
57871462 4311 if((source[i]&0x3f)==0x01) // TLBR
4312 emit_call((int)TLBR);
4313 if((source[i]&0x3f)==0x02) // TLBWI
4314 emit_call((int)TLBWI_new);
4315 if((source[i]&0x3f)==0x06) { // TLBWR
4316 // The TLB entry written by TLBWR is dependent on the count,
4317 // so update the cycle count
4318 emit_readword((int)&last_count,ECX);
4319 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4320 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4321 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4322 emit_writeword(HOST_CCREG,(int)&Count);
4323 emit_call((int)TLBWR_new);
4324 }
4325 if((source[i]&0x3f)==0x08) // TLBP
4326 emit_call((int)TLBP);
3d624f89 4327#endif
576bbd8f 4328#ifdef PCSX
4329 if((source[i]&0x3f)==0x10) // RFE
4330 {
4331 emit_readword((int)&Status,0);
4332 emit_andimm(0,0x3c,1);
4333 emit_andimm(0,~0xf,0);
4334 emit_orrshr_imm(1,2,0);
4335 emit_writeword(0,(int)&Status);
4336 }
4337#else
57871462 4338 if((source[i]&0x3f)==0x18) // ERET
4339 {
4340 int count=ccadj[i];
4341 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4342 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4343 emit_jmp((int)jump_eret);
4344 }
576bbd8f 4345#endif
57871462 4346 }
4347}
4348
b9b61529 4349static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4350{
4351 switch (copr) {
4352 case 1:
4353 case 3:
4354 case 5:
4355 case 8:
4356 case 9:
4357 case 10:
4358 case 11:
4359 emit_readword((int)&reg_cop2d[copr],tl);
4360 emit_signextend16(tl,tl);
4361 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4362 break;
4363 case 7:
4364 case 16:
4365 case 17:
4366 case 18:
4367 case 19:
4368 emit_readword((int)&reg_cop2d[copr],tl);
4369 emit_andimm(tl,0xffff,tl);
4370 emit_writeword(tl,(int)&reg_cop2d[copr]);
4371 break;
4372 case 15:
4373 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4374 emit_writeword(tl,(int)&reg_cop2d[copr]);
4375 break;
4376 case 28:
b9b61529 4377 case 29:
4378 emit_readword((int)&reg_cop2d[9],temp);
4379 emit_testimm(temp,0x8000); // do we need this?
4380 emit_andimm(temp,0xf80,temp);
4381 emit_andne_imm(temp,0,temp);
f70d384d 4382 emit_shrimm(temp,7,tl);
b9b61529 4383 emit_readword((int)&reg_cop2d[10],temp);
4384 emit_testimm(temp,0x8000);
4385 emit_andimm(temp,0xf80,temp);
4386 emit_andne_imm(temp,0,temp);
f70d384d 4387 emit_orrshr_imm(temp,2,tl);
b9b61529 4388 emit_readword((int)&reg_cop2d[11],temp);
4389 emit_testimm(temp,0x8000);
4390 emit_andimm(temp,0xf80,temp);
4391 emit_andne_imm(temp,0,temp);
f70d384d 4392 emit_orrshl_imm(temp,3,tl);
b9b61529 4393 emit_writeword(tl,(int)&reg_cop2d[copr]);
4394 break;
4395 default:
4396 emit_readword((int)&reg_cop2d[copr],tl);
4397 break;
4398 }
4399}
4400
4401static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4402{
4403 switch (copr) {
4404 case 15:
4405 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4406 emit_writeword(sl,(int)&reg_cop2d[copr]);
4407 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4408 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4409 emit_writeword(sl,(int)&reg_cop2d[14]);
4410 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4411 break;
4412 case 28:
4413 emit_andimm(sl,0x001f,temp);
f70d384d 4414 emit_shlimm(temp,7,temp);
b9b61529 4415 emit_writeword(temp,(int)&reg_cop2d[9]);
4416 emit_andimm(sl,0x03e0,temp);
f70d384d 4417 emit_shlimm(temp,2,temp);
b9b61529 4418 emit_writeword(temp,(int)&reg_cop2d[10]);
4419 emit_andimm(sl,0x7c00,temp);
f70d384d 4420 emit_shrimm(temp,3,temp);
b9b61529 4421 emit_writeword(temp,(int)&reg_cop2d[11]);
4422 emit_writeword(sl,(int)&reg_cop2d[28]);
4423 break;
4424 case 30:
4425 emit_movs(sl,temp);
4426 emit_mvnmi(temp,temp);
665f33e1 4427#ifdef HAVE_ARMV5
b9b61529 4428 emit_clz(temp,temp);
665f33e1 4429#else
4430 emit_movs(temp,HOST_TEMPREG);
4431 emit_movimm(0,temp);
4432 emit_jeq((int)out+4*4);
4433 emit_addpl_imm(temp,1,temp);
4434 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4435 emit_jns((int)out-2*4);
4436#endif
b9b61529 4437 emit_writeword(sl,(int)&reg_cop2d[30]);
4438 emit_writeword(temp,(int)&reg_cop2d[31]);
4439 break;
b9b61529 4440 case 31:
4441 break;
4442 default:
4443 emit_writeword(sl,(int)&reg_cop2d[copr]);
4444 break;
4445 }
4446}
4447
4448void cop2_assemble(int i,struct regstat *i_regs)
4449{
4450 u_int copr=(source[i]>>11)&0x1f;
4451 signed char temp=get_reg(i_regs->regmap,-1);
4452 if (opcode2[i]==0) { // MFC2
4453 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4454 if(tl>=0&&rt1[i]!=0)
b9b61529 4455 cop2_get_dreg(copr,tl,temp);
4456 }
4457 else if (opcode2[i]==4) { // MTC2
4458 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4459 cop2_put_dreg(copr,sl,temp);
4460 }
4461 else if (opcode2[i]==2) // CFC2
4462 {
4463 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4464 if(tl>=0&&rt1[i]!=0)
b9b61529 4465 emit_readword((int)&reg_cop2c[copr],tl);
4466 }
4467 else if (opcode2[i]==6) // CTC2
4468 {
4469 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4470 switch(copr) {
4471 case 4:
4472 case 12:
4473 case 20:
4474 case 26:
4475 case 27:
4476 case 29:
4477 case 30:
4478 emit_signextend16(sl,temp);
4479 break;
4480 case 31:
4481 //value = value & 0x7ffff000;
4482 //if (value & 0x7f87e000) value |= 0x80000000;
4483 emit_shrimm(sl,12,temp);
4484 emit_shlimm(temp,12,temp);
4485 emit_testimm(temp,0x7f000000);
4486 emit_testeqimm(temp,0x00870000);
4487 emit_testeqimm(temp,0x0000e000);
4488 emit_orrne_imm(temp,0x80000000,temp);
4489 break;
4490 default:
4491 temp=sl;
4492 break;
4493 }
4494 emit_writeword(temp,(int)&reg_cop2c[copr]);
4495 assert(sl>=0);
4496 }
4497}
4498
054175e9 4499static void c2op_prologue(u_int op,u_int reglist)
4500{
4501 save_regs_all(reglist);
82ed88eb 4502#ifdef PCNT
4503 emit_movimm(op,0);
4504 emit_call((int)pcnt_gte_start);
4505#endif
054175e9 4506 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4507}
4508
4509static void c2op_epilogue(u_int op,u_int reglist)
4510{
82ed88eb 4511#ifdef PCNT
4512 emit_movimm(op,0);
4513 emit_call((int)pcnt_gte_end);
4514#endif
054175e9 4515 restore_regs_all(reglist);
4516}
4517
6c0eefaf 4518static void c2op_call_MACtoIR(int lm,int need_flags)
4519{
4520 if(need_flags)
4521 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4522 else
4523 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4524}
4525
4526static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4527{
4528 emit_call((int)func);
4529 // func is C code and trashes r0
4530 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4531 if(need_flags||need_ir)
4532 c2op_call_MACtoIR(lm,need_flags);
4533 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4534}
4535
054175e9 4536static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4537{
4538 signed char temp=get_reg(i_regs->regmap,-1);
4539 u_int c2op=source[i]&0x3f;
6c0eefaf 4540 u_int hr,reglist_full=0,reglist;
054175e9 4541 int need_flags,need_ir;
b9b61529 4542 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4543 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4544 }
4d646738 4545 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 4546
4547 if (gte_handlers[c2op]!=NULL) {
bedfea38 4548 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4549 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4550 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4551 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4552 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4553 need_flags=0;
6c0eefaf 4554 int shift = (source[i] >> 19) & 1;
4555 int lm = (source[i] >> 10) & 1;
054175e9 4556 switch(c2op) {
19776aef 4557#ifndef DRC_DBG
054175e9 4558 case GTE_MVMVA: {
82336ba3 4559#ifdef HAVE_ARMV5
054175e9 4560 int v = (source[i] >> 15) & 3;
4561 int cv = (source[i] >> 13) & 3;
4562 int mx = (source[i] >> 17) & 3;
4d646738 4563 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 4564 c2op_prologue(c2op,reglist);
4565 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4566 if(v<3)
4567 emit_ldrd(v*8,0,4);
4568 else {
4569 emit_movzwl_indexed(9*4,0,4); // gteIR
4570 emit_movzwl_indexed(10*4,0,6);
4571 emit_movzwl_indexed(11*4,0,5);
4572 emit_orrshl_imm(6,16,4);
4573 }
4574 if(mx<3)
4575 emit_addimm(0,32*4+mx*8*4,6);
4576 else
4577 emit_readword((int)&zeromem_ptr,6);
4578 if(cv<3)
4579 emit_addimm(0,32*4+(cv*8+5)*4,7);
4580 else
4581 emit_readword((int)&zeromem_ptr,7);
4582#ifdef __ARM_NEON__
4583 emit_movimm(source[i],1); // opcode
4584 emit_call((int)gteMVMVA_part_neon);
4585 if(need_flags) {
4586 emit_movimm(lm,1);
4587 emit_call((int)gteMACtoIR_flags_neon);
4588 }
4589#else
4590 if(cv==3&&shift)
4591 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4592 else {
4593 emit_movimm(shift,1);
4594 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4595 }
6c0eefaf 4596 if(need_flags||need_ir)
4597 c2op_call_MACtoIR(lm,need_flags);
82336ba3 4598#endif
4599#else /* if not HAVE_ARMV5 */
4600 c2op_prologue(c2op,reglist);
4601 emit_movimm(source[i],1); // opcode
4602 emit_writeword(1,(int)&psxRegs.code);
4603 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 4604#endif
4605 break;
4606 }
6c0eefaf 4607 case GTE_OP:
4608 c2op_prologue(c2op,reglist);
4609 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4610 if(need_flags||need_ir) {
4611 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4612 c2op_call_MACtoIR(lm,need_flags);
4613 }
4614 break;
4615 case GTE_DPCS:
4616 c2op_prologue(c2op,reglist);
4617 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4618 break;
4619 case GTE_INTPL:
4620 c2op_prologue(c2op,reglist);
4621 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4622 break;
4623 case GTE_SQR:
4624 c2op_prologue(c2op,reglist);
4625 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4626 if(need_flags||need_ir) {
4627 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4628 c2op_call_MACtoIR(lm,need_flags);
4629 }
4630 break;
4631 case GTE_DCPL:
4632 c2op_prologue(c2op,reglist);
4633 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4634 break;
4635 case GTE_GPF:
4636 c2op_prologue(c2op,reglist);
4637 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4638 break;
4639 case GTE_GPL:
4640 c2op_prologue(c2op,reglist);
4641 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4642 break;
19776aef 4643#endif
054175e9 4644 default:
054175e9 4645 c2op_prologue(c2op,reglist);
19776aef 4646#ifdef DRC_DBG
4647 emit_movimm(source[i],1); // opcode
4648 emit_writeword(1,(int)&psxRegs.code);
4649#endif
054175e9 4650 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4651 break;
4652 }
4653 c2op_epilogue(c2op,reglist);
4654 }
b9b61529 4655}
4656
4657void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4658{
4659 // XXX: should just just do the exception instead
4660 if(!cop1_usable) {
4661 int jaddr=(int)out;
4662 emit_jmp(0);
4663 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4664 cop1_usable=1;
4665 }
4666}
4667
57871462 4668void cop1_assemble(int i,struct regstat *i_regs)
4669{
3d624f89 4670#ifndef DISABLE_COP1
57871462 4671 // Check cop1 unusable
4672 if(!cop1_usable) {
4673 signed char rs=get_reg(i_regs->regmap,CSREG);
4674 assert(rs>=0);
4675 emit_testimm(rs,0x20000000);
4676 int jaddr=(int)out;
4677 emit_jeq(0);
4678 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4679 cop1_usable=1;
4680 }
4681 if (opcode2[i]==0) { // MFC1
4682 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4683 if(tl>=0) {
4684 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4685 emit_readword_indexed(0,tl,tl);
4686 }
4687 }
4688 else if (opcode2[i]==1) { // DMFC1
4689 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4690 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4691 if(tl>=0) {
4692 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4693 if(th>=0) emit_readword_indexed(4,tl,th);
4694 emit_readword_indexed(0,tl,tl);
4695 }
4696 }
4697 else if (opcode2[i]==4) { // MTC1
4698 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4699 signed char temp=get_reg(i_regs->regmap,-1);
4700 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4701 emit_writeword_indexed(sl,0,temp);
4702 }
4703 else if (opcode2[i]==5) { // DMTC1
4704 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4705 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4706 signed char temp=get_reg(i_regs->regmap,-1);
4707 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4708 emit_writeword_indexed(sh,4,temp);
4709 emit_writeword_indexed(sl,0,temp);
4710 }
4711 else if (opcode2[i]==2) // CFC1
4712 {
4713 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4714 if(tl>=0) {
4715 u_int copr=(source[i]>>11)&0x1f;
4716 if(copr==0) emit_readword((int)&FCR0,tl);
4717 if(copr==31) emit_readword((int)&FCR31,tl);
4718 }
4719 }
4720 else if (opcode2[i]==6) // CTC1
4721 {
4722 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4723 u_int copr=(source[i]>>11)&0x1f;
4724 assert(sl>=0);
4725 if(copr==31)
4726 {
4727 emit_writeword(sl,(int)&FCR31);
4728 // Set the rounding mode
4729 //FIXME
4730 //char temp=get_reg(i_regs->regmap,-1);
4731 //emit_andimm(sl,3,temp);
4732 //emit_fldcw_indexed((int)&rounding_modes,temp);
4733 }
4734 }
3d624f89 4735#else
4736 cop1_unusable(i, i_regs);
4737#endif
57871462 4738}
4739
4740void fconv_assemble_arm(int i,struct regstat *i_regs)
4741{
3d624f89 4742#ifndef DISABLE_COP1
57871462 4743 signed char temp=get_reg(i_regs->regmap,-1);
4744 assert(temp>=0);
4745 // Check cop1 unusable
4746 if(!cop1_usable) {
4747 signed char rs=get_reg(i_regs->regmap,CSREG);
4748 assert(rs>=0);
4749 emit_testimm(rs,0x20000000);
4750 int jaddr=(int)out;
4751 emit_jeq(0);
4752 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4753 cop1_usable=1;
4754 }
4755
4756 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4757 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4758 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4759 emit_flds(temp,15);
4760 emit_ftosizs(15,15); // float->int, truncate
4761 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4762 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4763 emit_fsts(15,temp);
4764 return;
4765 }
4766 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4767 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4768 emit_vldr(temp,7);
4769 emit_ftosizd(7,13); // double->int, truncate
4770 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4771 emit_fsts(13,temp);
4772 return;
4773 }
4774
4775 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4776 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4777 emit_flds(temp,13);
4778 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4779 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4780 emit_fsitos(13,15);
4781 emit_fsts(15,temp);
4782 return;
4783 }
4784 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4785 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4786 emit_flds(temp,13);
4787 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4788 emit_fsitod(13,7);
4789 emit_vstr(7,temp);
4790 return;
4791 }
4792
4793 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4794 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4795 emit_flds(temp,13);
4796 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4797 emit_fcvtds(13,7);
4798 emit_vstr(7,temp);
4799 return;
4800 }
4801 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4802 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4803 emit_vldr(temp,7);
4804 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4805 emit_fcvtsd(7,13);
4806 emit_fsts(13,temp);
4807 return;
4808 }
4809 #endif
4810
4811 // C emulation code
4812
4813 u_int hr,reglist=0;
4814 for(hr=0;hr<HOST_REGS;hr++) {
4815 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4816 }
4817 save_regs(reglist);
4818
4819 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4820 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4821 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4822 emit_call((int)cvt_s_w);
4823 }
4824 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4825 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4826 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4827 emit_call((int)cvt_d_w);
4828 }
4829 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4830 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4831 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4832 emit_call((int)cvt_s_l);
4833 }
4834 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4835 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4836 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4837 emit_call((int)cvt_d_l);
4838 }
4839
4840 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4841 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4842 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4843 emit_call((int)cvt_d_s);
4844 }
4845 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4846 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4847 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4848 emit_call((int)cvt_w_s);
4849 }
4850 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4851 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4852 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4853 emit_call((int)cvt_l_s);
4854 }
4855
4856 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4857 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4858 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4859 emit_call((int)cvt_s_d);
4860 }
4861 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4862 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4863 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4864 emit_call((int)cvt_w_d);
4865 }
4866 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4867 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4868 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4869 emit_call((int)cvt_l_d);
4870 }
4871
4872 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4873 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4874 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4875 emit_call((int)round_l_s);
4876 }
4877 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4878 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4879 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4880 emit_call((int)trunc_l_s);
4881 }
4882 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4883 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4884 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4885 emit_call((int)ceil_l_s);
4886 }
4887 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4888 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4889 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4890 emit_call((int)floor_l_s);
4891 }
4892 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4893 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4894 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4895 emit_call((int)round_w_s);
4896 }
4897 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4898 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4899 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4900 emit_call((int)trunc_w_s);
4901 }
4902 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4903 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4904 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4905 emit_call((int)ceil_w_s);
4906 }
4907 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4908 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4909 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4910 emit_call((int)floor_w_s);
4911 }
4912
4913 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4914 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4915 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4916 emit_call((int)round_l_d);
4917 }
4918 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4919 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4920 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4921 emit_call((int)trunc_l_d);
4922 }
4923 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4924 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4925 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4926 emit_call((int)ceil_l_d);
4927 }
4928 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4929 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4930 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4931 emit_call((int)floor_l_d);
4932 }
4933 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4934 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4935 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4936 emit_call((int)round_w_d);
4937 }
4938 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4939 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4940 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4941 emit_call((int)trunc_w_d);
4942 }
4943 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4944 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4945 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4946 emit_call((int)ceil_w_d);
4947 }
4948 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4949 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4950 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4951 emit_call((int)floor_w_d);
4952 }
4953
4954 restore_regs(reglist);
3d624f89 4955#else
4956 cop1_unusable(i, i_regs);
4957#endif
57871462 4958}
4959#define fconv_assemble fconv_assemble_arm
4960
4961void fcomp_assemble(int i,struct regstat *i_regs)
4962{
3d624f89 4963#ifndef DISABLE_COP1
57871462 4964 signed char fs=get_reg(i_regs->regmap,FSREG);
4965 signed char temp=get_reg(i_regs->regmap,-1);
4966 assert(temp>=0);
4967 // Check cop1 unusable
4968 if(!cop1_usable) {
4969 signed char cs=get_reg(i_regs->regmap,CSREG);
4970 assert(cs>=0);
4971 emit_testimm(cs,0x20000000);
4972 int jaddr=(int)out;
4973 emit_jeq(0);
4974 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4975 cop1_usable=1;
4976 }
4977
4978 if((source[i]&0x3f)==0x30) {
4979 emit_andimm(fs,~0x800000,fs);
4980 return;
4981 }
4982
4983 if((source[i]&0x3e)==0x38) {
4984 // sf/ngle - these should throw exceptions for NaNs
4985 emit_andimm(fs,~0x800000,fs);
4986 return;
4987 }
4988
4989 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4990 if(opcode2[i]==0x10) {
4991 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4992 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4993 emit_orimm(fs,0x800000,fs);
4994 emit_flds(temp,14);
4995 emit_flds(HOST_TEMPREG,15);
4996 emit_fcmps(14,15);
4997 emit_fmstat();
4998 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4999 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
5000 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
5001 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
5002 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
5003 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
5004 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
5005 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
5006 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
5007 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
5008 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
5009 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
5010 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
5011 return;
5012 }
5013 if(opcode2[i]==0x11) {
5014 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5015 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5016 emit_orimm(fs,0x800000,fs);
5017 emit_vldr(temp,6);
5018 emit_vldr(HOST_TEMPREG,7);
5019 emit_fcmpd(6,7);
5020 emit_fmstat();
5021 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
5022 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
5023 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
5024 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
5025 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
5026 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
5027 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
5028 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
5029 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
5030 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
5031 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
5032 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
5033 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
5034 return;
5035 }
5036 #endif
5037
5038 // C only
5039
5040 u_int hr,reglist=0;
5041 for(hr=0;hr<HOST_REGS;hr++) {
5042 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5043 }
5044 reglist&=~(1<<fs);
5045 save_regs(reglist);
5046 if(opcode2[i]==0x10) {
5047 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5048 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5049 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
5050 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
5051 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
5052 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
5053 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
5054 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
5055 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
5056 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
5057 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
5058 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
5059 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
5060 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5061 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5062 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5063 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5064 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5065 }
5066 if(opcode2[i]==0x11) {
5067 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5068 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5069 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5070 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5071 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5072 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5073 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5074 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5075 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5076 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5077 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5078 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5079 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5080 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5081 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5082 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5083 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5084 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5085 }
5086 restore_regs(reglist);
5087 emit_loadreg(FSREG,fs);
3d624f89 5088#else
5089 cop1_unusable(i, i_regs);
5090#endif
57871462 5091}
5092
5093void float_assemble(int i,struct regstat *i_regs)
5094{
3d624f89 5095#ifndef DISABLE_COP1
57871462 5096 signed char temp=get_reg(i_regs->regmap,-1);
5097 assert(temp>=0);
5098 // Check cop1 unusable
5099 if(!cop1_usable) {
5100 signed char cs=get_reg(i_regs->regmap,CSREG);
5101 assert(cs>=0);
5102 emit_testimm(cs,0x20000000);
5103 int jaddr=(int)out;
5104 emit_jeq(0);
5105 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5106 cop1_usable=1;
5107 }
5108
5109 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5110 if((source[i]&0x3f)==6) // mov
5111 {
5112 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5113 if(opcode2[i]==0x10) {
5114 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5115 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5116 emit_readword_indexed(0,temp,temp);
5117 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5118 }
5119 if(opcode2[i]==0x11) {
5120 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5121 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5122 emit_vldr(temp,7);
5123 emit_vstr(7,HOST_TEMPREG);
5124 }
5125 }
5126 return;
5127 }
5128
5129 if((source[i]&0x3f)>3)
5130 {
5131 if(opcode2[i]==0x10) {
5132 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5133 emit_flds(temp,15);
5134 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5135 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5136 }
5137 if((source[i]&0x3f)==4) // sqrt
5138 emit_fsqrts(15,15);
5139 if((source[i]&0x3f)==5) // abs
5140 emit_fabss(15,15);
5141 if((source[i]&0x3f)==7) // neg
5142 emit_fnegs(15,15);
5143 emit_fsts(15,temp);
5144 }
5145 if(opcode2[i]==0x11) {
5146 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5147 emit_vldr(temp,7);
5148 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5149 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5150 }
5151 if((source[i]&0x3f)==4) // sqrt
5152 emit_fsqrtd(7,7);
5153 if((source[i]&0x3f)==5) // abs
5154 emit_fabsd(7,7);
5155 if((source[i]&0x3f)==7) // neg
5156 emit_fnegd(7,7);
5157 emit_vstr(7,temp);
5158 }
5159 return;
5160 }
5161 if((source[i]&0x3f)<4)
5162 {
5163 if(opcode2[i]==0x10) {
5164 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5165 }
5166 if(opcode2[i]==0x11) {
5167 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5168 }
5169 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5170 if(opcode2[i]==0x10) {
5171 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5172 emit_flds(temp,15);
5173 emit_flds(HOST_TEMPREG,13);
5174 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5175 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5176 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5177 }
5178 }
5179 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5180 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5181 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5182 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5183 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5184 emit_fsts(15,HOST_TEMPREG);
5185 }else{
5186 emit_fsts(15,temp);
5187 }
5188 }
5189 else if(opcode2[i]==0x11) {
5190 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5191 emit_vldr(temp,7);
5192 emit_vldr(HOST_TEMPREG,6);
5193 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5194 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5195 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5196 }
5197 }
5198 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5199 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5200 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5201 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5202 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5203 emit_vstr(7,HOST_TEMPREG);
5204 }else{
5205 emit_vstr(7,temp);
5206 }
5207 }
5208 }
5209 else {
5210 if(opcode2[i]==0x10) {
5211 emit_flds(temp,15);
5212 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5213 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5214 }
5215 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5216 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5217 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5218 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5219 emit_fsts(15,temp);
5220 }
5221 else if(opcode2[i]==0x11) {
5222 emit_vldr(temp,7);
5223 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5224 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5225 }
5226 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5227 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5228 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5229 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5230 emit_vstr(7,temp);
5231 }
5232 }
5233 return;
5234 }
5235 #endif
5236
5237 u_int hr,reglist=0;
5238 for(hr=0;hr<HOST_REGS;hr++) {
5239 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5240 }
5241 if(opcode2[i]==0x10) { // Single precision
5242 save_regs(reglist);
5243 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5244 if((source[i]&0x3f)<4) {
5245 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5246 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5247 }else{
5248 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5249 }
5250 switch(source[i]&0x3f)
5251 {
5252 case 0x00: emit_call((int)add_s);break;
5253 case 0x01: emit_call((int)sub_s);break;
5254 case 0x02: emit_call((int)mul_s);break;
5255 case 0x03: emit_call((int)div_s);break;
5256 case 0x04: emit_call((int)sqrt_s);break;
5257 case 0x05: emit_call((int)abs_s);break;
5258 case 0x06: emit_call((int)mov_s);break;
5259 case 0x07: emit_call((int)neg_s);break;
5260 }
5261 restore_regs(reglist);
5262 }
5263 if(opcode2[i]==0x11) { // Double precision
5264 save_regs(reglist);
5265 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5266 if((source[i]&0x3f)<4) {
5267 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5268 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5269 }else{
5270 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5271 }
5272 switch(source[i]&0x3f)
5273 {
5274 case 0x00: emit_call((int)add_d);break;
5275 case 0x01: emit_call((int)sub_d);break;
5276 case 0x02: emit_call((int)mul_d);break;
5277 case 0x03: emit_call((int)div_d);break;
5278 case 0x04: emit_call((int)sqrt_d);break;
5279 case 0x05: emit_call((int)abs_d);break;
5280 case 0x06: emit_call((int)mov_d);break;
5281 case 0x07: emit_call((int)neg_d);break;
5282 }
5283 restore_regs(reglist);
5284 }
3d624f89 5285#else
5286 cop1_unusable(i, i_regs);
5287#endif
57871462 5288}
5289
5290void multdiv_assemble_arm(int i,struct regstat *i_regs)
5291{
5292 // case 0x18: MULT
5293 // case 0x19: MULTU
5294 // case 0x1A: DIV
5295 // case 0x1B: DIVU
5296 // case 0x1C: DMULT
5297 // case 0x1D: DMULTU
5298 // case 0x1E: DDIV
5299 // case 0x1F: DDIVU
5300 if(rs1[i]&&rs2[i])
5301 {
5302 if((opcode2[i]&4)==0) // 32-bit
5303 {
5304 if(opcode2[i]==0x18) // MULT
5305 {
5306 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5307 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5308 signed char hi=get_reg(i_regs->regmap,HIREG);
5309 signed char lo=get_reg(i_regs->regmap,LOREG);
5310 assert(m1>=0);
5311 assert(m2>=0);
5312 assert(hi>=0);
5313 assert(lo>=0);
5314 emit_smull(m1,m2,hi,lo);
5315 }
5316 if(opcode2[i]==0x19) // MULTU
5317 {
5318 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5319 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5320 signed char hi=get_reg(i_regs->regmap,HIREG);
5321 signed char lo=get_reg(i_regs->regmap,LOREG);
5322 assert(m1>=0);
5323 assert(m2>=0);
5324 assert(hi>=0);
5325 assert(lo>=0);
5326 emit_umull(m1,m2,hi,lo);
5327 }
5328 if(opcode2[i]==0x1A) // DIV
5329 {
5330 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5331 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5332 assert(d1>=0);
5333 assert(d2>=0);
5334 signed char quotient=get_reg(i_regs->regmap,LOREG);
5335 signed char remainder=get_reg(i_regs->regmap,HIREG);
5336 assert(quotient>=0);
5337 assert(remainder>=0);
5338 emit_movs(d1,remainder);
44a80f6a 5339 emit_movimm(0xffffffff,quotient);
5340 emit_negmi(quotient,quotient); // .. quotient and ..
5341 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5342 emit_movs(d2,HOST_TEMPREG);
5343 emit_jeq((int)out+52); // Division by zero
82336ba3 5344 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 5345#ifdef HAVE_ARMV5
57871462 5346 emit_clz(HOST_TEMPREG,quotient);
5347 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 5348#else
5349 emit_movimm(0,quotient);
5350 emit_addpl_imm(quotient,1,quotient);
5351 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
5352 emit_jns((int)out-2*4);
5353#endif
57871462 5354 emit_orimm(quotient,1<<31,quotient);
5355 emit_shr(quotient,quotient,quotient);
5356 emit_cmp(remainder,HOST_TEMPREG);
5357 emit_subcs(remainder,HOST_TEMPREG,remainder);
5358 emit_adcs(quotient,quotient,quotient);
5359 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5360 emit_jcc((int)out-16); // -4
5361 emit_teq(d1,d2);
5362 emit_negmi(quotient,quotient);
5363 emit_test(d1,d1);
5364 emit_negmi(remainder,remainder);
5365 }
5366 if(opcode2[i]==0x1B) // DIVU
5367 {
5368 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5369 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5370 assert(d1>=0);
5371 assert(d2>=0);
5372 signed char quotient=get_reg(i_regs->regmap,LOREG);
5373 signed char remainder=get_reg(i_regs->regmap,HIREG);
5374 assert(quotient>=0);
5375 assert(remainder>=0);
44a80f6a 5376 emit_mov(d1,remainder);
5377 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5378 emit_test(d2,d2);
44a80f6a 5379 emit_jeq((int)out+40); // Division by zero
665f33e1 5380#ifdef HAVE_ARMV5
57871462 5381 emit_clz(d2,HOST_TEMPREG);
5382 emit_movimm(1<<31,quotient);
5383 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 5384#else
5385 emit_movimm(0,HOST_TEMPREG);
82336ba3 5386 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
5387 emit_lslpls_imm(d2,1,d2);
665f33e1 5388 emit_jns((int)out-2*4);
5389 emit_movimm(1<<31,quotient);
5390#endif
57871462 5391 emit_shr(quotient,HOST_TEMPREG,quotient);
5392 emit_cmp(remainder,d2);
5393 emit_subcs(remainder,d2,remainder);
5394 emit_adcs(quotient,quotient,quotient);
5395 emit_shrcc_imm(d2,1,d2);
5396 emit_jcc((int)out-16); // -4
5397 }
5398 }
5399 else // 64-bit
4600ba03 5400#ifndef FORCE32
57871462 5401 {
5402 if(opcode2[i]==0x1C) // DMULT
5403 {
5404 assert(opcode2[i]!=0x1C);
5405 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5406 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5407 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5408 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5409 assert(m1h>=0);
5410 assert(m2h>=0);
5411 assert(m1l>=0);
5412 assert(m2l>=0);
5413 emit_pushreg(m2h);
5414 emit_pushreg(m2l);
5415 emit_pushreg(m1h);
5416 emit_pushreg(m1l);
5417 emit_call((int)&mult64);
5418 emit_popreg(m1l);
5419 emit_popreg(m1h);
5420 emit_popreg(m2l);
5421 emit_popreg(m2h);
5422 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5423 signed char hil=get_reg(i_regs->regmap,HIREG);
5424 if(hih>=0) emit_loadreg(HIREG|64,hih);
5425 if(hil>=0) emit_loadreg(HIREG,hil);
5426 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5427 signed char lol=get_reg(i_regs->regmap,LOREG);
5428 if(loh>=0) emit_loadreg(LOREG|64,loh);
5429 if(lol>=0) emit_loadreg(LOREG,lol);
5430 }
5431 if(opcode2[i]==0x1D) // DMULTU
5432 {
5433 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5434 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5435 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5436 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5437 assert(m1h>=0);
5438 assert(m2h>=0);
5439 assert(m1l>=0);
5440 assert(m2l>=0);
4d646738 5441 save_regs(CALLER_SAVE_REGS);
57871462 5442 if(m1l!=0) emit_mov(m1l,0);
5443 if(m1h==0) emit_readword((int)&dynarec_local,1);
5444 else if(m1h>1) emit_mov(m1h,1);
5445 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5446 else if(m2l>2) emit_mov(m2l,2);
5447 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5448 else if(m2h>3) emit_mov(m2h,3);
5449 emit_call((int)&multu64);
4d646738 5450 restore_regs(CALLER_SAVE_REGS);
57871462 5451 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5452 signed char hil=get_reg(i_regs->regmap,HIREG);
5453 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5454 signed char lol=get_reg(i_regs->regmap,LOREG);
5455 /*signed char temp=get_reg(i_regs->regmap,-1);
5456 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5457 signed char rl=get_reg(i_regs->regmap,HIREG);
5458 assert(m1h>=0);
5459 assert(m2h>=0);
5460 assert(m1l>=0);
5461 assert(m2l>=0);
5462 assert(temp>=0);
5463 //emit_mov(m1l,EAX);
5464 //emit_mul(m2l);
5465 emit_umull(rl,rh,m1l,m2l);
5466 emit_storereg(LOREG,rl);
5467 emit_mov(rh,temp);
5468 //emit_mov(m1h,EAX);
5469 //emit_mul(m2l);
5470 emit_umull(rl,rh,m1h,m2l);
5471 emit_adds(rl,temp,temp);
5472 emit_adcimm(rh,0,rh);
5473 emit_storereg(HIREG,rh);
5474 //emit_mov(m2h,EAX);
5475 //emit_mul(m1l);
5476 emit_umull(rl,rh,m1l,m2h);
5477 emit_adds(rl,temp,temp);
5478 emit_adcimm(rh,0,rh);
5479 emit_storereg(LOREG|64,temp);
5480 emit_mov(rh,temp);
5481 //emit_mov(m2h,EAX);
5482 //emit_mul(m1h);
5483 emit_umull(rl,rh,m1h,m2h);
5484 emit_adds(rl,temp,rl);
5485 emit_loadreg(HIREG,temp);
5486 emit_adcimm(rh,0,rh);
5487 emit_adds(rl,temp,rl);
5488 emit_adcimm(rh,0,rh);
5489 // DEBUG
5490 /*
5491 emit_pushreg(m2h);
5492 emit_pushreg(m2l);
5493 emit_pushreg(m1h);
5494 emit_pushreg(m1l);
5495 emit_call((int)&multu64);
5496 emit_popreg(m1l);
5497 emit_popreg(m1h);
5498 emit_popreg(m2l);
5499 emit_popreg(m2h);
5500 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5501 signed char hil=get_reg(i_regs->regmap,HIREG);
5502 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5503 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5504 */
5505 // Shouldn't be necessary
5506 //char loh=get_reg(i_regs->regmap,LOREG|64);
5507 //char lol=get_reg(i_regs->regmap,LOREG);
5508 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5509 //if(lol>=0) emit_loadreg(LOREG,lol);
5510 }
5511 if(opcode2[i]==0x1E) // DDIV
5512 {
5513 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5514 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5515 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5516 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5517 assert(d1h>=0);
5518 assert(d2h>=0);
5519 assert(d1l>=0);
5520 assert(d2l>=0);
4d646738 5521 save_regs(CALLER_SAVE_REGS);
57871462 5522 if(d1l!=0) emit_mov(d1l,0);
5523 if(d1h==0) emit_readword((int)&dynarec_local,1);
5524 else if(d1h>1) emit_mov(d1h,1);
5525 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5526 else if(d2l>2) emit_mov(d2l,2);
5527 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5528 else if(d2h>3) emit_mov(d2h,3);
5529 emit_call((int)&div64);
4d646738 5530 restore_regs(CALLER_SAVE_REGS);
57871462 5531 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5532 signed char hil=get_reg(i_regs->regmap,HIREG);
5533 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5534 signed char lol=get_reg(i_regs->regmap,LOREG);
5535 if(hih>=0) emit_loadreg(HIREG|64,hih);
5536 if(hil>=0) emit_loadreg(HIREG,hil);
5537 if(loh>=0) emit_loadreg(LOREG|64,loh);
5538 if(lol>=0) emit_loadreg(LOREG,lol);
5539 }
5540 if(opcode2[i]==0x1F) // DDIVU
5541 {
5542 //u_int hr,reglist=0;
5543 //for(hr=0;hr<HOST_REGS;hr++) {
5544 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5545 //}
5546 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5547 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5548 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5549 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5550 assert(d1h>=0);
5551 assert(d2h>=0);
5552 assert(d1l>=0);
5553 assert(d2l>=0);
4d646738 5554 save_regs(CALLER_SAVE_REGS);
57871462 5555 if(d1l!=0) emit_mov(d1l,0);
5556 if(d1h==0) emit_readword((int)&dynarec_local,1);
5557 else if(d1h>1) emit_mov(d1h,1);
5558 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5559 else if(d2l>2) emit_mov(d2l,2);
5560 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5561 else if(d2h>3) emit_mov(d2h,3);
5562 emit_call((int)&divu64);
4d646738 5563 restore_regs(CALLER_SAVE_REGS);
57871462 5564 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5565 signed char hil=get_reg(i_regs->regmap,HIREG);
5566 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5567 signed char lol=get_reg(i_regs->regmap,LOREG);
5568 if(hih>=0) emit_loadreg(HIREG|64,hih);
5569 if(hil>=0) emit_loadreg(HIREG,hil);
5570 if(loh>=0) emit_loadreg(LOREG|64,loh);
5571 if(lol>=0) emit_loadreg(LOREG,lol);
5572 }
5573 }
4600ba03 5574#else
5575 assert(0);
5576#endif
57871462 5577 }
5578 else
5579 {
5580 // Multiply by zero is zero.
5581 // MIPS does not have a divide by zero exception.
5582 // The result is undefined, we return zero.
5583 signed char hr=get_reg(i_regs->regmap,HIREG);
5584 signed char lr=get_reg(i_regs->regmap,LOREG);
5585 if(hr>=0) emit_zeroreg(hr);
5586 if(lr>=0) emit_zeroreg(lr);
5587 }
5588}
5589#define multdiv_assemble multdiv_assemble_arm
5590
// Deliberately emits nothing on ARM.  On x86 this step puts the value 0xf8
// into register r; on ARM the hash is done with a single instruction instead
// (see do_rhash).
void do_preload_rhash(int r)
{
  (void)r; // unused on this architecture
}
5595
5596void do_preload_rhtbl(int ht) {
5597 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5598}
5599
// Emit the hash computation: rh = rs & 0xf8.
void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5603
5604void do_miniht_load(int ht,int rh) {
5605 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5606 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5607}
5608
5609void do_miniht_jump(int rs,int rh,int ht) {
5610 emit_cmp(rh,rs);
5611 emit_ldreq_indexed(ht,4,15);
5612 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5613 emit_mov(rs,7);
5614 emit_jmp(jump_vaddr_reg[7]);
5615 #else
5616 emit_jmp(jump_vaddr_reg[rs]);
5617 #endif
5618}
5619
5620void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 5621 #ifndef HAVE_ARMV7
57871462 5622 emit_movimm(return_address,rt); // PC into link register
5623 add_to_linker((int)out,return_address,1);
5624 emit_pcreladdr(temp);
5625 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5626 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5627 #else
5628 emit_movw(return_address&0x0000FFFF,rt);
5629 add_to_linker((int)out,return_address,1);
5630 emit_pcreladdr(temp);
5631 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5632 emit_movt(return_address&0xFFFF0000,rt);
5633 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5634 #endif
5635}
5636
5637// Sign-extend to 64 bits and write out upper half of a register
5638// This is useful where we have a 32-bit value in a register, and want to
5639// keep it in a 32-bit register, but can't guarantee that it won't be read
5640// as a 64-bit value later.
5641void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5642{
24385cae 5643#ifndef FORCE32
57871462 5644 if(is32_pre==is32) return;
5645 int hr,reg;
5646 for(hr=0;hr<HOST_REGS;hr++) {
5647 if(hr!=EXCLUDE_REG) {
5648 //if(pre[hr]==entry[hr]) {
5649 if((reg=pre[hr])>=0) {
5650 if((dirty>>hr)&1) {
5651 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5652 emit_sarimm(hr,31,HOST_TEMPREG);
5653 emit_storereg(reg|64,HOST_TEMPREG);
5654 }
5655 }
5656 }
5657 //}
5658 }
5659 }
24385cae 5660#endif
57871462 5661}
5662
5663void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5664{
5665 //if(dirty_pre==dirty) return;
5666 int hr,reg,new_hr;
5667 for(hr=0;hr<HOST_REGS;hr++) {
5668 if(hr!=EXCLUDE_REG) {
5669 reg=pre[hr];
5670 if(((~u)>>(reg&63))&1) {
f776eb14 5671 if(reg>0) {
57871462 5672 if(((dirty_pre&~dirty)>>hr)&1) {
5673 if(reg>0&&reg<34) {
5674 emit_storereg(reg,hr);
5675 if( ((is32_pre&~uu)>>reg)&1 ) {
5676 emit_sarimm(hr,31,HOST_TEMPREG);
5677 emit_storereg(reg|64,HOST_TEMPREG);
5678 }
5679 }
5680 else if(reg>=64) {
5681 emit_storereg(reg,hr);
5682 }
5683 }
5684 }
57871462 5685 }
5686 }
5687 }
5688}
5689
5690
5691/* using strd could possibly help but you'd have to allocate registers in pairs
5692void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5693{
5694 int hr;
5695 int wrote=-1;
5696 for(hr=HOST_REGS-1;hr>=0;hr--) {
5697 if(hr!=EXCLUDE_REG) {
5698 if(pre[hr]!=entry[hr]) {
5699 if(pre[hr]>=0) {
5700 if((dirty>>hr)&1) {
5701 if(get_reg(entry,pre[hr])<0) {
5702 if(pre[hr]<64) {
5703 if(!((u>>pre[hr])&1)) {
5704 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5705 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5706 emit_sarimm(hr,31,hr+1);
5707 emit_strdreg(pre[hr],hr);
5708 }
5709 else
5710 emit_storereg(pre[hr],hr);
5711 }else{
5712 emit_storereg(pre[hr],hr);
5713 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5714 emit_sarimm(hr,31,hr);
5715 emit_storereg(pre[hr]|64,hr);
5716 }
5717 }
5718 }
5719 }else{
5720 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5721 emit_storereg(pre[hr],hr);
5722 }
5723 }
5724 wrote=hr;
5725 }
5726 }
5727 }
5728 }
5729 }
5730 }
5731 for(hr=0;hr<HOST_REGS;hr++) {
5732 if(hr!=EXCLUDE_REG) {
5733 if(pre[hr]!=entry[hr]) {
5734 if(pre[hr]>=0) {
5735 int nr;
5736 if((nr=get_reg(entry,pre[hr]))>=0) {
5737 emit_mov(hr,nr);
5738 }
5739 }
5740 }
5741 }
5742 }
5743}
5744#define wb_invalidate wb_invalidate_arm
5745*/
5746
dd3a91a1 5747// Clearing the cache is rather slow on ARM Linux, so mark the areas
5748// that need to be cleared, and then only clear these areas once.
5749void do_clear_cache()
5750{
5751 int i,j;
5752 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5753 {
5754 u_int bitmap=needs_clear_cache[i];
5755 if(bitmap) {
5756 u_int start,end;
5757 for(j=0;j<32;j++)
5758 {
5759 if(bitmap&(1<<j)) {
bdeade46 5760 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 5761 end=start+4095;
5762 j++;
5763 while(j<32) {
5764 if(bitmap&(1<<j)) {
5765 end+=4096;
5766 j++;
5767 }else{
5768 __clear_cache((void *)start,(void *)end);
5769 break;
5770 }
5771 }
5772 }
5773 }
5774 needs_clear_cache[i]=0;
5775 }
5776 }
5777}
5778
57871462 5779// CPU-architecture-specific initialization
5780void arch_init() {
3d624f89 5781#ifndef DISABLE_COP1
57871462 5782 rounding_modes[0]=0x0<<22; // round
5783 rounding_modes[1]=0x3<<22; // trunc
5784 rounding_modes[2]=0x1<<22; // ceil
5785 rounding_modes[3]=0x2<<22; // floor
3d624f89 5786#endif
57871462 5787}
b9b61529 5788
5789// vim:shiftwidth=2:expandtab