spu: rework synchronization
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
665f33e1 31#include "arm_features.h"
054175e9 32
a327ad27 33#if !BASE_ADDR_FIXED
bdeade46 34char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
35#endif
36
4d646738 37#ifndef __MACH__
38#define CALLER_SAVE_REGS 0x100f
39#else
40#define CALLER_SAVE_REGS 0x120f
41#endif
42
57871462 43extern int cycle_count;
44extern int last_count;
45extern int pcaddr;
46extern int pending_exception;
47extern int branch_target;
48extern uint64_t readmem_dword;
3d624f89 49#ifdef MUPEN64
57871462 50extern precomp_instr fake_pc;
3d624f89 51#endif
57871462 52extern void *dynarec_local;
53extern u_int memory_map[1048576];
54extern u_int mini_ht[32][2];
55extern u_int rounding_modes[4];
56
57void indirect_jump_indexed();
58void indirect_jump();
59void do_interrupt();
60void jump_vaddr_r0();
61void jump_vaddr_r1();
62void jump_vaddr_r2();
63void jump_vaddr_r3();
64void jump_vaddr_r4();
65void jump_vaddr_r5();
66void jump_vaddr_r6();
67void jump_vaddr_r7();
68void jump_vaddr_r8();
69void jump_vaddr_r9();
70void jump_vaddr_r10();
71void jump_vaddr_r12();
72
73const u_int jump_vaddr_reg[16] = {
74 (int)jump_vaddr_r0,
75 (int)jump_vaddr_r1,
76 (int)jump_vaddr_r2,
77 (int)jump_vaddr_r3,
78 (int)jump_vaddr_r4,
79 (int)jump_vaddr_r5,
80 (int)jump_vaddr_r6,
81 (int)jump_vaddr_r7,
82 (int)jump_vaddr_r8,
83 (int)jump_vaddr_r9,
84 (int)jump_vaddr_r10,
85 0,
86 (int)jump_vaddr_r12,
87 0,
88 0,
89 0};
90
0bbd1454 91void invalidate_addr_r0();
92void invalidate_addr_r1();
93void invalidate_addr_r2();
94void invalidate_addr_r3();
95void invalidate_addr_r4();
96void invalidate_addr_r5();
97void invalidate_addr_r6();
98void invalidate_addr_r7();
99void invalidate_addr_r8();
100void invalidate_addr_r9();
101void invalidate_addr_r10();
102void invalidate_addr_r12();
103
104const u_int invalidate_addr_reg[16] = {
105 (int)invalidate_addr_r0,
106 (int)invalidate_addr_r1,
107 (int)invalidate_addr_r2,
108 (int)invalidate_addr_r3,
109 (int)invalidate_addr_r4,
110 (int)invalidate_addr_r5,
111 (int)invalidate_addr_r6,
112 (int)invalidate_addr_r7,
113 (int)invalidate_addr_r8,
114 (int)invalidate_addr_r9,
115 (int)invalidate_addr_r10,
116 0,
117 (int)invalidate_addr_r12,
118 0,
119 0,
120 0};
121
57871462 122#include "fpu.h"
123
// Array of 1<<(TARGET_SIZE_2-17) words.
// NOTE(review): with 32 bits per word this looks like a bitmap holding one
// bit per 4 KiB of the translation cache (regions awaiting a cache flush) --
// confirm against the code that sets and clears it.
unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
125
57871462 126/* Linker */
127
128void set_jump_target(int addr,u_int target)
129{
130 u_char *ptr=(u_char *)addr;
131 u_int *ptr2=(u_int *)ptr;
132 if(ptr[3]==0xe2) {
133 assert((target-(u_int)ptr2-8)<1024);
134 assert((addr&3)==0);
135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
137 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
138 }
139 else if(ptr[3]==0x72) {
140 // generated by emit_jno_unlikely
141 if((target-(u_int)ptr2-8)<1024) {
142 assert((addr&3)==0);
143 assert((target&3)==0);
144 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
145 }
146 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
147 assert((addr&3)==0);
148 assert((target&3)==0);
149 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
150 }
151 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
152 }
153 else {
154 assert((ptr[3]&0x0e)==0xa);
155 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157}
158
159// This optionally copies the instruction from the target of the branch into
160// the space before the branch. Works, but the difference in speed is
161// usually insignificant.
162void set_jump_target_fillslot(int addr,u_int target,int copy)
163{
164 u_char *ptr=(u_char *)addr;
165 u_int *ptr2=(u_int *)ptr;
166 assert(!copy||ptr2[-1]==0xe28dd000);
167 if(ptr[3]==0xe2) {
168 assert(!copy);
169 assert((target-(u_int)ptr2-8)<4096);
170 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
171 }
172 else {
173 assert((ptr[3]&0x0e)==0xa);
174 u_int target_insn=*(u_int *)target;
175 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
176 copy=0;
177 }
178 if((target_insn&0x0c100000)==0x04100000) { // Load
179 copy=0;
180 }
181 if(target_insn&0x08000000) {
182 copy=0;
183 }
184 if(copy) {
185 ptr2[-1]=target_insn;
186 target+=4;
187 }
188 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
189 }
190}
191
192/* Literal pool */
193add_literal(int addr,int val)
194{
15776b68 195 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 196 literals[literalcount][0]=addr;
197 literals[literalcount][1]=val;
198 literalcount++;
199}
200
f76eeef9 201void *kill_pointer(void *stub)
57871462 202{
203 int *ptr=(int *)(stub+4);
204 assert((*ptr&0x0ff00000)==0x05900000);
205 u_int offset=*ptr&0xfff;
206 int **l_ptr=(void *)ptr+offset+8;
207 int *i_ptr=*l_ptr;
208 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 209 return i_ptr;
57871462 210}
211
f968d35d 212// find where external branch is liked to using addr of it's stub:
213// get address that insn one after stub loads (dyna_linker arg1),
214// treat it as a pointer to branch insn,
215// return addr where that branch jumps to
57871462 216int get_pointer(void *stub)
217{
218 //printf("get_pointer(%x)\n",(int)stub);
219 int *ptr=(int *)(stub+4);
f968d35d 220 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 221 u_int offset=*ptr&0xfff;
222 int **l_ptr=(void *)ptr+offset+8;
223 int *i_ptr=*l_ptr;
224 assert((*i_ptr&0x0f000000)==0x0a000000);
225 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
226}
227
228// Find the "clean" entry point from a "dirty" entry point
229// by skipping past the call to verify_code
230u_int get_clean_addr(int addr)
231{
232 int *ptr=(int *)addr;
665f33e1 233 #ifndef HAVE_ARMV7
57871462 234 ptr+=4;
235 #else
236 ptr+=6;
237 #endif
238 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
239 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
240 ptr++;
241 if((*ptr&0xFF000000)==0xea000000) {
242 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
243 }
244 return (u_int)ptr;
245}
246
247int verify_dirty(int addr)
248{
249 u_int *ptr=(u_int *)addr;
665f33e1 250 #ifndef HAVE_ARMV7
57871462 251 // get from literal pool
15776b68 252 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 253 u_int offset=*ptr&0xfff;
254 u_int *l_ptr=(void *)ptr+offset+8;
255 u_int source=l_ptr[0];
256 u_int copy=l_ptr[1];
257 u_int len=l_ptr[2];
258 ptr+=4;
259 #else
260 // ARMv7 movw/movt
261 assert((*ptr&0xFFF00000)==0xe3000000);
262 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
263 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
264 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
265 ptr+=6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 269#ifndef DISABLE_TLB
cfcba99a 270 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 271 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
272 unsigned int page=source>>12;
273 unsigned int map_value=memory_map[page];
274 if(map_value>=0x80000000) return 0;
275 while(page<((source+len-1)>>12)) {
276 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
277 }
278 source = source+(map_value<<2);
279 }
63cb0298 280#endif
57871462 281 //printf("verify_dirty: %x %x %x\n",source,copy,len);
282 return !memcmp((void *)source,(void *)copy,len);
283}
284
285// This doesn't necessarily find all clean entry points, just
286// guarantees that it's not dirty
287int isclean(int addr)
288{
665f33e1 289 #ifndef HAVE_ARMV7
57871462 290 int *ptr=((u_int *)addr)+4;
291 #else
292 int *ptr=((u_int *)addr)+6;
293 #endif
294 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
295 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
296 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
297 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
298 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
299 return 1;
300}
301
4a35de07 302// get source that block at addr was compiled from (host pointers)
57871462 303void get_bounds(int addr,u_int *start,u_int *end)
304{
305 u_int *ptr=(u_int *)addr;
665f33e1 306 #ifndef HAVE_ARMV7
57871462 307 // get from literal pool
15776b68 308 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 309 u_int offset=*ptr&0xfff;
310 u_int *l_ptr=(void *)ptr+offset+8;
311 u_int source=l_ptr[0];
312 //u_int copy=l_ptr[1];
313 u_int len=l_ptr[2];
314 ptr+=4;
315 #else
316 // ARMv7 movw/movt
317 assert((*ptr&0xFFF00000)==0xe3000000);
318 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
319 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
320 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
321 ptr+=6;
322 #endif
323 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
324 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 325#ifndef DISABLE_TLB
cfcba99a 326 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 327 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
328 if(memory_map[source>>12]>=0x80000000) source = 0;
329 else source = source+(memory_map[source>>12]<<2);
330 }
63cb0298 331#endif
57871462 332 *start=source;
333 *end=source+len;
334}
335
336/* Register allocation */
337
// Map guest register `reg` to a host register in cur->regmap for
// instruction index i.
//
// Note: registers are allocated clean (unmodified state)
// if you intend to modify the register, you must call dirty_reg().
//
// Strategy, in order:
//   1. nothing to do if reg is flagged in cur->u or is already mapped;
//   2. take the preferred host register if it is free or holds an
//      unneeded guest register;
//   3. release one unneeded mapping, then take any free host register
//      (first pass avoids host registers used by instruction i-1);
//   4. evict a mapping chosen through the hsn[] table filled by lsn().
// In every case the chosen host register's dirty and isconst bits are
// cleared.  Exits the process if no register can be found.
// Values >= 64 in regmap denote the upper half of a 64-bit guest register.
void alloc_reg(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = (reg&7);
  if(reg==CCREG) preferred_reg=HOST_CCREG;
  if(reg==PTEMP||reg==FTEMP) preferred_reg=12;

  // Don't allocate unused registers
  if((cur->u>>reg)&1) return;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred register is occupied: take it over if its current occupant
  // is flagged unneeded (lower half checked in u, upper half in uu).
  r=cur->regmap[preferred_reg];
  if(r<64&&((cur->u>>r)&1)) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  if(r>=64&&((cur->uu>>(r&63))&1)) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops).  So we try to allocate
  // first (see above) before removing old mappings.  If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  // (Only the first unneeded mapping found is released: note the break.)
  for(hr=0;hr<HOST_REGS;hr++)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      if(r<64) {
        if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
      }
      else
      {
        if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
      }
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  // ("Recently" = mapped to a source or target of instruction i-1.)
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // hsn[] starts at 10 for every guest register and is then updated by
  // lsn() (defined elsewhere); registers with the highest value are
  // evicted first.
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg;
        return;
      }
      // Otherwise evict a guest register with this score that instruction
      // i-1 does not touch; upper halves (r+64) are tried before lower
      // halves, and HOST_CCREG is only taken when j<hsn[CCREG].
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict by score alone, with no restriction on what
  // instruction i-1 uses or on HOST_CCREG.
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
}
504
// Map both halves of 64-bit guest register `reg` for instruction index i.
// The lower half is handled by alloc_reg(); the upper half is entered in
// cur->regmap as reg|64 (reg+64), using the same sequence of attempts as
// alloc_reg: preferred host register (8 or 9 by default, or whatever
// loop_reg() returns), then a free register, then eviction driven by the
// hsn[] table filled by lsn().  Nothing is allocated for the upper half
// when reg is flagged in cur->uu.  Exits the process if no register can
// be found.
void alloc_reg64(struct regstat *cur,int i,signed char reg)
{
  int preferred_reg = 8+(reg&1);
  int r,hr;

  // allocate the lower 32 bits
  alloc_reg(cur,i,reg);

  // Don't allocate unused registers
  if((cur->uu>>reg)&1) return;

  // see if the upper half is already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg+64) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg|64;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred register is occupied: take it over if its current occupant
  // is flagged unneeded (lower half checked in u, upper half in uu).
  r=cur->regmap[preferred_reg];
  if(r<64&&((cur->u>>r)&1)) {
    cur->regmap[preferred_reg]=reg|64;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  if(r>=64&&((cur->uu>>(r&63))&1)) {
    cur->regmap[preferred_reg]=reg|64;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops).  So we try to allocate
  // first (see above) before removing old mappings.  If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  // (Scans from the highest host register down and releases only the
  // first unneeded mapping found.)
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      if(r<64) {
        if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
      }
      else
      {
        if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
      }
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  // ("Recently" = mapped to a source or target of instruction i-1.)
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
          cur->regmap[hr]=reg|64;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg|64;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // hsn[] starts at 10 for every guest register and is then updated by
  // lsn() (defined elsewhere); registers with the highest value are
  // evicted first.
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg|64;
        return;
      }
      // Otherwise evict a guest register with this score that instruction
      // i-1 does not touch; upper halves (r+64) are tried before lower
      // halves, and HOST_CCREG is only taken when j<hsn[CCREG].
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg|64;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg|64;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict by score alone, with no restriction on what
  // instruction i-1 uses or on HOST_CCREG.
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg|64;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg|64;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen");exit(1);
}
670
// Allocate a temporary register.  This is done without regard to
// dirty status or whether the register we request is on the unneeded list
// Note: This will only allocate one register, even if called multiple times
//
// Order of attempts: return if reg is already mapped; take a free host
// register (searching from the highest index down); reuse a host register
// whose occupant is unneeded both now and at instruction i-1; otherwise
// evict a mapping chosen through the hsn[] table filled by lsn().
// The chosen register's dirty and isconst bits are cleared.
// Exits the process if no register can be found.
void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = -1; // no preference; only passed on to lsn()

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
  }

  // Try to allocate any available register
  for(hr=HOST_REGS-1;hr>=0;hr--) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Find an unneeded register
  // (must be unneeded in cur and, unless i==0, also in the unneeded
  // bitmaps of the previous instruction)
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      if(r<64) {
        if((cur->u>>r)&1) {
          if(i==0||((unneeded_reg[i-1]>>r)&1)) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
      else
      {
        if((cur->uu>>(r&63))&1) {
          if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // TODO: we might want to follow unconditional jumps here
  // TODO: get rid of dupe code and make this into a function
  // hsn[] starts at 10 for every guest register and is then updated by
  // lsn() (defined elsewhere); registers with the highest value are
  // evicted first.
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
    for(j=10;j>=3;j--)
    {
      // Evict a guest register with this score that instruction i-1 does
      // not touch; upper halves (r+64) are tried before lower halves, and
      // HOST_CCREG is only taken when hsn[CCREG]>2.
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: evict by score alone, with no restriction on what
  // instruction i-1 uses or on HOST_CCREG.
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  SysPrintf("This shouldn't happen");exit(1);
}
793// Allocate a specific ARM register.
794void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
795{
796 int n;
f776eb14 797 int dirty=0;
57871462 798
799 // see if it's already allocated (and dealloc it)
800 for(n=0;n<HOST_REGS;n++)
801 {
f776eb14 802 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
803 dirty=(cur->dirty>>n)&1;
804 cur->regmap[n]=-1;
805 }
57871462 806 }
807
808 cur->regmap[hr]=reg;
809 cur->dirty&=~(1<<hr);
f776eb14 810 cur->dirty|=dirty<<hr;
57871462 811 cur->isconst&=~(1<<hr);
812}
813
814// Alloc cycle count into dedicated register
815alloc_cc(struct regstat *cur,int i)
816{
817 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
818}
819
820/* Special alloc */
821
822
823/* Assembler */
824
// Printable names of the 16 ARM core registers, indexed by register number.
// Used for the assem_debug disassembly output.
char regname[16][4] = {
  "r0",  "r1", "r2",  "r3",
  "r4",  "r5", "r6",  "r7",
  "r8",  "r9", "r10", "fp",
  "r12", "sp", "lr",  "pc"
};
842
843void output_byte(u_char byte)
844{
845 *(out++)=byte;
846}
847void output_modrm(u_char mod,u_char rm,u_char ext)
848{
849 assert(mod<4);
850 assert(rm<8);
851 assert(ext<8);
852 u_char byte=(mod<<6)|(ext<<3)|rm;
853 *(out++)=byte;
854}
855void output_sib(u_char scale,u_char index,u_char base)
856{
857 assert(scale<4);
858 assert(index<8);
859 assert(base<8);
860 u_char byte=(scale<<6)|(index<<3)|base;
861 *(out++)=byte;
862}
863void output_w32(u_int word)
864{
865 *((u_int *)out)=word;
866 out+=4;
867}
// Build the register fields of an ARM instruction word:
// rn in bits 19-16, rd in bits 15-12, rm in bits 3-0.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the operand fields of an ARM data-processing immediate instruction:
// rn in bits 19-16, rd in bits 15-12, an 8-bit immediate in bits 7-0 and a
// rotate field derived from `shift` (an even left-shift amount) placed at
// bit 8 upward as ((32-shift)&30)<<7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=((32-shift)&30)<<7;
  u_int regs=(rn<<16)|(rd<<12);
  return regs|rotate|imm;
}
// Try to express imm as an ARM "modified immediate": an 8-bit value rotated
// by an even amount.  On success stores the 12-bit rotate/imm8 field in
// *encoded and returns 1.  On failure returns 0 and leaves *encoded at 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // Rotate right two bits at a time until the value fits in 8 bits.
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 898void genimm_checked(u_int imm,u_int *encoded)
899{
900 u_int ret=genimm(imm,encoded);
901 assert(ret);
902}
57871462 903u_int genjmp(u_int addr)
904{
905 int offset=addr-(int)out-8;
e80343e2 906 if(offset<-33554432||offset>=33554432) {
907 if (addr>2) {
c43b5311 908 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 909 exit(1);
910 }
911 return 0;
912 }
57871462 913 return ((u_int)offset>>2)&0xffffff;
914}
915
916void emit_mov(int rs,int rt)
917{
918 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
919 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
920}
921
922void emit_movs(int rs,int rt)
923{
924 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
925 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
926}
927
928void emit_add(int rs1,int rs2,int rt)
929{
930 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_adds(int rs1,int rs2,int rt)
935{
936 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
938}
939
940void emit_adcs(int rs1,int rs2,int rt)
941{
942 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
943 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
944}
945
946void emit_sbc(int rs1,int rs2,int rt)
947{
948 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
949 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
950}
951
952void emit_sbcs(int rs1,int rs2,int rt)
953{
954 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
955 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
956}
957
958void emit_neg(int rs, int rt)
959{
960 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
961 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
962}
963
964void emit_negs(int rs, int rt)
965{
966 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
967 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
968}
969
970void emit_sub(int rs1,int rs2,int rt)
971{
972 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
973 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
974}
975
976void emit_subs(int rs1,int rs2,int rt)
977{
978 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
979 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
980}
981
982void emit_zeroreg(int rt)
983{
984 assem_debug("mov %s,#0\n",regname[rt]);
985 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
986}
987
790ee18e 988void emit_loadlp(u_int imm,u_int rt)
989{
990 add_literal((int)out,imm);
991 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
992 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
993}
994void emit_movw(u_int imm,u_int rt)
995{
996 assert(imm<65536);
997 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
998 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
999}
1000void emit_movt(u_int imm,u_int rt)
1001{
1002 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1003 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1004}
1005void emit_movimm(u_int imm,u_int rt)
1006{
1007 u_int armval;
1008 if(genimm(imm,&armval)) {
1009 assem_debug("mov %s,#%d\n",regname[rt],imm);
1010 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1011 }else if(genimm(~imm,&armval)) {
1012 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1013 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1014 }else if(imm<65536) {
665f33e1 1015 #ifndef HAVE_ARMV7
790ee18e 1016 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1017 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1018 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1019 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1020 #else
1021 emit_movw(imm,rt);
1022 #endif
1023 }else{
665f33e1 1024 #ifndef HAVE_ARMV7
790ee18e 1025 emit_loadlp(imm,rt);
1026 #else
1027 emit_movw(imm&0x0000FFFF,rt);
1028 emit_movt(imm&0xFFFF0000,rt);
1029 #endif
1030 }
1031}
1032void emit_pcreladdr(u_int rt)
1033{
1034 assem_debug("add %s,pc,#?\n",regname[rt]);
1035 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1036}
1037
57871462 1038void emit_loadreg(int r, int hr)
1039{
3d624f89 1040#ifdef FORCE32
1041 if(r&64) {
c43b5311 1042 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1043 assert(0);
1044 return;
3d624f89 1045 }
1046#endif
57871462 1047 if((r&63)==0)
1048 emit_zeroreg(hr);
1049 else {
3d624f89 1050 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1051 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1052 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1053 if(r==CCREG) addr=(int)&cycle_count;
1054 if(r==CSREG) addr=(int)&Status;
1055 if(r==FSREG) addr=(int)&FCR31;
1056 if(r==INVCP) addr=(int)&invc_ptr;
1057 u_int offset = addr-(u_int)&dynarec_local;
1058 assert(offset<4096);
1059 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1060 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1061 }
1062}
1063void emit_storereg(int r, int hr)
1064{
3d624f89 1065#ifdef FORCE32
1066 if(r&64) {
c43b5311 1067 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1068 assert(0);
1069 return;
3d624f89 1070 }
1071#endif
1072 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1073 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1074 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1075 if(r==CCREG) addr=(int)&cycle_count;
1076 if(r==FSREG) addr=(int)&FCR31;
1077 u_int offset = addr-(u_int)&dynarec_local;
1078 assert(offset<4096);
1079 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1080 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1081}
1082
1083void emit_test(int rs, int rt)
1084{
1085 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1086 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1087}
1088
1089void emit_testimm(int rs,int imm)
1090{
1091 u_int armval;
5a05d80c 1092 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1093 genimm_checked(imm,&armval);
57871462 1094 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1095}
1096
b9b61529 1097void emit_testeqimm(int rs,int imm)
1098{
1099 u_int armval;
1100 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1101 genimm_checked(imm,&armval);
b9b61529 1102 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1103}
1104
57871462 1105void emit_not(int rs,int rt)
1106{
1107 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1108 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1109}
1110
b9b61529 1111void emit_mvnmi(int rs,int rt)
1112{
1113 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1114 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1115}
1116
57871462 1117void emit_and(u_int rs1,u_int rs2,u_int rt)
1118{
1119 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1120 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1121}
1122
1123void emit_or(u_int rs1,u_int rs2,u_int rt)
1124{
1125 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1126 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1127}
1128void emit_or_and_set_flags(int rs1,int rs2,int rt)
1129{
1130 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1131 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1132}
1133
f70d384d 1134void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1135{
1136 assert(rs<16);
1137 assert(rt<16);
1138 assert(imm<32);
1139 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1140 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1141}
1142
576bbd8f 1143void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 assert(imm<32);
1148 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1149 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1150}
1151
57871462 1152void emit_xor(u_int rs1,u_int rs2,u_int rt)
1153{
1154 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1155 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1156}
1157
57871462 1158void emit_addimm(u_int rs,int imm,u_int rt)
1159{
1160 assert(rs<16);
1161 assert(rt<16);
1162 if(imm!=0) {
57871462 1163 u_int armval;
1164 if(genimm(imm,&armval)) {
1165 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1166 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1167 }else if(genimm(-imm,&armval)) {
8a0a8423 1168 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1169 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1170 }else if(imm<0) {
ffb0b9e0 1171 assert(imm>-65536);
57871462 1172 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1173 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1174 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1175 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1176 }else{
ffb0b9e0 1177 assert(imm<65536);
57871462 1178 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1179 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1180 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1181 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1182 }
1183 }
1184 else if(rs!=rt) emit_mov(rs,rt);
1185}
1186
1187void emit_addimm_and_set_flags(int imm,int rt)
1188{
1189 assert(imm>-65536&&imm<65536);
1190 u_int armval;
1191 if(genimm(imm,&armval)) {
1192 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1193 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1194 }else if(genimm(-imm,&armval)) {
1195 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1196 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1197 }else if(imm<0) {
1198 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1199 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1200 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1201 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1202 }else{
1203 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1204 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1205 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1206 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1207 }
1208}
1209void emit_addimm_no_flags(u_int imm,u_int rt)
1210{
1211 emit_addimm(rt,imm,rt);
1212}
1213
1214void emit_addnop(u_int r)
1215{
1216 assert(r<16);
1217 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1218 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1219}
1220
1221void emit_adcimm(u_int rs,int imm,u_int rt)
1222{
1223 u_int armval;
cfbd3c6e 1224 genimm_checked(imm,&armval);
57871462 1225 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1226 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1227}
1228/*void emit_sbcimm(int imm,u_int rt)
1229{
1230 u_int armval;
cfbd3c6e 1231 genimm_checked(imm,&armval);
57871462 1232 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1233 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1234}*/
1235void emit_sbbimm(int imm,u_int rt)
1236{
1237 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1238 assert(rt<8);
1239 if(imm<128&&imm>=-128) {
1240 output_byte(0x83);
1241 output_modrm(3,rt,3);
1242 output_byte(imm);
1243 }
1244 else
1245 {
1246 output_byte(0x81);
1247 output_modrm(3,rt,3);
1248 output_w32(imm);
1249 }
1250}
1251void emit_rscimm(int rs,int imm,u_int rt)
1252{
1253 assert(0);
1254 u_int armval;
cfbd3c6e 1255 genimm_checked(imm,&armval);
57871462 1256 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1258}
1259
1260void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1261{
1262 // TODO: if(genimm(imm,&armval)) ...
1263 // else
1264 emit_movimm(imm,HOST_TEMPREG);
1265 emit_adds(HOST_TEMPREG,rsl,rtl);
1266 emit_adcimm(rsh,0,rth);
1267}
1268
1269void emit_sbb(int rs1,int rs2)
1270{
1271 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1272 output_byte(0x19);
1273 output_modrm(3,rs1,rs2);
1274}
1275
1276void emit_andimm(int rs,int imm,int rt)
1277{
1278 u_int armval;
790ee18e 1279 if(imm==0) {
1280 emit_zeroreg(rt);
1281 }else if(genimm(imm,&armval)) {
57871462 1282 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1284 }else if(genimm(~imm,&armval)) {
1285 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1287 }else if(imm==65535) {
665f33e1 1288 #ifndef HAVE_ARMV7
57871462 1289 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1290 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1291 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1292 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1293 #else
1294 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1295 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1296 #endif
1297 }else{
1298 assert(imm>0&&imm<65535);
665f33e1 1299 #ifndef HAVE_ARMV7
57871462 1300 assem_debug("mov r14,#%d\n",imm&0xFF00);
1301 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1302 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1303 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1304 #else
1305 emit_movw(imm,HOST_TEMPREG);
1306 #endif
1307 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1308 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1309 }
1310}
1311
1312void emit_orimm(int rs,int imm,int rt)
1313{
1314 u_int armval;
790ee18e 1315 if(imm==0) {
1316 if(rs!=rt) emit_mov(rs,rt);
1317 }else if(genimm(imm,&armval)) {
57871462 1318 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1319 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1320 }else{
1321 assert(imm>0&&imm<65536);
1322 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1323 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1324 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1325 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1326 }
1327}
1328
1329void emit_xorimm(int rs,int imm,int rt)
1330{
57871462 1331 u_int armval;
790ee18e 1332 if(imm==0) {
1333 if(rs!=rt) emit_mov(rs,rt);
1334 }else if(genimm(imm,&armval)) {
57871462 1335 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1337 }else{
514ed0d9 1338 assert(imm>0&&imm<65536);
57871462 1339 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1340 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1341 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1342 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1343 }
1344}
1345
1346void emit_shlimm(int rs,u_int imm,int rt)
1347{
1348 assert(imm>0);
1349 assert(imm<32);
1350 //if(imm==1) ...
1351 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1353}
1354
c6c3b1b3 1355void emit_lsls_imm(int rs,int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1361}
1362
665f33e1 1363void emit_lslpls_imm(int rs,int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1369}
1370
57871462 1371void emit_shrimm(int rs,u_int imm,int rt)
1372{
1373 assert(imm>0);
1374 assert(imm<32);
1375 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1376 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1377}
1378
1379void emit_sarimm(int rs,u_int imm,int rt)
1380{
1381 assert(imm>0);
1382 assert(imm<32);
1383 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1384 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1385}
1386
1387void emit_rorimm(int rs,u_int imm,int rt)
1388{
1389 assert(imm>0);
1390 assert(imm<32);
1391 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1392 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1393}
1394
1395void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1396{
1397 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1398 assert(imm>0);
1399 assert(imm<32);
1400 //if(imm==1) ...
1401 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1402 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1403 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1404 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1405}
1406
1407void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1408{
1409 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1410 assert(imm>0);
1411 assert(imm<32);
1412 //if(imm==1) ...
1413 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1414 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1415 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1416 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1417}
1418
// Sign-extend the low 16 bits of rs into rt: a single "sxth" when
// HAVE_ARMV7 is defined, otherwise a shift left by 16 followed by an
// arithmetic shift right by 16.
void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV7
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1429
// Sign-extend the low 8 bits of rs into rt: a single "sxtb" when
// HAVE_ARMV7 is defined, otherwise a shift left by 24 followed by an
// arithmetic shift right by 24.
void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV7
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1440
57871462 1441void emit_shl(u_int rs,u_int shift,u_int rt)
1442{
1443 assert(rs<16);
1444 assert(rt<16);
1445 assert(shift<16);
1446 //if(imm==1) ...
1447 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1448 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1449}
1450void emit_shr(u_int rs,u_int shift,u_int rt)
1451{
1452 assert(rs<16);
1453 assert(rt<16);
1454 assert(shift<16);
1455 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1456 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1457}
1458void emit_sar(u_int rs,u_int shift,u_int rt)
1459{
1460 assert(rs<16);
1461 assert(rt<16);
1462 assert(shift<16);
1463 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1464 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1465}
1466void emit_shlcl(int r)
1467{
1468 assem_debug("shl %%%s,%%cl\n",regname[r]);
1469 assert(0);
1470}
1471void emit_shrcl(int r)
1472{
1473 assem_debug("shr %%%s,%%cl\n",regname[r]);
1474 assert(0);
1475}
1476void emit_sarcl(int r)
1477{
1478 assem_debug("sar %%%s,%%cl\n",regname[r]);
1479 assert(0);
1480}
1481
1482void emit_shldcl(int r1,int r2)
1483{
1484 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1485 assert(0);
1486}
1487void emit_shrdcl(int r1,int r2)
1488{
1489 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1490 assert(0);
1491}
1492void emit_orrshl(u_int rs,u_int shift,u_int rt)
1493{
1494 assert(rs<16);
1495 assert(rt<16);
1496 assert(shift<16);
1497 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1498 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1499}
1500void emit_orrshr(u_int rs,u_int shift,u_int rt)
1501{
1502 assert(rs<16);
1503 assert(rt<16);
1504 assert(shift<16);
1505 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1506 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1507}
1508
1509void emit_cmpimm(int rs,int imm)
1510{
1511 u_int armval;
1512 if(genimm(imm,&armval)) {
5a05d80c 1513 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1514 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1515 }else if(genimm(-imm,&armval)) {
5a05d80c 1516 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1517 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1518 }else if(imm>0) {
1519 assert(imm<65536);
57871462 1520 emit_movimm(imm,HOST_TEMPREG);
57871462 1521 assem_debug("cmp %s,r14\n",regname[rs]);
1522 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1523 }else{
1524 assert(imm>-65536);
57871462 1525 emit_movimm(-imm,HOST_TEMPREG);
57871462 1526 assem_debug("cmn %s,r14\n",regname[rs]);
1527 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1528 }
1529}
1530
1531void emit_cmovne(u_int *addr,int rt)
1532{
1533 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1534 assert(0);
1535}
1536void emit_cmovl(u_int *addr,int rt)
1537{
1538 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1539 assert(0);
1540}
1541void emit_cmovs(u_int *addr,int rt)
1542{
1543 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1544 assert(0);
1545}
1546void emit_cmovne_imm(int imm,int rt)
1547{
1548 assem_debug("movne %s,#%d\n",regname[rt],imm);
1549 u_int armval;
cfbd3c6e 1550 genimm_checked(imm,&armval);
57871462 1551 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1552}
1553void emit_cmovl_imm(int imm,int rt)
1554{
1555 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1556 u_int armval;
cfbd3c6e 1557 genimm_checked(imm,&armval);
57871462 1558 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1559}
1560void emit_cmovb_imm(int imm,int rt)
1561{
1562 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1563 u_int armval;
cfbd3c6e 1564 genimm_checked(imm,&armval);
57871462 1565 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1566}
1567void emit_cmovs_imm(int imm,int rt)
1568{
1569 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1570 u_int armval;
cfbd3c6e 1571 genimm_checked(imm,&armval);
57871462 1572 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1573}
1574void emit_cmove_reg(int rs,int rt)
1575{
1576 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1577 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1578}
1579void emit_cmovne_reg(int rs,int rt)
1580{
1581 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1582 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1583}
1584void emit_cmovl_reg(int rs,int rt)
1585{
1586 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1587 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1588}
1589void emit_cmovs_reg(int rs,int rt)
1590{
1591 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1592 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1593}
1594
// Emit code setting rt to 1 if rs < imm (using movlt), else 0.
// When rt aliases rs the zeroing is deferred until after the compare so
// the input is not overwritten first.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Emit code setting rt to 1 if the compare of rs against imm leaves the
// "cc" condition true (using movcc), else 0.
// When rt aliases rs the zeroing is deferred until after the compare so
// the input is not overwritten first.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than-immediate for a 64-bit value held in rsh:rsl, result in rt.
// The low word is compared first via emit_slti32(); the result is then
// corrected from the high word. rsh must not be rt (asserted).
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // High word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // imm >= 0: high word is tested against itself
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned set-less-than-immediate for a 64-bit value held in rsh:rsl,
// result in rt. The low word is compared first via emit_sltiu32(); the
// result is then corrected from the high word. rsh must not be rt
// (asserted).
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // High word is compared against -1; any other value forces rt to 1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // imm >= 0: a non-zero high word forces rt to 0
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1641
1642void emit_cmp(int rs,int rt)
1643{
1644 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1645 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1646}
// Emit code setting rt to 1 if rs > 0, else 0: rs is compared against 1,
// rt is loaded with 1, and movlt replaces it with 0.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Emit code that leaves rt at 1 when rs is non-zero. rs is first copied to
// rt with emit_movs() (or tested in place when rs == rt), then movne loads 1.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// Greater-than-zero test for a 64-bit value held in rsh:rsl, result in rt.
// The low word is evaluated with emit_set_gz32(); the high word then
// overrides: non-zero forces 1 (movne), and movmi then forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// Non-zero test for a 64-bit value held in rsh:rsl: the two halves are
// ORed into rt with "orrs", then movne loads 1 into rt.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Emit code setting rt to 1 if rs1 < rs2 (using movlt), else 0.
// When rt aliases either source the zeroing is deferred until after the
// compare so the input is not overwritten first.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Emit code setting rt to 1 if the compare of rs1 against rs2 leaves the
// "cc" condition true (using movcc), else 0.
// When rt aliases either source the zeroing is deferred until after the
// compare so the input is not overwritten first.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1691void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1692{
1693 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1694 assert(u1!=rt);
1695 assert(u2!=rt);
1696 emit_cmp(l1,l2);
1697 emit_movimm(0,rt);
1698 emit_sbcs(u1,u2,HOST_TEMPREG);
1699 emit_cmovl_imm(1,rt);
1700}
1701void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1702{
1703 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1704 assert(u1!=rt);
1705 assert(u2!=rt);
1706 emit_cmp(l1,l2);
1707 emit_movimm(0,rt);
1708 emit_sbcs(u1,u2,HOST_TEMPREG);
1709 emit_cmovb_imm(1,rt);
1710}
1711
1712void emit_call(int a)
1713{
1714 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1715 u_int offset=genjmp(a);
1716 output_w32(0xeb000000|offset);
1717}
1718void emit_jmp(int a)
1719{
1720 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1721 u_int offset=genjmp(a);
1722 output_w32(0xea000000|offset);
1723}
1724void emit_jne(int a)
1725{
1726 assem_debug("bne %x\n",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0x1a000000|offset);
1729}
1730void emit_jeq(int a)
1731{
1732 assem_debug("beq %x\n",a);
1733 u_int offset=genjmp(a);
1734 output_w32(0x0a000000|offset);
1735}
1736void emit_js(int a)
1737{
1738 assem_debug("bmi %x\n",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0x4a000000|offset);
1741}
1742void emit_jns(int a)
1743{
1744 assem_debug("bpl %x\n",a);
1745 u_int offset=genjmp(a);
1746 output_w32(0x5a000000|offset);
1747}
1748void emit_jl(int a)
1749{
1750 assem_debug("blt %x\n",a);
1751 u_int offset=genjmp(a);
1752 output_w32(0xba000000|offset);
1753}
1754void emit_jge(int a)
1755{
1756 assem_debug("bge %x\n",a);
1757 u_int offset=genjmp(a);
1758 output_w32(0xaa000000|offset);
1759}
1760void emit_jno(int a)
1761{
1762 assem_debug("bvc %x\n",a);
1763 u_int offset=genjmp(a);
1764 output_w32(0x7a000000|offset);
1765}
1766void emit_jc(int a)
1767{
1768 assem_debug("bcs %x\n",a);
1769 u_int offset=genjmp(a);
1770 output_w32(0x2a000000|offset);
1771}
1772void emit_jcc(int a)
1773{
1774 assem_debug("bcc %x\n",a);
1775 u_int offset=genjmp(a);
1776 output_w32(0x3a000000|offset);
1777}
1778
// Stub with an x86-style trace line; emits nothing and always asserts.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Stub with an x86-style trace line; emits nothing and always asserts.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Stub with an x86-style trace line; emits nothing and always asserts.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1794void emit_pushreg(u_int r)
1795{
1796 assem_debug("push %%%s\n",regname[r]);
1797 assert(0);
1798}
1799void emit_popreg(u_int r)
1800{
1801 assem_debug("pop %%%s\n",regname[r]);
1802 assert(0);
1803}
1804void emit_callreg(u_int r)
1805{
c6c3b1b3 1806 assert(r<15);
1807 assem_debug("blx %s\n",regname[r]);
1808 output_w32(0xe12fff30|r);
57871462 1809}
1810void emit_jmpreg(u_int r)
1811{
1812 assem_debug("mov pc,%s\n",regname[r]);
1813 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1814}
1815
1816void emit_readword_indexed(int offset, int rs, int rt)
1817{
1818 assert(offset>-4096&&offset<4096);
1819 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1820 if(offset>=0) {
1821 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1822 }else{
1823 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1824 }
1825}
1826void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1827{
1828 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1829 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1830}
c6c3b1b3 1831void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1832{
1833 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1834 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1835}
1836void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1837{
1838 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1839 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1840}
1841void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1842{
1843 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1844 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1845}
1846void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1847{
1848 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1849 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1850}
1851void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1852{
1853 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1854 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1855}
// Word load that optionally goes through a map register. With map < 0 it
// is a plain offset load; otherwise addr must be 0 (asserted) and the load
// uses rs + (map << 2) as the address.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (first word, skipped when rh < 0) and rl (second
// word). With map < 0 the two words are read at addr and addr+4 from rs.
// Otherwise the map register is used for the first word, incremented by 1
// (emit_addimm), and used again for the second; rh must not be rs
// (asserted) because rs is still needed for the second load.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1876void emit_movsbl_indexed(int offset, int rs, int rt)
1877{
1878 assert(offset>-256&&offset<256);
1879 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1882 }else{
1883 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1884 }
1885}
1886void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1887{
1888 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1889 else {
1890 if(addr==0) {
1891 emit_shlimm(map,2,map);
1892 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1893 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1894 }else{
1895 assert(addr>-256&&addr<256);
1896 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1897 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1898 emit_movsbl_indexed(addr, rt, rt);
1899 }
1900 }
1901}
1902void emit_movswl_indexed(int offset, int rs, int rt)
1903{
1904 assert(offset>-256&&offset<256);
1905 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1906 if(offset>=0) {
1907 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1908 }else{
1909 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1910 }
1911}
1912void emit_movzbl_indexed(int offset, int rs, int rt)
1913{
1914 assert(offset>-4096&&offset<4096);
1915 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1916 if(offset>=0) {
1917 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1918 }else{
1919 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1920 }
1921}
1922void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1923{
1924 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1926}
// Unsigned byte load that optionally goes through a map register.
//  - map < 0: plain offset load.
//  - addr == 0: load from rs + (map << 2).
//  - otherwise: rt = rs + addr first, then load from rt + (map << 2).
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1939void emit_movzwl_indexed(int offset, int rs, int rt)
1940{
1941 assert(offset>-256&&offset<256);
1942 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1943 if(offset>=0) {
1944 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1945 }else{
1946 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1947 }
1948}
054175e9 1949static void emit_ldrd(int offset, int rs, int rt)
1950{
1951 assert(offset>-256&&offset<256);
1952 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1953 if(offset>=0) {
1954 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1955 }else{
1956 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1957 }
1958}
57871462 1959void emit_readword(int addr, int rt)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<4096);
1963 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1965}
1966void emit_movsbl(int addr, int rt)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<256);
1970 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1971 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1972}
1973void emit_movswl(int addr, int rt)
1974{
1975 u_int offset = addr-(u_int)&dynarec_local;
1976 assert(offset<256);
1977 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1978 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1979}
1980void emit_movzbl(int addr, int rt)
1981{
1982 u_int offset = addr-(u_int)&dynarec_local;
1983 assert(offset<4096);
1984 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1985 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1986}
1987void emit_movzwl(int addr, int rt)
1988{
1989 u_int offset = addr-(u_int)&dynarec_local;
1990 assert(offset<256);
1991 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1992 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1993}
1994void emit_movzwl_reg(int rs, int rt)
1995{
1996 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1997 assert(0);
1998}
1999
2000void emit_xchg(int rs, int rt)
2001{
2002 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
2003 assert(0);
2004}
2005void emit_writeword_indexed(int rt, int offset, int rs)
2006{
2007 assert(offset>-4096&&offset<4096);
2008 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2009 if(offset>=0) {
2010 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2011 }else{
2012 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2013 }
2014}
2015void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2016{
2017 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2018 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2019}
// Word store that optionally goes through a map register. With map < 0 it
// is a plain offset store; otherwise addr must be 0 (asserted) and the
// store uses rs + (map << 2) as the address. temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (first word) and rl (second word).
//  - map < 0: stores at addr and addr+4 from rs; rh is skipped when < 0.
//  - map >= 0 (rh must be >= 0, asserted): the first word goes through map.
//    For the second word, either temp = map + 1 is used as the map
//    register (temp != rs), or rs itself is advanced by 4 (temp == rs).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp==rs) {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }else{
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
2044void emit_writehword_indexed(int rt, int offset, int rs)
2045{
2046 assert(offset>-256&&offset<256);
2047 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2048 if(offset>=0) {
2049 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2050 }else{
2051 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2052 }
2053}
2054void emit_writebyte_indexed(int rt, int offset, int rs)
2055{
2056 assert(offset>-4096&&offset<4096);
2057 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2058 if(offset>=0) {
2059 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2060 }else{
2061 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2062 }
2063}
2064void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2065{
2066 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2067 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2068}
// Byte store that optionally goes through a map register.
//  - map < 0: plain offset store.
//  - addr == 0: store to rs + (map << 2).
//  - otherwise: temp = rs + addr first, then store to temp + (map << 2).
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
b96d3df7 2081void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2082{
2083 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2084 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2085}
2086void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2087{
2088 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2089 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2090}
2091void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2092{
2093 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2094 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2095}
57871462 2096void emit_writeword(int rt, int addr)
2097{
2098 u_int offset = addr-(u_int)&dynarec_local;
2099 assert(offset<4096);
2100 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2101 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2102}
2103void emit_writehword(int rt, int addr)
2104{
2105 u_int offset = addr-(u_int)&dynarec_local;
2106 assert(offset<256);
2107 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2108 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2109}
2110void emit_writebyte(int rt, int addr)
2111{
2112 u_int offset = addr-(u_int)&dynarec_local;
2113 assert(offset<4096);
74426039 2114 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2115 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2116}
// Stub with an x86-style trace line; emits nothing and always asserts.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Stub with an x86-style trace line; emits nothing and always asserts.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2127
2128void emit_mul(int rs)
2129{
2130 assem_debug("mul %%%s\n",regname[rs]);
2131 assert(0);
2132}
// Not implemented in this backend (see emit_smull): logs and then always
// fails the assertion.
void emit_imul(int rs)
{
  assem_debug("imul %%%s\n",regname[rs]);
  assert(0);
}
2138void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2139{
2140 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2141 assert(rs1<16);
2142 assert(rs2<16);
2143 assert(hi<16);
2144 assert(lo<16);
2145 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2146}
2147void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2148{
2149 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2150 assert(rs1<16);
2151 assert(rs2<16);
2152 assert(hi<16);
2153 assert(lo<16);
2154 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2155}
2156
// Not implemented in this backend: logs and then always fails the assertion.
void emit_div(int rs)
{
  assem_debug("div %%%s\n",regname[rs]);
  assert(0);
}
// Not implemented in this backend: logs and then always fails the assertion.
void emit_idiv(int rs)
{
  assem_debug("idiv %%%s\n",regname[rs]);
  assert(0);
}
// Not implemented in this backend: logs and then always fails the assertion.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2172
2173void emit_clz(int rs,int rt)
2174{
2175 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2176 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2177}
2178
2179void emit_subcs(int rs1,int rs2,int rt)
2180{
2181 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2182 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2183}
2184
2185void emit_shrcc_imm(int rs,u_int imm,int rt)
2186{
2187 assert(imm>0);
2188 assert(imm<32);
2189 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2190 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2191}
2192
b1be1eee 2193void emit_shrne_imm(int rs,u_int imm,int rt)
2194{
2195 assert(imm>0);
2196 assert(imm<32);
2197 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2198 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2199}
2200
57871462 2201void emit_negmi(int rs, int rt)
2202{
2203 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2204 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2205}
2206
2207void emit_negsmi(int rs, int rt)
2208{
2209 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2210 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2211}
2212
2213void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2214{
2215 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2216 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2217}
2218
2219void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2220{
2221 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2222 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2223}
2224
2225void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2226{
2227 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2228 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2229}
2230
2231void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2232{
2233 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2234 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2235}
2236
2237void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2241}
2242
2243void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2244{
2245 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2246 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2247}
2248
2249void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2250{
2251 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2252 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2253}
2254
2255void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2256{
2257 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2258 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2259}
2260
2261void emit_teq(int rs, int rt)
2262{
2263 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2264 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2265}
2266
2267void emit_rsbimm(int rs, int imm, int rt)
2268{
2269 u_int armval;
cfbd3c6e 2270 genimm_checked(imm,&armval);
57871462 2271 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2272 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2273}
2274
2275// Load 2 immediates optimizing for small code size
2276void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2277{
2278 emit_movimm(imm1,rt1);
2279 u_int armval;
2280 if(genimm(imm2-imm1,&armval)) {
2281 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2282 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2283 }else if(genimm(imm1-imm2,&armval)) {
2284 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2285 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2286 }
2287 else emit_movimm(imm2,rt2);
2288}
2289
2290// Conditionally select one of two immediates, optimizing for small code size
2291// This will only be called if HAVE_CMOV_IMM is defined
2292void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2293{
2294 u_int armval;
2295 if(genimm(imm2-imm1,&armval)) {
2296 emit_movimm(imm1,rt);
2297 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2298 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2299 }else if(genimm(imm1-imm2,&armval)) {
2300 emit_movimm(imm1,rt);
2301 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2302 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2303 }
2304 else {
665f33e1 2305 #ifndef HAVE_ARMV7
57871462 2306 emit_movimm(imm1,rt);
2307 add_literal((int)out,imm2);
2308 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2309 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2310 #else
2311 emit_movw(imm1&0x0000FFFF,rt);
2312 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2313 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2314 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2315 }
2316 emit_movt(imm1&0xFFFF0000,rt);
2317 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2318 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2319 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2320 }
2321 #endif
2322 }
2323}
2324
// special case for checking invalid_code
// Not implemented in this backend: always fails the assertion
// (emit_cmpmem_indexedsr12_reg is the variant that emits code).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2330
2331// special case for checking invalid_code
2332void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2333{
2334 assert(imm<128&&imm>=0);
2335 assert(r>=0&&r<16);
2336 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2337 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2338 emit_cmpimm(HOST_TEMPREG,imm);
2339}
2340
2341// special case for tlb mapping
2342void emit_addsr12(int rs1,int rs2,int rt)
2343{
2344 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2345 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2346}
2347
0bbd1454 2348void emit_callne(int a)
2349{
2350 assem_debug("blne %x\n",a);
2351 u_int offset=genjmp(a);
2352 output_w32(0x1b000000|offset);
2353}
2354
// Used to preload hash table entries
// NOTE(review): unlike the rest of this file, this emits individual bytes
// (0x0F, 0x18, a modrm byte) followed by the address word, which looks like a
// leftover x86 encoding rather than an ARM instruction - confirm it is never
// called in this backend.  emit_prefetchreg emits an ARM "pld".
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32((int)addr);
}
2364void emit_prefetchreg(int r)
2365{
2366 assem_debug("pld %s\n",regname[r]);
2367 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2368}
2369
2370// Special case for mini_ht
2371void emit_ldreq_indexed(int rs, u_int offset, int rt)
2372{
2373 assert(offset<4096);
2374 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2375 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2376}
2377
2378void emit_flds(int r,int sr)
2379{
2380 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2381 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2382}
2383
2384void emit_vldr(int r,int vr)
2385{
2386 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2387 output_w32(0xed900b00|(vr<<12)|(r<<16));
2388}
2389
2390void emit_fsts(int sr,int r)
2391{
2392 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2393 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2394}
2395
2396void emit_vstr(int vr,int r)
2397{
2398 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2399 output_w32(0xed800b00|(vr<<12)|(r<<16));
2400}
2401
// Emit "ftosizs s<d>,s<s>": convert single-precision s to a signed integer
// in single register d (the "z" form, i.e. the round-toward-zero mnemonic).
void emit_ftosizs(int s,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22); // single dest: bits 3..1 -> 14..12, bit 0 -> 22
  int src=((s&14)>>1)|((s&1)<<5);   // single source: bits 3..1 -> 2..0, bit 0 -> 5
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst|src);
}
2407
// Emit "ftosizd s<d>,d<s>": convert double-precision s to a signed integer
// in single register d (the "z" form, i.e. the round-toward-zero mnemonic).
void emit_ftosizd(int s,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22); // single dest: bits 3..1 -> 14..12, bit 0 -> 22
  int src=s&7;                      // double source, low three bits only
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst|src);
}
2413
// Emit "fsitos s<d>,s<s>": convert the signed integer in single register s
// to single precision in d.
void emit_fsitos(int s,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22); // single dest: bits 3..1 -> 14..12, bit 0 -> 22
  int src=((s&14)>>1)|((s&1)<<5);   // single source: bits 3..1 -> 2..0, bit 0 -> 5
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst|src);
}
2419
// Emit "fsitod d<d>,s<s>": convert the signed integer in single register s
// to double precision in d.
void emit_fsitod(int s,int d)
{
  int dst=(d&7)<<12;              // double dest, low three bits only
  int src=((s&14)>>1)|((s&1)<<5); // single source: bits 3..1 -> 2..0, bit 0 -> 5
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst|src);
}
2425
// Emit "fcvtds d<d>,s<s>": widen single-precision s to double-precision d.
void emit_fcvtds(int s,int d)
{
  int dst=(d&7)<<12;              // double dest, low three bits only
  int src=((s&14)>>1)|((s&1)<<5); // single source: bits 3..1 -> 2..0, bit 0 -> 5
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst|src);
}
2431
// Emit "fcvtsd s<d>,d<s>": narrow double-precision s to single-precision d.
void emit_fcvtsd(int s,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22); // single dest: bits 3..1 -> 14..12, bit 0 -> 22
  int src=s&7;                      // double source, low three bits only
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst|src);
}
2437
// Emit "fsqrts s<d>,s<s>": single-precision square root of s into d.
// Both operands are encoded as single-precision registers, so the debug
// mnemonic prints both with the "s" prefix.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2443
// Emit "fsqrtd d<d>,d<s>": double-precision square root of s into d.
// Both operands are encoded as double-precision registers (low three bits),
// so the debug mnemonic prints both with the "d" prefix.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2449
// Emit "fabss s<d>,s<s>": single-precision absolute value of s into d.
// Both operands are encoded as single-precision registers, so the debug
// mnemonic prints both with the "s" prefix.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2455
// Emit "fabsd d<d>,d<s>": double-precision absolute value of s into d.
// Both operands are encoded as double-precision registers (low three bits),
// so the debug mnemonic prints both with the "d" prefix.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2461
// Emit "fnegs s<d>,s<s>": single-precision negation of s into d.
// Both operands are encoded as single-precision registers, so the debug
// mnemonic prints both with the "s" prefix.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2467
// Emit "fnegd d<d>,d<s>": double-precision negation of s into d.
// Both operands are encoded as double-precision registers (low three bits),
// so the debug mnemonic prints both with the "d" prefix.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2473
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision d = s1 + s2.
void emit_fadds(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);   // bits 3..1 -> 14..12, bit 0 -> 22
  int op1=((s1&14)<<15)|((s1&1)<<7);  // bits 3..1 -> 18..16, bit 0 -> 7
  int op2=((s2&14)>>1)|((s2&1)<<5);   // bits 3..1 -> 2..0,   bit 0 -> 5
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst|op1|op2);
}
2479
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision d = s1 + s2.
// Only the low three bits of each register number are encoded.
void emit_faddd(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int op1=(s1&7)<<16;
  int op2=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst|op1|op2);
}
2485
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision d = s1 - s2.
void emit_fsubs(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);   // bits 3..1 -> 14..12, bit 0 -> 22
  int op1=((s1&14)<<15)|((s1&1)<<7);  // bits 3..1 -> 18..16, bit 0 -> 7
  int op2=((s2&14)>>1)|((s2&1)<<5);   // bits 3..1 -> 2..0,   bit 0 -> 5
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst|op1|op2);
}
2491
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision d = s1 - s2.
// Only the low three bits of each register number are encoded.
void emit_fsubd(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int op1=(s1&7)<<16;
  int op2=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst|op1|op2);
}
2497
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision d = s1 * s2.
void emit_fmuls(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);   // bits 3..1 -> 14..12, bit 0 -> 22
  int op1=((s1&14)<<15)|((s1&1)<<7);  // bits 3..1 -> 18..16, bit 0 -> 7
  int op2=((s2&14)>>1)|((s2&1)<<5);   // bits 3..1 -> 2..0,   bit 0 -> 5
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst|op1|op2);
}
2503
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision d = s1 * s2.
// Only the low three bits of each register number are encoded.
void emit_fmuld(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int op1=(s1&7)<<16;
  int op2=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst|op1|op2);
}
2509
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision d = s1 / s2.
void emit_fdivs(int s1,int s2,int d)
{
  int dst=((d&14)<<11)|((d&1)<<22);   // bits 3..1 -> 14..12, bit 0 -> 22
  int op1=((s1&14)<<15)|((s1&1)<<7);  // bits 3..1 -> 18..16, bit 0 -> 7
  int op2=((s2&14)>>1)|((s2&1)<<5);   // bits 3..1 -> 2..0,   bit 0 -> 5
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst|op1|op2);
}
2515
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision d = s1 / s2.
// Only the low three bits of each register number are encoded.
void emit_fdivd(int s1,int s2,int d)
{
  int dst=(d&7)<<12;
  int op1=(s1&7)<<16;
  int op2=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst|op1|op2);
}
2521
// Emit the fixed instruction logged as "fcmps s14, s15".
// The x and y arguments are not used: the operand registers are hard-coded.
void emit_fcmps(int x,int y)
{
  const unsigned int insn=0xeeb47a67;
  assem_debug("fcmps s14, s15\n");
  output_w32(insn);
}
2527
// Emit the fixed instruction logged as "fcmpd d6, d7".
// The x and y arguments are not used: the operand registers are hard-coded.
void emit_fcmpd(int x,int y)
{
  const unsigned int insn=0xeeb46b47;
  assem_debug("fcmpd d6, d7\n");
  output_w32(insn);
}
2533
// Emit the fixed instruction logged as "fmstat".
void emit_fmstat()
{
  const unsigned int insn=0xeef1fa10;
  assem_debug("fmstat\n");
  output_w32(insn);
}
2539
2540void emit_bicne_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_biccs_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
57871462 2552 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_bicvc_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
57871462 2560 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
2564void emit_bichi_imm(int rs,int imm,int rt)
2565{
2566 u_int armval;
cfbd3c6e 2567 genimm_checked(imm,&armval);
57871462 2568 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2569 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2570}
2571
2572void emit_orrvs_imm(int rs,int imm,int rt)
2573{
2574 u_int armval;
cfbd3c6e 2575 genimm_checked(imm,&armval);
57871462 2576 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2577 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2578}
2579
b9b61529 2580void emit_orrne_imm(int rs,int imm,int rt)
2581{
2582 u_int armval;
cfbd3c6e 2583 genimm_checked(imm,&armval);
b9b61529 2584 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2585 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2586}
2587
2588void emit_andne_imm(int rs,int imm,int rt)
2589{
2590 u_int armval;
cfbd3c6e 2591 genimm_checked(imm,&armval);
b9b61529 2592 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2593 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2594}
2595
665f33e1 2596void emit_addpl_imm(int rs,int imm,int rt)
2597{
2598 u_int armval;
2599 genimm_checked(imm,&armval);
2600 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2601 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2602}
2603
// Emit an "addvc pc,pc,#?" with a zero immediate field, used in place of a
// conditional branch to a.  The target a is only logged here.
// NOTE(review): presumably the immediate is patched in later - confirm.
void emit_jno_unlikely(int a)
{
  const unsigned int opcode=0x72800000;
  //emit_jno(a);
  assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
  output_w32(opcode|rd_rn_rm(15,15,0));
}
2610
054175e9 2611static void save_regs_all(u_int reglist)
57871462 2612{
054175e9 2613 int i;
57871462 2614 if(!reglist) return;
2615 assem_debug("stmia fp,{");
054175e9 2616 for(i=0;i<16;i++)
2617 if(reglist&(1<<i))
2618 assem_debug("r%d,",i);
57871462 2619 assem_debug("}\n");
2620 output_w32(0xe88b0000|reglist);
2621}
054175e9 2622static void restore_regs_all(u_int reglist)
57871462 2623{
054175e9 2624 int i;
57871462 2625 if(!reglist) return;
2626 assem_debug("ldmia fp,{");
054175e9 2627 for(i=0;i<16;i++)
2628 if(reglist&(1<<i))
2629 assem_debug("r%d,",i);
57871462 2630 assem_debug("}\n");
2631 output_w32(0xe89b0000|reglist);
2632}
054175e9 2633// Save registers before function call
2634static void save_regs(u_int reglist)
2635{
4d646738 2636 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2637 save_regs_all(reglist);
2638}
2639// Restore registers after function call
2640static void restore_regs(u_int reglist)
2641{
4d646738 2642 reglist&=CALLER_SAVE_REGS;
054175e9 2643 restore_regs_all(reglist);
2644}
57871462 2645
2646// Write back consts using r14 so we don't disturb the other registers
2647void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2648{
2649 int hr;
2650 for(hr=0;hr<HOST_REGS;hr++) {
2651 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2652 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2653 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2654 int value=constmap[i][hr];
2655 if(value==0) {
2656 emit_zeroreg(HOST_TEMPREG);
2657 }
2658 else {
2659 emit_movimm(value,HOST_TEMPREG);
2660 }
2661 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2662#ifndef FORCE32
57871462 2663 if((i_is32>>i_regmap[hr])&1) {
2664 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2665 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2666 }
24385cae 2667#endif
57871462 2668 }
2669 }
2670 }
2671 }
2672}
2673
2674/* Stubs/epilogue */
2675
2676void literal_pool(int n)
2677{
2678 if(!literalcount) return;
2679 if(n) {
2680 if((int)out-literals[0][0]<4096-n) return;
2681 }
2682 u_int *ptr;
2683 int i;
2684 for(i=0;i<literalcount;i++)
2685 {
77750690 2686 u_int l_addr=(u_int)out;
2687 int j;
2688 for(j=0;j<i;j++) {
2689 if(literals[j][1]==literals[i][1]) {
2690 //printf("dup %08x\n",literals[i][1]);
2691 l_addr=literals[j][0];
2692 break;
2693 }
2694 }
57871462 2695 ptr=(u_int *)literals[i][0];
77750690 2696 u_int offset=l_addr-(u_int)ptr-8;
57871462 2697 assert(offset<4096);
2698 assert(!(offset&3));
2699 *ptr|=offset;
77750690 2700 if(l_addr==(u_int)out) {
2701 literals[i][0]=l_addr; // remember for dupes
2702 output_w32(literals[i][1]);
2703 }
57871462 2704 }
2705 literalcount=0;
2706}
2707
2708void literal_pool_jumpover(int n)
2709{
2710 if(!literalcount) return;
2711 if(n) {
2712 if((int)out-literals[0][0]<4096-n) return;
2713 }
2714 int jaddr=(int)out;
2715 emit_jmp(0);
2716 literal_pool(0);
2717 set_jump_target(jaddr,(int)out);
2718}
2719
// Emit the trampoline for a jump leaving the current block: the target
// address is loaded into register 0 and the address of the branch
// instruction (addr) into register 1, then control jumps to `linker`.
// addr must point at a branch instruction inside the translation cache.
emit_extjump2(u_int addr, int target, int linker)
{
  u_char *ptr=(u_char *)addr;
  // The top byte of the instruction at addr must be a branch opcode.
  assert((ptr[3]&0x0e)==0xa);
  emit_loadlp(target,0);
  emit_loadlp(addr,1);
  assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
  //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
//DEBUG >
#ifdef DEBUG_CYCLE_COUNT
  // NOTE(review): this debug-only section refers to ECX, an x86 register
  // name - confirm it still builds for this backend.
  emit_readword((int)&last_count,ECX);
  emit_add(HOST_CCREG,ECX,HOST_CCREG);
  emit_readword((int)&next_interupt,ECX);
  emit_writeword(HOST_CCREG,(int)&Count);
  emit_sub(HOST_CCREG,ECX,HOST_CCREG);
  emit_writeword(ECX,(int)&last_count);
#endif
//DEBUG <
  emit_jmp(linker);
}
2740
2741emit_extjump(int addr, int target)
2742{
2743 emit_extjump2(addr, target, (int)dyna_linker);
2744}
2745emit_extjump_ds(int addr, int target)
2746{
2747 emit_extjump2(addr, target, (int)dyna_linker_ds);
2748}
2749
13e35c04 2750// put rt_val into rt, potentially making use of rs with value rs_val
2751static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2752{
8575a877 2753 u_int armval;
2754 int diff;
2755 if(genimm(rt_val,&armval)) {
2756 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2757 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2758 return;
2759 }
2760 if(genimm(~rt_val,&armval)) {
2761 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2762 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2763 return;
2764 }
2765 diff=rt_val-rs_val;
2766 if(genimm(diff,&armval)) {
2767 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2768 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2769 return;
2770 }else if(genimm(-diff,&armval)) {
2771 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2772 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2773 return;
2774 }
2775 emit_movimm(rt_val,rt);
2776}
2777
// Return 1 if emit_movimm_from can do its job cheaply, i.e. if v1==v2 or if
// v2-v1 or v1-v2, after dropping trailing pairs of zero bits, fits in 8 bits.
// Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int candidates[2];
  int k;
  if(v1==v2) return 1;
  candidates[0]=v2-v1; // difference for an add
  candidates[1]=v1-v2; // negated difference for a sub
  for(k=0;k<2;k++) {
    u_int xs=candidates[k];
    while(xs!=0&&(xs&3)==0)
      xs>>=2;
    if(xs<0x100) return 1;
  }
  return 0;
}
cbbab9cd 2793
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to pass" for that argument,
// except that a1 is always copied when it is register 0.
// Trashes r2 when the two registers have to be swapped.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // r0 holds the second argument: move it out before overwriting r0
      emit_mov(a1,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2810
b1be1eee 2811static void mov_loadtype_adj(int type,int rs,int rt)
2812{
2813 switch(type) {
2814 case LOADB_STUB: emit_signextend8(rs,rt); break;
2815 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2816 case LOADH_STUB: emit_signextend16(rs,rt); break;
2817 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2818 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2819 default: assert(0);
2820 }
2821}
2822
2823#ifdef PCSX
2824#include "pcsxmem.h"
2825#include "pcsxmem_inline.c"
2826#endif
2827
// Emit the out-of-line slow path ("stub") number n for a memory read.
// Fields of stubs[n] used here:
//   [0] load type (LOAD*_STUB)      [1] address of the branch to patch so it
//   [2] return address                  lands on this stub
//   [3] index of the instruction    [4] host register holding the address
//   [5] struct regstat pointer      [6] cycle count adjustment
//   [7] bit mask of live host registers
// The PCSX build looks the page up in mem_rtab and either loads directly or
// calls a jump_handler_read* routine; the other build stores the address in
// `address` and calls through a read handler table.
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor and unaligned loads land in FTEMP, others in the
  // instruction's target register.
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
#ifdef PCSX
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a register among r0-r9 and r12 (mask 0x13ff) that is not live.
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination gets overwritten, so it need not be saved/restored.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // No free register: save the live ones now and use r0 (or r2 if the
    // address is in r0).
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second temporary when it is free to clobber.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12], then shifted left by one with flags updated.
  // NOTE(review): presumably the bit shifted out marks "handler" vs "direct
  // pointer" (cf. get_direct_memhandler) and selects the cc-conditional
  // instructions below - confirm.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count.
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call(handler);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // The handler result is taken from r0 and extended per load type.
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else // !PCSX
  if(addr<0) addr=rt;
  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Pick the handler table matching the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is non-zero when the register state passed in is not regs[i].
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = table, r1 = address>>16 (table index), r2 = adjusted cycle count.
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // The result is picked up from readmem_dword with the width and
  // signedness of the load.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
2991
c6c3b1b3 2992#ifdef PCSX
2993// return memhandler, or get directly accessable address and return 0
2994u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2995{
2996 u_int l1,l2=0;
2997 l1=((u_int *)table)[addr>>12];
2998 if((l1&(1<<31))==0) {
2999 u_int v=l1<<1;
3000 *addr_host=v+addr;
3001 return 0;
3002 }
3003 else {
3004 l1<<=1;
3005 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
3006 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 3007 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 3008 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
3009 else
3010 l2=((u_int *)l1)[(addr&0xfff)/4];
3011 if((l2&(1<<31))==0) {
3012 u_int v=l2<<1;
3013 *addr_host=v+(addr&0xfff);
3014 return 0;
3015 }
3016 return l2<<1;
3017 }
3018}
3019#endif
3020
// Emit an inline memory read for an address known at compile time.
//   type    - LOAD*_STUB access type
//   i       - index of the instruction being compiled
//   addr    - the constant guest address
//   regmap  - host register mapping in effect
//   target  - guest register whose host register holds (or will receive)
//             the address / result
//   adj     - cycle count adjustment
//   reglist - bit mask of live host registers
// In the PCSX build the access is resolved now: a direct host load when the
// address maps to memory, otherwise a call to the memory handler.
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
#ifdef PCSX
  u_int handler,host_addr=0,is_dynamic,far_call=0;
  int cc=get_reg(regmap,CCREG);
  // Let pcsx_direct_read handle the access if it can.
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  if (handler==0) {
    // Directly accessible memory: nothing to do if the result is discarded.
    if(rt<0||rt1[i]==0)
      return;
    // rs holds addr; turn it into the host address if they differ.
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  // For handlers reported as dynamic, go through the generic
  // jump_handler_read* routine instead of the handler found above.
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=(int)jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=(int)jump_handler_read16;
    if(type==LOADW_STUB)
      handler=(int)jump_handler_read32;
  }

  // call a memhandler
  // The destination gets overwritten, so it need not be saved/restored.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  int offset=(int)handler-(int)out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm(handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = shifted table entry for the page, r2 = adjusted cycle count
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // Store the current cycle total (last_count + adjusted count) to Count
    // before calling the handler.
    emit_readword((int)&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,(int)&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // The handler result is taken from r0 and extended per load type.
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
#else // if !PCSX
  // Pick the handler table matching the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  if(target==0)
    emit_movimm(addr,rs);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled.  This is
    // a very rare case and likely represents a bug.
    int ds=regmap!=regs[i].regmap;
    if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
    if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
  }
#endif
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler is looked up at compile time: r0 = ftable[addr>>16].
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
#endif
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  // The result is picked up from readmem_dword with the width and
  // signedness of the load.
  if(rt>=0) {
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
#endif // !PCSX
}
3184
3185do_writestub(int n)
3186{
3187 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3188 literal_pool(256);
3189 set_jump_target(stubs[n][1],(int)out);
3190 int type=stubs[n][0];
3191 int i=stubs[n][3];
3192 int rs=stubs[n][4];
3193 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3194 u_int reglist=stubs[n][7];
3195 signed char *i_regmap=i_regs->regmap;
3196 int addr=get_reg(i_regmap,AGEN1+(i&1));
3197 int rth,rt,r;
3198 int ds;
b9b61529 3199 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3200 rth=get_reg(i_regmap,FTEMP|64);
3201 rt=get_reg(i_regmap,r=FTEMP);
3202 }else{
3203 rth=get_reg(i_regmap,rs2[i]|64);
3204 rt=get_reg(i_regmap,r=rs2[i]);
3205 }
3206 assert(rs>=0);
3207 assert(rt>=0);
b96d3df7 3208#ifdef PCSX
3209 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3210 int reglist2=reglist|(1<<rs)|(1<<rt);
3211 for(rtmp=0;rtmp<=12;rtmp++) {
3212 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3213 temp=rtmp; break;
3214 }
3215 }
3216 if(temp==-1) {
3217 save_regs(reglist);
3218 regs_saved=1;
3219 for(rtmp=0;rtmp<=3;rtmp++)
3220 if(rtmp!=rs&&rtmp!=rt)
3221 {temp=rtmp;break;}
3222 }
3223 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3224 temp2=3;
3225 emit_readword((int)&mem_wtab,temp);
3226 emit_shrimm(rs,12,temp2);
3227 emit_readword_dualindexedx4(temp,temp2,temp2);
3228 emit_lsls_imm(temp2,1,temp2);
3229 switch(type) {
3230 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3231 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3232 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3233 default: assert(0);
3234 }
3235 if(regs_saved) {
3236 restore_jump=(int)out;
3237 emit_jcc(0); // jump to reg restore
3238 }
3239 else
3240 emit_jcc(stubs[n][2]); // return address (invcode check)
3241
3242 if(!regs_saved)
3243 save_regs(reglist);
3244 int handler=0;
3245 switch(type) {
3246 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3247 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3248 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3249 }
3250 assert(handler!=0);
3251 pass_args(rs,rt);
3252 if(temp2!=3)
3253 emit_mov(temp2,3);
3254 int cc=get_reg(i_regmap,CCREG);
3255 if(cc<0)
3256 emit_loadreg(CCREG,2);
2573466a 3257 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3258 // returns new cycle_count
3259 emit_call(handler);
2573466a 3260 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3261 if(cc<0)
3262 emit_storereg(CCREG,2);
3263 if(restore_jump)
3264 set_jump_target(restore_jump,(int)out);
3265 restore_regs(reglist);
3266 ra=stubs[n][2];
b96d3df7 3267 emit_jmp(ra);
3268#else // if !PCSX
57871462 3269 if(addr<0) addr=get_reg(i_regmap,-1);
3270 assert(addr>=0);
3271 int ftable=0;
3272 if(type==STOREB_STUB)
3273 ftable=(int)writememb;
3274 if(type==STOREH_STUB)
3275 ftable=(int)writememh;
3276 if(type==STOREW_STUB)
3277 ftable=(int)writemem;
24385cae 3278#ifndef FORCE32
57871462 3279 if(type==STORED_STUB)
3280 ftable=(int)writememd;
24385cae 3281#endif
3282 assert(ftable!=0);
57871462 3283 emit_writeword(rs,(int)&address);
3284 //emit_shrimm(rs,16,rs);
3285 //emit_movmem_indexedx4(ftable,rs,rs);
3286 if(type==STOREB_STUB)
3287 emit_writebyte(rt,(int)&byte);
3288 if(type==STOREH_STUB)
3289 emit_writehword(rt,(int)&hword);
3290 if(type==STOREW_STUB)
3291 emit_writeword(rt,(int)&word);
3292 if(type==STORED_STUB) {
3d624f89 3293#ifndef FORCE32
57871462 3294 emit_writeword(rt,(int)&dword);
3295 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3296#else
c43b5311 3297 SysPrintf("STORED_STUB\n");
3d624f89 3298#endif
57871462 3299 }
3300 //emit_pusha();
3301 save_regs(reglist);
97a238a6 3302#ifndef PCSX
57871462 3303 ds=i_regs!=&regs[i];
3304 int real_rs=get_reg(i_regmap,rs1[i]);
3305 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3306 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3307 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3308 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3309#endif
57871462 3310 emit_shrimm(rs,16,1);
3311 int cc=get_reg(i_regmap,CCREG);
3312 if(cc<0) {
3313 emit_loadreg(CCREG,2);
3314 }
3315 emit_movimm(ftable,0);
3316 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3317#ifndef PCSX
57871462 3318 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3319#endif
57871462 3320 //emit_readword((int)&last_count,temp);
3321 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3322 //emit_add(cc,temp,cc);
3323 //emit_writeword(cc,(int)&Count);
3324 emit_call((int)&indirect_jump_indexed);
3325 //emit_callreg(rs);
3326 emit_readword((int)&Count,HOST_TEMPREG);
3327 emit_readword((int)&next_interupt,2);
3328 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3329 emit_writeword(2,(int)&last_count);
3330 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3331 if(cc<0) {
3332 emit_storereg(CCREG,HOST_TEMPREG);
3333 }
3334 //emit_popa();
3335 restore_regs(reglist);
3336 //if((cc=get_reg(regmap,CCREG))>=0) {
3337 // emit_loadreg(CCREG,cc);
3338 //}
3339 emit_jmp(stubs[n][2]); // return address
b96d3df7 3340#endif // !PCSX
57871462 3341}
3342
3343inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3344{
3345 int rs=get_reg(regmap,-1);
3346 int rth=get_reg(regmap,target|64);
3347 int rt=get_reg(regmap,target);
3348 assert(rs>=0);
3349 assert(rt>=0);
cbbab9cd 3350#ifdef PCSX
b96d3df7 3351 u_int handler,host_addr=0;
b96d3df7 3352 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3353 if (handler==0) {
13e35c04 3354 if(addr!=host_addr)
3355 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3356 switch(type) {
3357 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3358 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3359 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3360 default: assert(0);
3361 }
3362 return;
3363 }
3364
3365 // call a memhandler
3366 save_regs(reglist);
13e35c04 3367 pass_args(rs,rt);
b96d3df7 3368 int cc=get_reg(regmap,CCREG);
3369 if(cc<0)
3370 emit_loadreg(CCREG,2);
2573466a 3371 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3372 emit_movimm(handler,3);
3373 // returns new cycle_count
3374 emit_call((int)jump_handler_write_h);
2573466a 3375 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3376 if(cc<0)
3377 emit_storereg(CCREG,2);
3378 restore_regs(reglist);
3379#else // if !pcsx
57871462 3380 int ftable=0;
3381 if(type==STOREB_STUB)
3382 ftable=(int)writememb;
3383 if(type==STOREH_STUB)
3384 ftable=(int)writememh;
3385 if(type==STOREW_STUB)
3386 ftable=(int)writemem;
24385cae 3387#ifndef FORCE32
57871462 3388 if(type==STORED_STUB)
3389 ftable=(int)writememd;
24385cae 3390#endif
3391 assert(ftable!=0);
57871462 3392 emit_writeword(rs,(int)&address);
3393 //emit_shrimm(rs,16,rs);
3394 //emit_movmem_indexedx4(ftable,rs,rs);
3395 if(type==STOREB_STUB)
3396 emit_writebyte(rt,(int)&byte);
3397 if(type==STOREH_STUB)
3398 emit_writehword(rt,(int)&hword);
3399 if(type==STOREW_STUB)
3400 emit_writeword(rt,(int)&word);
3401 if(type==STORED_STUB) {
3d624f89 3402#ifndef FORCE32
57871462 3403 emit_writeword(rt,(int)&dword);
3404 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3405#else
c43b5311 3406 SysPrintf("STORED_STUB\n");
3d624f89 3407#endif
57871462 3408 }
3409 //emit_pusha();
3410 save_regs(reglist);
0c1fe38b 3411#ifndef PCSX
3412 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3413 if((signed int)addr>=(signed int)0xC0000000) {
3414 // Theoretically we can have a pagefault here, if the TLB has never
3415 // been enabled and the address is outside the range 80000000..BFFFFFFF
3416 // Write out the registers so the pagefault can be handled. This is
3417 // a very rare case and likely represents a bug.
3418 int ds=regmap!=regs[i].regmap;
3419 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3420 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3421 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3422 }
3423#endif
57871462 3424 //emit_shrimm(rs,16,1);
3425 int cc=get_reg(regmap,CCREG);
3426 if(cc<0) {
3427 emit_loadreg(CCREG,2);
3428 }
3429 //emit_movimm(ftable,0);
3430 emit_movimm(((u_int *)ftable)[addr>>16],0);
3431 //emit_readword((int)&last_count,12);
2573466a 3432 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3433#ifndef PCSX
57871462 3434 if((signed int)addr>=(signed int)0xC0000000) {
3435 // Pagefault address
3436 int ds=regmap!=regs[i].regmap;
3437 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3438 }
f51dc36c 3439#endif
57871462 3440 //emit_add(12,2,2);
3441 //emit_writeword(2,(int)&Count);
3442 //emit_call(((u_int *)ftable)[addr>>16]);
3443 emit_call((int)&indirect_jump);
3444 emit_readword((int)&Count,HOST_TEMPREG);
3445 emit_readword((int)&next_interupt,2);
2573466a 3446 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3447 emit_writeword(2,(int)&last_count);
3448 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3449 if(cc<0) {
3450 emit_storereg(CCREG,HOST_TEMPREG);
3451 }
3452 //emit_popa();
3453 restore_regs(reglist);
b96d3df7 3454#endif
57871462 3455}
3456
3457do_unalignedwritestub(int n)
3458{
b7918751 3459 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3460 literal_pool(256);
57871462 3461 set_jump_target(stubs[n][1],(int)out);
b7918751 3462
3463 int i=stubs[n][3];
3464 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3465 int addr=stubs[n][5];
3466 u_int reglist=stubs[n][7];
3467 signed char *i_regmap=i_regs->regmap;
3468 int temp2=get_reg(i_regmap,FTEMP);
3469 int rt;
3470 int ds, real_rs;
3471 rt=get_reg(i_regmap,rs2[i]);
3472 assert(rt>=0);
3473 assert(addr>=0);
3474 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3475 reglist|=(1<<addr);
3476 reglist&=~(1<<temp2);
3477
b96d3df7 3478#if 1
3479 // don't bother with it and call write handler
3480 save_regs(reglist);
3481 pass_args(addr,rt);
3482 int cc=get_reg(i_regmap,CCREG);
3483 if(cc<0)
3484 emit_loadreg(CCREG,2);
2573466a 3485 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3486 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3487 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3488 if(cc<0)
3489 emit_storereg(CCREG,2);
3490 restore_regs(reglist);
3491 emit_jmp(stubs[n][2]); // return address
3492#else
b7918751 3493 emit_andimm(addr,0xfffffffc,temp2);
3494 emit_writeword(temp2,(int)&address);
3495
3496 save_regs(reglist);
97a238a6 3497#ifndef PCSX
b7918751 3498 ds=i_regs!=&regs[i];
3499 real_rs=get_reg(i_regmap,rs1[i]);
3500 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3501 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3502 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3503 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3504#endif
b7918751 3505 emit_shrimm(addr,16,1);
3506 int cc=get_reg(i_regmap,CCREG);
3507 if(cc<0) {
3508 emit_loadreg(CCREG,2);
3509 }
3510 emit_movimm((u_int)readmem,0);
3511 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3512#ifndef PCSX
3513 // pagefault address
3514 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3515#endif
b7918751 3516 emit_call((int)&indirect_jump_indexed);
3517 restore_regs(reglist);
3518
3519 emit_readword((int)&readmem_dword,temp2);
3520 int temp=addr; //hmh
3521 emit_shlimm(addr,3,temp);
3522 emit_andimm(temp,24,temp);
3523#ifdef BIG_ENDIAN_MIPS
3524 if (opcode[i]==0x2e) // SWR
3525#else
3526 if (opcode[i]==0x2a) // SWL
3527#endif
3528 emit_xorimm(temp,24,temp);
3529 emit_movimm(-1,HOST_TEMPREG);
55439448 3530 if (opcode[i]==0x2a) { // SWL
b7918751 3531 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3532 emit_orrshr(rt,temp,temp2);
3533 }else{
3534 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3535 emit_orrshl(rt,temp,temp2);
3536 }
3537 emit_readword((int)&address,addr);
3538 emit_writeword(temp2,(int)&word);
3539 //save_regs(reglist); // don't need to, no state changes
3540 emit_shrimm(addr,16,1);
3541 emit_movimm((u_int)writemem,0);
3542 //emit_call((int)&indirect_jump_indexed);
3543 emit_mov(15,14);
3544 emit_readword_dualindexedx4(0,1,15);
3545 emit_readword((int)&Count,HOST_TEMPREG);
3546 emit_readword((int)&next_interupt,2);
3547 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3548 emit_writeword(2,(int)&last_count);
3549 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3550 if(cc<0) {
3551 emit_storereg(CCREG,HOST_TEMPREG);
3552 }
3553 restore_regs(reglist);
57871462 3554 emit_jmp(stubs[n][2]); // return address
b96d3df7 3555#endif
57871462 3556}
3557
/*
 * Debug helper: print the eight register values passed in, in the order
 * a, b, c, d, ebp, esi, edi, followed in parentheses by the word located
 * just below the first parameter in memory.
 *
 * NOTE(review): (&edi)[-1] indexes outside the parameter object; what it
 * reads depends entirely on the calling convention and is not defined by C.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
3562
3563do_invstub(int n)
3564{
3565 literal_pool(20);
3566 u_int reglist=stubs[n][3];
3567 set_jump_target(stubs[n][1],(int)out);
3568 save_regs(reglist);
3569 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3570 emit_call((int)&invalidate_addr);
3571 restore_regs(reglist);
3572 emit_jmp(stubs[n][2]); // return address
3573}
3574
3575int do_dirty_stub(int i)
3576{
3577 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3578 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3579 #ifdef PCSX
3580 addr=(u_int)source;
3581 #endif
57871462 3582 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3583 #ifndef HAVE_ARMV7
ac545b3a 3584 emit_loadlp(addr,1);
57871462 3585 emit_loadlp((int)copy,2);
3586 emit_loadlp(slen*4,3);
3587 #else
ac545b3a 3588 emit_movw(addr&0x0000FFFF,1);
57871462 3589 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3590 emit_movt(addr&0xFFFF0000,1);
57871462 3591 emit_movt(((u_int)copy)&0xFFFF0000,2);
3592 emit_movw(slen*4,3);
3593 #endif
3594 emit_movimm(start+i*4,0);
3595 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3596 int entry=(int)out;
3597 load_regs_entry(i);
3598 if(entry==(int)out) entry=instr_addr[i];
3599 emit_jmp(instr_addr[i]);
3600 return entry;
3601}
3602
3603void do_dirty_stub_ds()
3604{
3605 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3606 #ifndef HAVE_ARMV7
57871462 3607 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3608 emit_loadlp((int)copy,2);
3609 emit_loadlp(slen*4,3);
3610 #else
3611 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3612 emit_movw(((u_int)copy)&0x0000FFFF,2);
3613 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3614 emit_movt(((u_int)copy)&0xFFFF0000,2);
3615 emit_movw(slen*4,3);
3616 #endif
3617 emit_movimm(start+1,0);
3618 emit_call((int)&verify_code_ds);
3619}
3620
3621do_cop1stub(int n)
3622{
3623 literal_pool(256);
3624 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3625 set_jump_target(stubs[n][1],(int)out);
3626 int i=stubs[n][3];
3d624f89 3627// int rs=stubs[n][4];
57871462 3628 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3629 int ds=stubs[n][6];
3630 if(!ds) {
3631 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3632 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3633 }
3634 //else {printf("fp exception in delay slot\n");}
3635 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3636 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3637 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3638 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3639 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3640}
3641
63cb0298 3642#ifndef DISABLE_TLB
3643
57871462 3644/* TLB */
3645
3646int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3647{
3648 if(c) {
3649 if((signed int)addr>=(signed int)0xC0000000) {
3650 // address_generation already loaded the const
3651 emit_readword_dualindexedx4(FP,map,map);
3652 }
3653 else
3654 return -1; // No mapping
3655 }
3656 else {
3657 assert(s!=map);
3658 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3659 emit_addsr12(map,s,map);
3660 // Schedule this while we wait on the load
3661 //if(x) emit_xorimm(s,x,ar);
3662 if(shift>=0) emit_shlimm(s,3,shift);
3663 if(~a) emit_andimm(s,a,ar);
3664 emit_readword_dualindexedx4(FP,map,map);
3665 }
3666 return map;
3667}
3668int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3669{
3670 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3671 emit_test(map,map);
3672 *jaddr=(int)out;
3673 emit_js(0);
3674 }
3675 return map;
3676}
3677
3678int gen_tlb_addr_r(int ar, int map) {
3679 if(map>=0) {
3680 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3681 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3682 }
3683}
3684
3685int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3686{
3687 if(c) {
3688 if(addr<0x80800000||addr>=0xC0000000) {
3689 // address_generation already loaded the const
3690 emit_readword_dualindexedx4(FP,map,map);
3691 }
3692 else
3693 return -1; // No mapping
3694 }
3695 else {
3696 assert(s!=map);
3697 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3698 emit_addsr12(map,s,map);
3699 // Schedule this while we wait on the load
3700 //if(x) emit_xorimm(s,x,ar);
3701 emit_readword_dualindexedx4(FP,map,map);
3702 }
3703 return map;
3704}
3705int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3706{
3707 if(!c||addr<0x80800000||addr>=0xC0000000) {
3708 emit_testimm(map,0x40000000);
3709 *jaddr=(int)out;
3710 emit_jne(0);
3711 }
3712}
3713
3714int gen_tlb_addr_w(int ar, int map) {
3715 if(map>=0) {
3716 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3717 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3718 }
3719}
3720
3721// Generate the address of the memory_map entry, relative to dynarec_local
3722generate_map_const(u_int addr,int reg) {
3723 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3724 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3725}
3726
63cb0298 3727#else
3728
// Stand-ins for the TLB helpers when DISABLE_TLB is defined.  Each accepts
// any argument list, emits nothing and returns 0.
static int do_tlb_r(int a, ...)
{
  return 0;
}

static int do_tlb_r_branch(int a, ...)
{
  return 0;
}

static int gen_tlb_addr_r(int a, ...)
{
  return 0;
}

static int do_tlb_w(int a, ...)
{
  return 0;
}

static int do_tlb_w_branch(int a, ...)
{
  return 0;
}

static int gen_tlb_addr_w(int a, ...)
{
  return 0;
}
63cb0298 3735
3736#endif // DISABLE_TLB
3737
57871462 3738/* Special assem */
3739
3740void shift_assemble_arm(int i,struct regstat *i_regs)
3741{
3742 if(rt1[i]) {
3743 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3744 {
3745 signed char s,t,shift;
3746 t=get_reg(i_regs->regmap,rt1[i]);
3747 s=get_reg(i_regs->regmap,rs1[i]);
3748 shift=get_reg(i_regs->regmap,rs2[i]);
3749 if(t>=0){
3750 if(rs1[i]==0)
3751 {
3752 emit_zeroreg(t);
3753 }
3754 else if(rs2[i]==0)
3755 {
3756 assert(s>=0);
3757 if(s!=t) emit_mov(s,t);
3758 }
3759 else
3760 {
3761 emit_andimm(shift,31,HOST_TEMPREG);
3762 if(opcode2[i]==4) // SLLV
3763 {
3764 emit_shl(s,HOST_TEMPREG,t);
3765 }
3766 if(opcode2[i]==6) // SRLV
3767 {
3768 emit_shr(s,HOST_TEMPREG,t);
3769 }
3770 if(opcode2[i]==7) // SRAV
3771 {
3772 emit_sar(s,HOST_TEMPREG,t);
3773 }
3774 }
3775 }
3776 } else { // DSLLV/DSRLV/DSRAV
3777 signed char sh,sl,th,tl,shift;
3778 th=get_reg(i_regs->regmap,rt1[i]|64);
3779 tl=get_reg(i_regs->regmap,rt1[i]);
3780 sh=get_reg(i_regs->regmap,rs1[i]|64);
3781 sl=get_reg(i_regs->regmap,rs1[i]);
3782 shift=get_reg(i_regs->regmap,rs2[i]);
3783 if(tl>=0){
3784 if(rs1[i]==0)
3785 {
3786 emit_zeroreg(tl);
3787 if(th>=0) emit_zeroreg(th);
3788 }
3789 else if(rs2[i]==0)
3790 {
3791 assert(sl>=0);
3792 if(sl!=tl) emit_mov(sl,tl);
3793 if(th>=0&&sh!=th) emit_mov(sh,th);
3794 }
3795 else
3796 {
3797 // FIXME: What if shift==tl ?
3798 assert(shift!=tl);
3799 int temp=get_reg(i_regs->regmap,-1);
3800 int real_th=th;
3801 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3802 assert(sl>=0);
3803 assert(sh>=0);
3804 emit_andimm(shift,31,HOST_TEMPREG);
3805 if(opcode2[i]==0x14) // DSLLV
3806 {
3807 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3808 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3809 emit_orrshr(sl,HOST_TEMPREG,th);
3810 emit_andimm(shift,31,HOST_TEMPREG);
3811 emit_testimm(shift,32);
3812 emit_shl(sl,HOST_TEMPREG,tl);
3813 if(th>=0) emit_cmovne_reg(tl,th);
3814 emit_cmovne_imm(0,tl);
3815 }
3816 if(opcode2[i]==0x16) // DSRLV
3817 {
3818 assert(th>=0);
3819 emit_shr(sl,HOST_TEMPREG,tl);
3820 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3821 emit_orrshl(sh,HOST_TEMPREG,tl);
3822 emit_andimm(shift,31,HOST_TEMPREG);
3823 emit_testimm(shift,32);
3824 emit_shr(sh,HOST_TEMPREG,th);
3825 emit_cmovne_reg(th,tl);
3826 if(real_th>=0) emit_cmovne_imm(0,th);
3827 }
3828 if(opcode2[i]==0x17) // DSRAV
3829 {
3830 assert(th>=0);
3831 emit_shr(sl,HOST_TEMPREG,tl);
3832 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3833 if(real_th>=0) {
3834 assert(temp>=0);
3835 emit_sarimm(th,31,temp);
3836 }
3837 emit_orrshl(sh,HOST_TEMPREG,tl);
3838 emit_andimm(shift,31,HOST_TEMPREG);
3839 emit_testimm(shift,32);
3840 emit_sar(sh,HOST_TEMPREG,th);
3841 emit_cmovne_reg(th,tl);
3842 if(real_th>=0) emit_cmovne_reg(temp,th);
3843 }
3844 }
3845 }
3846 }
3847 }
3848}
ffb0b9e0 3849
3850#ifdef PCSX
3851static void speculate_mov(int rs,int rt)
3852{
3853 if(rt!=0) {
3854 smrv_strong_next|=1<<rt;
3855 smrv[rt]=smrv[rs];
3856 }
3857}
3858
3859static void speculate_mov_weak(int rs,int rt)
3860{
3861 if(rt!=0) {
3862 smrv_weak_next|=1<<rt;
3863 smrv[rt]=smrv[rs];
3864 }
3865}
3866
3867static void speculate_register_values(int i)
3868{
3869 if(i==0) {
3870 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3871 // gp,sp are likely to stay the same throughout the block
3872 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3873 smrv_weak_next=~smrv_strong_next;
3874 //printf(" llr %08x\n", smrv[4]);
3875 }
3876 smrv_strong=smrv_strong_next;
3877 smrv_weak=smrv_weak_next;
3878 switch(itype[i]) {
3879 case ALU:
3880 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3881 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3882 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3883 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3884 else {
3885 smrv_strong_next&=~(1<<rt1[i]);
3886 smrv_weak_next&=~(1<<rt1[i]);
3887 }
3888 break;
3889 case SHIFTIMM:
3890 smrv_strong_next&=~(1<<rt1[i]);
3891 smrv_weak_next&=~(1<<rt1[i]);
3892 // fallthrough
3893 case IMM16:
3894 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3895 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3896 if(hr>=0) {
3897 if(get_final_value(hr,i,&value))
3898 smrv[rt1[i]]=value;
3899 else smrv[rt1[i]]=constmap[i][hr];
3900 smrv_strong_next|=1<<rt1[i];
3901 }
3902 }
3903 else {
3904 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3905 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3906 }
3907 break;
3908 case LOAD:
3909 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3910 // special case for BIOS
3911 smrv[rt1[i]]=0xa0000000;
3912 smrv_strong_next|=1<<rt1[i];
3913 break;
3914 }
3915 // fallthrough
3916 case SHIFT:
3917 case LOADLR:
3918 case MOV:
3919 smrv_strong_next&=~(1<<rt1[i]);
3920 smrv_weak_next&=~(1<<rt1[i]);
3921 break;
3922 case COP0:
3923 case COP2:
3924 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3925 smrv_strong_next&=~(1<<rt1[i]);
3926 smrv_weak_next&=~(1<<rt1[i]);
3927 }
3928 break;
3929 case C2LS:
3930 if (opcode[i]==0x32) { // LWC2
3931 smrv_strong_next&=~(1<<rt1[i]);
3932 smrv_weak_next&=~(1<<rt1[i]);
3933 }
3934 break;
3935 }
3936#if 0
3937 int r=4;
3938 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3939 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3940#endif
3941}
3942
// Memory region classes returned by get_ptr_mem_type().  MTYPE_8000 must
// stay 0: emit_fastpath_cmp_jump treats type 0 as the plain RAM check.
enum {
  MTYPE_8000 = 0, // default class
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3950
3951static int get_ptr_mem_type(u_int a)
3952{
3953 if(a < 0x00200000) {
3954 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3955 // return wrong, must use memhandler for BIOS self-test to pass
3956 // 007 does similar stuff from a00 mirror, weird stuff
3957 return MTYPE_8000;
3958 return MTYPE_0000;
3959 }
3960 if(0x1f800000 <= a && a < 0x1f801000)
3961 return MTYPE_1F80;
3962 if(0x80200000 <= a && a < 0x80800000)
3963 return MTYPE_8020;
3964 if(0xa0000000 <= a && a < 0xa0200000)
3965 return MTYPE_A000;
3966 return MTYPE_8000;
3967}
3968#endif
3969
3970static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3971{
3972 int jaddr,type=0;
3973
3974#ifdef PCSX
3975 int mr=rs1[i];
3976 if(((smrv_strong|smrv_weak)>>mr)&1) {
3977 type=get_ptr_mem_type(smrv[mr]);
3978 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3979 }
3980 else {
3981 // use the mirror we are running on
3982 type=get_ptr_mem_type(start);
3983 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3984 }
3985
3986 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3987 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3988 addr=*addr_reg_override=HOST_TEMPREG;
3989 type=0;
3990 }
3991 else if(type==MTYPE_0000) { // RAM 0 mirror
3992 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3993 addr=*addr_reg_override=HOST_TEMPREG;
3994 type=0;
3995 }
3996 else if(type==MTYPE_A000) { // RAM A mirror
3997 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3998 addr=*addr_reg_override=HOST_TEMPREG;
3999 type=0;
4000 }
4001 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 4002 if (psxH == (void *)0x1f800000) {
4003 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
4004 emit_cmpimm(HOST_TEMPREG,0x1000);
4005 jaddr=(int)out;
4006 emit_jc(0);
4007 }
4008 else {
4009 // do usual RAM check, jump will go to the right handler
4010 type=0;
4011 }
ffb0b9e0 4012 }
4013#endif
4014
4015 if(type==0)
4016 {
4017 emit_cmpimm(addr,RAM_SIZE);
4018 jaddr=(int)out;
4019 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4020 // Hint to branch predictor that the branch is unlikely to be taken
4021 if(rs1[i]>=28)
4022 emit_jno_unlikely(0);
4023 else
4024 #endif
4025 emit_jno(0);
a327ad27 4026 if(ram_offset!=0) {
4027 emit_addimm(addr,ram_offset,HOST_TEMPREG);
4028 addr=*addr_reg_override=HOST_TEMPREG;
4029 }
ffb0b9e0 4030 }
4031
4032 return jaddr;
4033}
4034
57871462 4035#define shift_assemble shift_assemble_arm
4036
4037void loadlr_assemble_arm(int i,struct regstat *i_regs)
4038{
4039 int s,th,tl,temp,temp2,addr,map=-1;
4040 int offset;
4041 int jaddr=0;
af4ee1fe 4042 int memtarget=0,c=0;
ffb0b9e0 4043 int fastload_reg_override=0;
57871462 4044 u_int hr,reglist=0;
4045 th=get_reg(i_regs->regmap,rt1[i]|64);
4046 tl=get_reg(i_regs->regmap,rt1[i]);
4047 s=get_reg(i_regs->regmap,rs1[i]);
4048 temp=get_reg(i_regs->regmap,-1);
4049 temp2=get_reg(i_regs->regmap,FTEMP);
4050 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4051 assert(addr<0);
4052 offset=imm[i];
4053 for(hr=0;hr<HOST_REGS;hr++) {
4054 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4055 }
4056 reglist|=1<<temp;
4057 if(offset||s<0||c) addr=temp2;
4058 else addr=s;
4059 if(s>=0) {
4060 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4061 if(c) {
4062 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4063 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4064 }
57871462 4065 }
535d208a 4066 if(!using_tlb) {
4067 if(!c) {
4068 #ifdef RAM_OFFSET
4069 map=get_reg(i_regs->regmap,ROREG);
4070 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4071 #endif
4072 emit_shlimm(addr,3,temp);
4073 if (opcode[i]==0x22||opcode[i]==0x26) {
4074 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4075 }else{
535d208a 4076 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4077 }
ffb0b9e0 4078 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4079 }
4080 else {
a327ad27 4081 if(ram_offset&&memtarget) {
4082 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
4083 fastload_reg_override=HOST_TEMPREG;
4084 }
535d208a 4085 if (opcode[i]==0x22||opcode[i]==0x26) {
4086 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4087 }else{
4088 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4089 }
57871462 4090 }
535d208a 4091 }else{ // using tlb
4092 int a;
4093 if(c) {
4094 a=-1;
4095 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4096 a=0xFFFFFFFC; // LWL/LWR
4097 }else{
4098 a=0xFFFFFFF8; // LDL/LDR
4099 }
4100 map=get_reg(i_regs->regmap,TLREG);
4101 assert(map>=0);
ea3d2e6e 4102 reglist&=~(1<<map);
535d208a 4103 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4104 if(c) {
4105 if (opcode[i]==0x22||opcode[i]==0x26) {
4106 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4107 }else{
4108 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4109 }
535d208a 4110 }
4111 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4112 }
4113 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4114 if(!c||memtarget) {
ffb0b9e0 4115 int a=temp2;
4116 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4117 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4118 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4119 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4120 }
4121 else
4122 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4123 if(rt1[i]) {
4124 assert(tl>=0);
57871462 4125 emit_andimm(temp,24,temp);
2002a1db 4126#ifdef BIG_ENDIAN_MIPS
4127 if (opcode[i]==0x26) // LWR
4128#else
4129 if (opcode[i]==0x22) // LWL
4130#endif
4131 emit_xorimm(temp,24,temp);
57871462 4132 emit_movimm(-1,HOST_TEMPREG);
4133 if (opcode[i]==0x26) {
4134 emit_shr(temp2,temp,temp2);
4135 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4136 }else{
4137 emit_shl(temp2,temp,temp2);
4138 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4139 }
4140 emit_or(temp2,tl,tl);
57871462 4141 }
535d208a 4142 //emit_storereg(rt1[i],tl); // DEBUG
4143 }
4144 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4145 // FIXME: little endian, fastload_reg_override
535d208a 4146 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4147 if(!c||memtarget) {
4148 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4149 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4150 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4151 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4152 }
4153 else
4154 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4155 if(rt1[i]) {
4156 assert(th>=0);
4157 assert(tl>=0);
57871462 4158 emit_testimm(temp,32);
4159 emit_andimm(temp,24,temp);
4160 if (opcode[i]==0x1A) { // LDL
4161 emit_rsbimm(temp,32,HOST_TEMPREG);
4162 emit_shl(temp2h,temp,temp2h);
4163 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4164 emit_movimm(-1,HOST_TEMPREG);
4165 emit_shl(temp2,temp,temp2);
4166 emit_cmove_reg(temp2h,th);
4167 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4168 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4169 emit_orreq(temp2,tl,tl);
4170 emit_orrne(temp2,th,th);
4171 }
4172 if (opcode[i]==0x1B) { // LDR
4173 emit_xorimm(temp,24,temp);
4174 emit_rsbimm(temp,32,HOST_TEMPREG);
4175 emit_shr(temp2,temp,temp2);
4176 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4177 emit_movimm(-1,HOST_TEMPREG);
4178 emit_shr(temp2h,temp,temp2h);
4179 emit_cmovne_reg(temp2,tl);
4180 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4181 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4182 emit_orrne(temp2h,th,th);
4183 emit_orreq(temp2h,tl,tl);
4184 }
4185 }
4186 }
4187}
4188#define loadlr_assemble loadlr_assemble_arm
4189
/*
 * Emit host code for the COP0 instruction at index i.
 *
 *   opcode2[i]==0    MFC0
 *   opcode2[i]==4    MTC0
 *   otherwise        opcode2[i] must be 0x10 (asserted); the low 6 bits of
 *                    source[i] select TLBR/TLBWI/TLBWR/TLBP (unless
 *                    DISABLE_TLB), RFE (PCSX builds) or ERET (other builds).
 *
 * i       index of the instruction being assembled
 * i_regs  register state for this instruction (regmap/dirty/is32 are read)
 */
4190void cop0_assemble(int i,struct regstat *i_regs)
4191{
4192 if(opcode2[i]==0) // MFC0
4193 {
4194 signed char t=get_reg(i_regs->regmap,rt1[i]);
  // Coprocessor register number: bits 11..15 of the instruction word.
4195 char copr=(source[i]>>11)&0x1f;
4196 //assert(t>=0); // Why does this happen? OOT is weird
  // Nothing is emitted when the target is unmapped or is register 0.
f1b3b369 4197 if(t>=0&&rt1[i]!=0) {
7139f3c8 4198#ifdef MUPEN64
57871462 4199 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4200 emit_movimm((source[i]>>11)&0x1f,1);
4201 emit_writeword(0,(int)&PC);
4202 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
  // Register 9: store last_count + cycle register + CLOCK_ADJUST(ccadj[i])
  // to Count before calling MFC0.
4203 if(copr==9) {
4204 emit_readword((int)&last_count,ECX);
4205 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4206 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4207 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4208 emit_writeword(HOST_CCREG,(int)&Count);
4209 }
4210 emit_call((int)MFC0);
4211 emit_readword((int)&readmem_dword,t);
7139f3c8 4212#else
  // Non-MUPEN64 builds: load the word at reg_cop0 + copr*4 straight into t.
4213 emit_readword((int)&reg_cop0+copr*4,t);
4214#endif
57871462 4215 }
4216 }
4217 else if(opcode2[i]==4) // MTC0
4218 {
4219 signed char s=get_reg(i_regs->regmap,rs1[i]);
4220 char copr=(source[i]>>11)&0x1f;
4221 assert(s>=0);
63cb0298 4222#ifdef MUPEN64
57871462 4223 emit_writeword(s,(int)&readmem_dword);
4224 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4225 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4226 emit_movimm((source[i]>>11)&0x1f,1);
4227 emit_writeword(0,(int)&PC);
4228 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4229#else
4230 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4231#endif
  // For registers 9, 11, 12 and 13, store
  // last_count + cycle register + CLOCK_ADJUST(ccadj[i]) to Count
  // before the write handler is called.
4232 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4233 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4234 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4235 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4236 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4237 emit_writeword(HOST_CCREG,(int)&Count);
4238 }
4239 // What a mess. The status register (12) can enable interrupts,
4240 // so needs a special case to handle a pending interrupt.
4241 // The interrupt must be taken immediately, because a subsequent
4242 // instruction might disable interrupts again.
7139f3c8 4243 if(copr==12||copr==13) {
fca1aef2 4244#ifdef PCSX
  // Delay-slot case: hand off to pcsx_mtc0_ds and return early, so none
  // of the code below is emitted for this instruction.
4245 if (is_delayslot) {
4246 // burn cycles to cause cc_interrupt, which will
4247 // reschedule next_interupt. Relies on CCREG from above.
4248 assem_debug("MTC0 DS %d\n", copr);
4249 emit_writeword(HOST_CCREG,(int)&last_count);
4250 emit_movimm(0,HOST_CCREG);
4251 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4252 emit_loadreg(rs1[i],1);
fca1aef2 4253 emit_movimm(copr,0);
4254 emit_call((int)pcsx_mtc0_ds);
042c7287 4255 emit_loadreg(rs1[i],s);
fca1aef2 4256 return;
4257 }
4258#endif
  // Store the address of the next instruction (start+i*4+4) to pcaddr and
  // clear pending_exception before the handler runs.
63cb0298 4259 emit_movimm(start+i*4+4,HOST_TEMPREG);
4260 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4261 emit_movimm(0,HOST_TEMPREG);
4262 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4263 }
4264 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4265 //else
fca1aef2 4266#ifdef PCSX
  // Arguments for pcsx_mtc0: host register 0 gets copr, host register 1 the
  // value. When s is HOST_CCREG (which was reloaded with the cycle count
  // above) the value is reloaded via emit_loadreg instead of copied from s.
caeefe31 4267 if(s==HOST_CCREG)
4268 emit_loadreg(rs1[i],1);
4269 else if(s!=1)
63cb0298 4270 emit_mov(s,1);
fca1aef2 4271 emit_movimm(copr,0);
4272 emit_call((int)pcsx_mtc0);
4273#else
57871462 4274 emit_call((int)MTC0);
fca1aef2 4275#endif
  // After the call, recompute the cycle register as
  // Count - CLOCK_ADJUST(ccadj[i]) - next_interupt, and make next_interupt
  // the new last_count.
7139f3c8 4276 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4277 emit_readword((int)&Count,HOST_CCREG);
042c7287 4278 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4279 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4280 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4281 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4282 emit_storereg(CCREG,HOST_CCREG);
4283 }
  // If pending_exception is non-zero after the call, branch to do_interrupt.
7139f3c8 4284 if(copr==12||copr==13) {
57871462 4285 assert(!is_delayslot);
4286 emit_readword((int)&pending_exception,14);
042c7287 4287 emit_test(14,14);
4288 emit_jne((int)&do_interrupt);
57871462 4289 }
  // Reload the source register (and its upper half, if that is mapped).
4290 emit_loadreg(rs1[i],s);
4291 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4292 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
  // Clear cop1_usable after an MTC0.
57871462 4293 cop1_usable=0;
4294 }
4295 else
4296 {
4297 assert(opcode2[i]==0x10);
3d624f89 4298#ifndef DISABLE_TLB
57871462 4299 if((source[i]&0x3f)==0x01) // TLBR
4300 emit_call((int)TLBR);
4301 if((source[i]&0x3f)==0x02) // TLBWI
4302 emit_call((int)TLBWI_new);
4303 if((source[i]&0x3f)==0x06) { // TLBWR
4304 // The TLB entry written by TLBWR is dependent on the count,
4305 // so update the cycle count
4306 emit_readword((int)&last_count,ECX);
4307 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4308 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4309 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4310 emit_writeword(HOST_CCREG,(int)&Count);
4311 emit_call((int)TLBWR_new);
4312 }
4313 if((source[i]&0x3f)==0x08) // TLBP
4314 emit_call((int)TLBP);
3d624f89 4315#endif
576bbd8f 4316#ifdef PCSX
4317 if((source[i]&0x3f)==0x10) // RFE
4318 {
  // Presumably computes Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
  // using host registers 0 and 1 -- verify against the emit_* helpers.
4319 emit_readword((int)&Status,0);
4320 emit_andimm(0,0x3c,1);
4321 emit_andimm(0,~0xf,0);
4322 emit_orrshr_imm(1,2,0);
4323 emit_writeword(0,(int)&Status);
4324 }
4325#else
57871462 4326 if((source[i]&0x3f)==0x18) // ERET
4327 {
  // Add the pending cycle adjustment to the cycle register, then jump to
  // jump_eret.
4328 int count=ccadj[i];
4329 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4330 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4331 emit_jmp((int)jump_eret);
4332 }
576bbd8f 4333#endif
57871462 4334 }
4335}
4336
/*
 * Emit code that reads COP2 data register copr (reg_cop2d[copr]) into host
 * register tl. Some registers are normalised on read and the normalised
 * value is also written back to reg_cop2d[copr].
 *
 * copr  COP2 data register number
 * tl    host register receiving the value
 * temp  scratch host register (only used for registers 28 and 29)
 */
b9b61529 4337static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4338{
4339 switch (copr) {
  // These registers are sign-extended from 16 bits and stored back.
4340 case 1:
4341 case 3:
4342 case 5:
4343 case 8:
4344 case 9:
4345 case 10:
4346 case 11:
4347 emit_readword((int)&reg_cop2d[copr],tl);
4348 emit_signextend16(tl,tl);
4349 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4350 break;
  // These registers are masked to their low 16 bits and stored back.
4351 case 7:
4352 case 16:
4353 case 17:
4354 case 18:
4355 case 19:
4356 emit_readword((int)&reg_cop2d[copr],tl);
4357 emit_andimm(tl,0xffff,tl);
4358 emit_writeword(tl,(int)&reg_cop2d[copr]);
4359 break;
  // Register 15 is read from register 14; the value is also stored to 15.
4360 case 15:
4361 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4362 emit_writeword(tl,(int)&reg_cop2d[copr]);
4363 break;
  // Registers 28/29 are assembled from registers 9, 10 and 11. For each
  // source the 0xf80 field is kept, the conditional AND-with-0 following
  // the 0x8000 test presumably zeroes it when that bit is set, and the
  // fields are combined with shifts of >>7, >>2 and <<3 respectively.
  // The result is stored to reg_cop2d[copr].
4364 case 28:
b9b61529 4365 case 29:
4366 emit_readword((int)&reg_cop2d[9],temp);
4367 emit_testimm(temp,0x8000); // do we need this?
4368 emit_andimm(temp,0xf80,temp);
4369 emit_andne_imm(temp,0,temp);
f70d384d 4370 emit_shrimm(temp,7,tl);
b9b61529 4371 emit_readword((int)&reg_cop2d[10],temp);
4372 emit_testimm(temp,0x8000);
4373 emit_andimm(temp,0xf80,temp);
4374 emit_andne_imm(temp,0,temp);
f70d384d 4375 emit_orrshr_imm(temp,2,tl);
b9b61529 4376 emit_readword((int)&reg_cop2d[11],temp);
4377 emit_testimm(temp,0x8000);
4378 emit_andimm(temp,0xf80,temp);
4379 emit_andne_imm(temp,0,temp);
f70d384d 4380 emit_orrshl_imm(temp,3,tl);
b9b61529 4381 emit_writeword(tl,(int)&reg_cop2d[copr]);
4382 break;
  // Every other register is read as-is.
4383 default:
4384 emit_readword((int)&reg_cop2d[copr],tl);
4385 break;
4386 }
4387}
4388
/*
 * Emit code that writes host register sl to COP2 data register copr,
 * including the extra stores some registers trigger.
 *
 * copr  COP2 data register number
 * sl    host register holding the value to write
 * temp  scratch host register
 */
4389static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4390{
4391 switch (copr) {
  // Register 15: shift registers 13->12 and 14->13, then store the new value
  // into both 14 and 15.
4392 case 15:
4393 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4394 emit_writeword(sl,(int)&reg_cop2d[copr]);
4395 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4396 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4397 emit_writeword(sl,(int)&reg_cop2d[14]);
4398 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4399 break;
  // Register 28: split the value into three 5-bit fields and store them as
  //   reg 9  = (sl & 0x001f) << 7
  //   reg 10 = (sl & 0x03e0) << 2
  //   reg 11 = (sl & 0x7c00) >> 3
  // then store the raw value to register 28 itself.
4400 case 28:
4401 emit_andimm(sl,0x001f,temp);
f70d384d 4402 emit_shlimm(temp,7,temp);
b9b61529 4403 emit_writeword(temp,(int)&reg_cop2d[9]);
4404 emit_andimm(sl,0x03e0,temp);
f70d384d 4405 emit_shlimm(temp,2,temp);
b9b61529 4406 emit_writeword(temp,(int)&reg_cop2d[10]);
4407 emit_andimm(sl,0x7c00,temp);
f70d384d 4408 emit_shrimm(temp,3,temp);
b9b61529 4409 emit_writeword(temp,(int)&reg_cop2d[11]);
4410 emit_writeword(sl,(int)&reg_cop2d[28]);
4411 break;
  // Register 30: store the value to register 30 and a derived count to
  // register 31. The value is copied to temp with flags set and, judging by
  // the "mi" condition suffix, inverted when negative; the count is then a
  // count-leading-zeros of temp.
4412 case 30:
4413 emit_movs(sl,temp);
4414 emit_mvnmi(temp,temp);
665f33e1 4415#ifdef HAVE_ARMV5
b9b61529 4416 emit_clz(temp,temp);
665f33e1 4417#else
  // Without HAVE_ARMV5 the count is produced by an emitted loop instead of
  // emit_clz. The two branch targets are computed relative to `out` at the
  // time each branch is emitted, so the instruction count and order of this
  // sequence must not change.
4418 emit_movs(temp,HOST_TEMPREG);
4419 emit_movimm(0,temp);
4420 emit_jeq((int)out+4*4);
4421 emit_addpl_imm(temp,1,temp);
4422 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4423 emit_jns((int)out-2*4);
4424#endif
b9b61529 4425 emit_writeword(sl,(int)&reg_cop2d[30]);
4426 emit_writeword(temp,(int)&reg_cop2d[31]);
4427 break;
  // Register 31: writes emit nothing.
b9b61529 4428 case 31:
4429 break;
  // Every other register is a plain store.
4430 default:
4431 emit_writeword(sl,(int)&reg_cop2d[copr]);
4432 break;
4433 }
4434}
4435
4436void cop2_assemble(int i,struct regstat *i_regs)
4437{
4438 u_int copr=(source[i]>>11)&0x1f;
4439 signed char temp=get_reg(i_regs->regmap,-1);
4440 if (opcode2[i]==0) { // MFC2
4441 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4442 if(tl>=0&&rt1[i]!=0)
b9b61529 4443 cop2_get_dreg(copr,tl,temp);
4444 }
4445 else if (opcode2[i]==4) { // MTC2
4446 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4447 cop2_put_dreg(copr,sl,temp);
4448 }
4449 else if (opcode2[i]==2) // CFC2
4450 {
4451 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4452 if(tl>=0&&rt1[i]!=0)
b9b61529 4453 emit_readword((int)&reg_cop2c[copr],tl);
4454 }
4455 else if (opcode2[i]==6) // CTC2
4456 {
4457 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4458 switch(copr) {
4459 case 4:
4460 case 12:
4461 case 20:
4462 case 26:
4463 case 27:
4464 case 29:
4465 case 30:
4466 emit_signextend16(sl,temp);
4467 break;
4468 case 31:
4469 //value = value & 0x7ffff000;
4470 //if (value & 0x7f87e000) value |= 0x80000000;
4471 emit_shrimm(sl,12,temp);
4472 emit_shlimm(temp,12,temp);
4473 emit_testimm(temp,0x7f000000);
4474 emit_testeqimm(temp,0x00870000);
4475 emit_testeqimm(temp,0x0000e000);
4476 emit_orrne_imm(temp,0x80000000,temp);
4477 break;
4478 default:
4479 temp=sl;
4480 break;
4481 }
4482 emit_writeword(temp,(int)&reg_cop2c[copr]);
4483 assert(sl>=0);
4484 }
4485}
4486
054175e9 4487static void c2op_prologue(u_int op,u_int reglist)
4488{
4489 save_regs_all(reglist);
82ed88eb 4490#ifdef PCNT
4491 emit_movimm(op,0);
4492 emit_call((int)pcnt_gte_start);
4493#endif
054175e9 4494 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4495}
4496
4497static void c2op_epilogue(u_int op,u_int reglist)
4498{
82ed88eb 4499#ifdef PCNT
4500 emit_movimm(op,0);
4501 emit_call((int)pcnt_gte_end);
4502#endif
054175e9 4503 restore_regs_all(reglist);
4504}
4505
6c0eefaf 4506static void c2op_call_MACtoIR(int lm,int need_flags)
4507{
4508 if(need_flags)
4509 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4510 else
4511 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4512}
4513
4514static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4515{
4516 emit_call((int)func);
4517 // func is C code and trashes r0
4518 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4519 if(need_flags||need_ir)
4520 c2op_call_MACtoIR(lm,need_flags);
4521 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4522}
4523
/*
 * Emit host code for the GTE operation at index i (function code in the low
 * 6 bits of source[i]). Nothing is emitted when gte_handlers[] has no entry
 * for the function code.
 *
 * Each case starts with c2op_prologue() and the common c2op_epilogue() runs
 * after the switch. Unless DRC_DBG is defined, several operations call
 * specialised helpers; everything else calls the generic handler from
 * gte_handlers[] or gte_handlers_nf[].
 *
 * i       index of the instruction being assembled
 * i_regs  register state for this instruction (only regmap is read)
 */
054175e9 4524static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4525{
  // NOTE(review): temp is looked up here but not referenced again in this
  // function.
4526 signed char temp=get_reg(i_regs->regmap,-1);
4527 u_int c2op=source[i]&0x3f;
6c0eefaf 4528 u_int hr,reglist_full=0,reglist;
054175e9 4529 int need_flags,need_ir;
  // reglist_full: one bit per host register that currently maps something.
b9b61529 4530 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4531 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4532 }
  // By default only the mapped registers inside CALLER_SAVE_REGS are saved.
4d646738 4533 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 4534
4535 if (gte_handlers[c2op]!=NULL) {
  // Flags are needed unless bit 63 of gte_unneeded[i+1] is set; IR results
  // are needed unless all of the 0xe00 bits are set there.
bedfea38 4536 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4537 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4538 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4539 source[i],gte_unneeded[i+1],need_flags,need_ir);
  // The NDHACK_GTE_NO_FLAGS hack forces the flagless variants.
0ff8c62c 4540 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4541 need_flags=0;
  // Instruction word bit 19 -> shift, bit 10 -> lm.
6c0eefaf 4542 int shift = (source[i] >> 19) & 1;
4543 int lm = (source[i] >> 10) & 1;
054175e9 4544 switch(c2op) {
19776aef 4545#ifndef DRC_DBG
054175e9 4546 case GTE_MVMVA: {
82336ba3 4547#ifdef HAVE_ARMV5
  // Instruction word fields: bits 15-16 -> v, bits 13-14 -> cv,
  // bits 17-18 -> mx.
054175e9 4548 int v = (source[i] >> 15) & 3;
4549 int cv = (source[i] >> 13) & 3;
4550 int mx = (source[i] >> 17) & 3;
  // This path loads host registers 4-7 below, so they join the save mask.
4d646738 4551 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 4552 c2op_prologue(c2op,reglist);
4553 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4554 if(v<3)
4555 emit_ldrd(v*8,0,4);
4556 else {
  // v==3: pack the 16-bit values at word offsets 9, 10 and 11 into r4/r5.
4557 emit_movzwl_indexed(9*4,0,4); // gteIR
4558 emit_movzwl_indexed(10*4,0,6);
4559 emit_movzwl_indexed(11*4,0,5);
4560 emit_orrshl_imm(6,16,4);
4561 }
  // For mx==3 / cv==3 the pointer is loaded from zeromem_ptr instead of
  // being computed as an offset from the base in r0.
4562 if(mx<3)
4563 emit_addimm(0,32*4+mx*8*4,6);
4564 else
4565 emit_readword((int)&zeromem_ptr,6);
4566 if(cv<3)
4567 emit_addimm(0,32*4+(cv*8+5)*4,7);
4568 else
4569 emit_readword((int)&zeromem_ptr,7);
4570#ifdef __ARM_NEON__
4571 emit_movimm(source[i],1); // opcode
4572 emit_call((int)gteMVMVA_part_neon);
4573 if(need_flags) {
4574 emit_movimm(lm,1);
4575 emit_call((int)gteMACtoIR_flags_neon);
4576 }
4577#else
  // Non-NEON: a dedicated helper covers cv==3 with shift; otherwise the shift
  // flag is passed in r1 to the flag/flagless helper.
4578 if(cv==3&&shift)
4579 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4580 else {
4581 emit_movimm(shift,1);
4582 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4583 }
6c0eefaf 4584 if(need_flags||need_ir)
4585 c2op_call_MACtoIR(lm,need_flags);
82336ba3 4586#endif
4587#else /* if not HAVE_ARMV5 */
  // Without HAVE_ARMV5: store the opcode to psxRegs.code and call the
  // generic handler.
4588 c2op_prologue(c2op,reglist);
4589 emit_movimm(source[i],1); // opcode
4590 emit_writeword(1,(int)&psxRegs.code);
4591 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 4592#endif
4593 break;
4594 }
  // GTE_OP and GTE_SQR: call the shift/noshift helper, then reload the base
  // into r0 and run MAC->IR only when flags or IR values are needed.
6c0eefaf 4595 case GTE_OP:
4596 c2op_prologue(c2op,reglist);
4597 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4598 if(need_flags||need_ir) {
4599 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4600 c2op_call_MACtoIR(lm,need_flags);
4601 }
4602 break;
  // The remaining specialised operations share c2op_call_rgb_func().
4603 case GTE_DPCS:
4604 c2op_prologue(c2op,reglist);
4605 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4606 break;
4607 case GTE_INTPL:
4608 c2op_prologue(c2op,reglist);
4609 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4610 break;
4611 case GTE_SQR:
4612 c2op_prologue(c2op,reglist);
4613 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4614 if(need_flags||need_ir) {
4615 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4616 c2op_call_MACtoIR(lm,need_flags);
4617 }
4618 break;
4619 case GTE_DCPL:
4620 c2op_prologue(c2op,reglist);
4621 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4622 break;
4623 case GTE_GPF:
4624 c2op_prologue(c2op,reglist);
4625 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4626 break;
4627 case GTE_GPL:
4628 c2op_prologue(c2op,reglist);
4629 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4630 break;
19776aef 4631#endif
  // Generic path: call the handler from the table. With DRC_DBG this is the
  // only path and the opcode is stored to psxRegs.code first.
054175e9 4632 default:
054175e9 4633 c2op_prologue(c2op,reglist);
19776aef 4634#ifdef DRC_DBG
4635 emit_movimm(source[i],1); // opcode
4636 emit_writeword(1,(int)&psxRegs.code);
4637#endif
054175e9 4638 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4639 break;
4640 }
4641 c2op_epilogue(c2op,reglist);
4642 }
b9b61529 4643}
4644
4645void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4646{
4647 // XXX: should just just do the exception instead
4648 if(!cop1_usable) {
4649 int jaddr=(int)out;
4650 emit_jmp(0);
4651 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4652 cop1_usable=1;
4653 }
4654}
4655
/*
 * Emit host code for the COP1 register-move instruction at index i
 * (MFC1, DMFC1, MTC1, DMTC1, CFC1, CTC1, selected by opcode2[i]).
 * When DISABLE_COP1 is defined the function only calls cop1_unusable().
 *
 * The FP register number is bits 11..15 of source[i]. reg_cop1_simple[] and
 * reg_cop1_double[] are read as tables of pointers: the emitted code first
 * loads the table entry and then accesses memory through it.
 *
 * i       index of the instruction being assembled
 * i_regs  register state for this instruction (only regmap is read here)
 */
57871462 4656void cop1_assemble(int i,struct regstat *i_regs)
4657{
3d624f89 4658#ifndef DISABLE_COP1
57871462 4659 // Check cop1 unusable
  // Emitted once while cop1_usable is clear: test bit 0x20000000 of the
  // register mapped to CSREG and branch to an FP_STUB when it is zero.
4660 if(!cop1_usable) {
4661 signed char rs=get_reg(i_regs->regmap,CSREG);
4662 assert(rs>=0);
4663 emit_testimm(rs,0x20000000);
4664 int jaddr=(int)out;
4665 emit_jeq(0);
4666 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4667 cop1_usable=1;
4668 }
4669 if (opcode2[i]==0) { // MFC1
4670 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4671 if(tl>=0) {
  // tl first receives the pointer, then the word it points to.
4672 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4673 emit_readword_indexed(0,tl,tl);
4674 }
4675 }
4676 else if (opcode2[i]==1) { // DMFC1
4677 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4678 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4679 if(tl>=0) {
  // The word at offset 4 is loaded into th before tl (which holds the
  // pointer) is overwritten by the word at offset 0.
4680 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4681 if(th>=0) emit_readword_indexed(4,tl,th);
4682 emit_readword_indexed(0,tl,tl);
4683 }
4684 }
4685 else if (opcode2[i]==4) { // MTC1
4686 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4687 signed char temp=get_reg(i_regs->regmap,-1);
4688 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4689 emit_writeword_indexed(sl,0,temp);
4690 }
4691 else if (opcode2[i]==5) { // DMTC1
4692 signed char sl=get_reg(i_regs->regmap,rs1[i]);
  // For guest register 0 the low-half host register is reused as the
  // high half.
4693 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4694 signed char temp=get_reg(i_regs->regmap,-1);
4695 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4696 emit_writeword_indexed(sh,4,temp);
4697 emit_writeword_indexed(sl,0,temp);
4698 }
4699 else if (opcode2[i]==2) // CFC1
4700 {
4701 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4702 if(tl>=0) {
  // Only control registers 0 (FCR0) and 31 (FCR31) are read; any other
  // register number emits nothing.
4703 u_int copr=(source[i]>>11)&0x1f;
4704 if(copr==0) emit_readword((int)&FCR0,tl);
4705 if(copr==31) emit_readword((int)&FCR31,tl);
4706 }
4707 }
4708 else if (opcode2[i]==6) // CTC1
4709 {
4710 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4711 u_int copr=(source[i]>>11)&0x1f;
4712 assert(sl>=0);
  // Only writes to control register 31 are stored; the rounding-mode update
  // below is commented out.
4713 if(copr==31)
4714 {
4715 emit_writeword(sl,(int)&FCR31);
4716 // Set the rounding mode
4717 //FIXME
4718 //char temp=get_reg(i_regs->regmap,-1);
4719 //emit_andimm(sl,3,temp);
4720 //emit_fldcw_indexed((int)&rounding_modes,temp);
4721 }
4722 }
3d624f89 4723#else
4724 cop1_unusable(i, i_regs);
4725#endif
57871462 4726}
4727
/*
 * Emit host code for the COP1 conversion / rounding instruction at index i.
 * When DISABLE_COP1 is defined the function only calls cop1_unusable().
 *
 * The instruction is identified by opcode2[i] together with the low 6 bits
 * of source[i]. The source FP register number is bits 11..15 and the
 * destination is bits 6..10 of source[i].
 *
 * With hardware VFP (__VFP_FP__ defined and __SOFTFP__ not defined) a few
 * conversions are emitted inline and the function returns early. Everything
 * else saves the mapped host registers and calls a C helper with two pointer
 * arguments: ARG1_REG = source pointer, ARG2_REG = destination pointer, each
 * loaded from reg_cop1_simple[] or reg_cop1_double[].
 *
 * i       index of the instruction being assembled
 * i_regs  register state for this instruction (only regmap is read here)
 */
4728void fconv_assemble_arm(int i,struct regstat *i_regs)
4729{
3d624f89 4730#ifndef DISABLE_COP1
57871462 4731 signed char temp=get_reg(i_regs->regmap,-1);
4732 assert(temp>=0);
4733 // Check cop1 unusable
4734 if(!cop1_usable) {
4735 signed char rs=get_reg(i_regs->regmap,CSREG);
4736 assert(rs>=0);
4737 emit_testimm(rs,0x20000000);
4738 int jaddr=(int)out;
4739 emit_jeq(0);
4740 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4741 cop1_usable=1;
4742 }
4743
  // Inline VFP fast paths. Each one loads the source through its pointer,
  // converts in a VFP register, and stores through the destination pointer.
4744 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4745 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4746 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4747 emit_flds(temp,15);
4748 emit_ftosizs(15,15); // float->int, truncate
  // The destination pointer is reloaded only when source and destination
  // register numbers differ; otherwise temp already holds it.
4749 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4750 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4751 emit_fsts(15,temp);
4752 return;
4753 }
4754 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4755 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4756 emit_vldr(temp,7);
4757 emit_ftosizd(7,13); // double->int, truncate
4758 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4759 emit_fsts(13,temp);
4760 return;
4761 }
4762
4763 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4764 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4765 emit_flds(temp,13);
4766 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4767 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4768 emit_fsitos(13,15);
4769 emit_fsts(15,temp);
4770 return;
4771 }
4772 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4773 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4774 emit_flds(temp,13);
4775 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4776 emit_fsitod(13,7);
4777 emit_vstr(7,temp);
4778 return;
4779 }
4780
4781 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4782 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4783 emit_flds(temp,13);
4784 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4785 emit_fcvtds(13,7);
4786 emit_vstr(7,temp);
4787 return;
4788 }
4789 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4790 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4791 emit_vldr(temp,7);
4792 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4793 emit_fcvtsd(7,13);
4794 emit_fsts(13,temp);
4795 return;
4796 }
4797 #endif
4798
4799 // C emulation code
4800
  // Save every host register that currently maps something, call the matching
  // C helper, then restore. At most one of the blocks below matches a given
  // instruction; if none matches, only the save/restore pair is emitted.
4801 u_int hr,reglist=0;
4802 for(hr=0;hr<HOST_REGS;hr++) {
4803 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4804 }
4805 save_regs(reglist);
4806
  // opcode2 0x14: conversions helpers named cvt_*_w (source from the simple
  // table).
4807 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4808 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4809 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4810 emit_call((int)cvt_s_w);
4811 }
4812 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4813 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4814 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4815 emit_call((int)cvt_d_w);
4816 }
  // opcode2 0x15: conversion helpers named cvt_*_l (source from the double
  // table).
4817 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4818 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4819 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4820 emit_call((int)cvt_s_l);
4821 }
4822 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4823 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4824 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4825 emit_call((int)cvt_d_l);
4826 }
4827
  // opcode2 0x10: conversion helpers named cvt_*_s (source from the simple
  // table).
4828 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4829 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4830 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4831 emit_call((int)cvt_d_s);
4832 }
4833 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4834 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4835 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4836 emit_call((int)cvt_w_s);
4837 }
4838 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4839 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4840 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4841 emit_call((int)cvt_l_s);
4842 }
4843
  // opcode2 0x11: conversion helpers named cvt_*_d (source from the double
  // table).
4844 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4845 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4846 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4847 emit_call((int)cvt_s_d);
4848 }
4849 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4850 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4851 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4852 emit_call((int)cvt_w_d);
4853 }
4854 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4855 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4856 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4857 emit_call((int)cvt_l_d);
4858 }
4859
  // opcode2 0x10, functions 0x08-0x0b: round/trunc/ceil/floor helpers named
  // *_l_s (destination from the double table).
4860 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4861 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4862 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4863 emit_call((int)round_l_s);
4864 }
4865 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4866 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4867 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4868 emit_call((int)trunc_l_s);
4869 }
4870 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4871 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4872 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4873 emit_call((int)ceil_l_s);
4874 }
4875 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4876 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4877 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4878 emit_call((int)floor_l_s);
4879 }
  // opcode2 0x10, functions 0x0c-0x0f: helpers named *_w_s (destination from
  // the simple table).
4880 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4881 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4882 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4883 emit_call((int)round_w_s);
4884 }
4885 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4886 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4887 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4888 emit_call((int)trunc_w_s);
4889 }
4890 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4891 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4892 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4893 emit_call((int)ceil_w_s);
4894 }
4895 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4896 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4897 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4898 emit_call((int)floor_w_s);
4899 }
4900
  // opcode2 0x11, functions 0x08-0x0b: helpers named *_l_d (both pointers
  // from the double table).
4901 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4902 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4903 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4904 emit_call((int)round_l_d);
4905 }
4906 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4907 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4908 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4909 emit_call((int)trunc_l_d);
4910 }
4911 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4912 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4913 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4914 emit_call((int)ceil_l_d);
4915 }
4916 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4917 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4918 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4919 emit_call((int)floor_l_d);
4920 }
  // opcode2 0x11, functions 0x0c-0x0f: helpers named *_w_d (destination from
  // the simple table).
4921 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4922 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4923 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4924 emit_call((int)round_w_d);
4925 }
4926 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4927 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4928 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4929 emit_call((int)trunc_w_d);
4930 }
4931 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4932 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4933 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4934 emit_call((int)ceil_w_d);
4935 }
4936 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4937 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4938 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4939 emit_call((int)floor_w_d);
4940 }
4941
4942 restore_regs(reglist);
3d624f89 4943#else
4944 cop1_unusable(i, i_regs);
4945#endif
57871462 4946}
4947#define fconv_assemble fconv_assemble_arm
4948
/*
 * Emit host code for the COP1 compare instruction at index i.
 * When DISABLE_COP1 is defined the function only calls cop1_unusable().
 *
 * The condition is the low 6 bits of source[i] (0x30..0x3f); opcode2[i]
 * selects the operand tables (0x10 -> reg_cop1_simple, 0x11 ->
 * reg_cop1_double). The two operand register numbers are bits 11..15 and
 * bits 16..20 of source[i]. The outcome is kept in the 0x800000 bit of the
 * host register mapped to FSREG.
 *
 * i       index of the instruction being assembled
 * i_regs  register state for this instruction (only regmap is read here)
 */
4949void fcomp_assemble(int i,struct regstat *i_regs)
4950{
3d624f89 4951#ifndef DISABLE_COP1
57871462 4952 signed char fs=get_reg(i_regs->regmap,FSREG);
4953 signed char temp=get_reg(i_regs->regmap,-1);
4954 assert(temp>=0);
4955 // Check cop1 unusable
4956 if(!cop1_usable) {
4957 signed char cs=get_reg(i_regs->regmap,CSREG);
4958 assert(cs>=0);
4959 emit_testimm(cs,0x20000000);
4960 int jaddr=(int)out;
4961 emit_jeq(0);
4962 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4963 cop1_usable=1;
4964 }
4965
  // Condition 0x30: the result bit is simply cleared, with no comparison.
4966 if((source[i]&0x3f)==0x30) {
4967 emit_andimm(fs,~0x800000,fs);
4968 return;
4969 }
4970
  // Conditions 0x38 and 0x39 are handled the same way.
4971 if((source[i]&0x3e)==0x38) {
4972 // sf/ngle - these should throw exceptions for NaNs
4973 emit_andimm(fs,~0x800000,fs);
4974 return;
4975 }
4976
  // Inline VFP path: set the result bit, compare, copy the VFP status flags
  // with emit_fmstat, then conditionally clear (and for the c_u* conditions
  // conditionally set again) the bit depending on those flags.
4977 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4978 if(opcode2[i]==0x10) {
4979 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4980 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4981 emit_orimm(fs,0x800000,fs);
4982 emit_flds(temp,14);
4983 emit_flds(HOST_TEMPREG,15);
4984 emit_fcmps(14,15);
4985 emit_fmstat();
4986 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4987 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4988 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4989 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4990 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4991 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4992 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4993 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4994 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4995 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4996 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4997 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4998 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4999 return;
5000 }
5001 if(opcode2[i]==0x11) {
5002 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5003 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5004 emit_orimm(fs,0x800000,fs);
5005 emit_vldr(temp,6);
5006 emit_vldr(HOST_TEMPREG,7);
5007 emit_fcmpd(6,7);
5008 emit_fmstat();
5009 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
5010 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
5011 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
5012 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
5013 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
5014 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
5015 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
5016 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
5017 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
5018 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
5019 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
5020 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
5021 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
5022 return;
5023 }
5024 #endif
5025
5026 // C only
5027
  // Fallback: call a C helper per condition. Every mapped host register
  // except fs is saved and restored around the call; fs is instead reloaded
  // from FSREG afterwards.
  // NOTE(review): conditions 0x30, 0x38 and 0x39 already returned above, so
  // the c_f_*, c_sf_* and c_ngle_* calls below are never reached.
5028 u_int hr,reglist=0;
5029 for(hr=0;hr<HOST_REGS;hr++) {
5030 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5031 }
5032 reglist&=~(1<<fs);
5033 save_regs(reglist);
5034 if(opcode2[i]==0x10) {
5035 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5036 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5037 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
5038 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
5039 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
5040 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
5041 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
5042 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
5043 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
5044 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
5045 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
5046 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
5047 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
5048 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5049 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5050 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5051 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5052 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5053 }
5054 if(opcode2[i]==0x11) {
5055 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5056 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5057 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5058 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5059 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5060 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5061 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5062 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5063 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5064 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5065 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5066 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5067 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5068 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5069 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5070 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5071 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5072 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5073 }
5074 restore_regs(reglist);
5075 emit_loadreg(FSREG,fs);
3d624f89 5076#else
5077 cop1_unusable(i, i_regs);
5078#endif
57871462 5079}
5080
5081void float_assemble(int i,struct regstat *i_regs)
5082{
3d624f89 5083#ifndef DISABLE_COP1
57871462 5084 signed char temp=get_reg(i_regs->regmap,-1);
5085 assert(temp>=0);
5086 // Check cop1 unusable
5087 if(!cop1_usable) {
5088 signed char cs=get_reg(i_regs->regmap,CSREG);
5089 assert(cs>=0);
5090 emit_testimm(cs,0x20000000);
5091 int jaddr=(int)out;
5092 emit_jeq(0);
5093 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5094 cop1_usable=1;
5095 }
5096
5097 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5098 if((source[i]&0x3f)==6) // mov
5099 {
5100 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5101 if(opcode2[i]==0x10) {
5102 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5103 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5104 emit_readword_indexed(0,temp,temp);
5105 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5106 }
5107 if(opcode2[i]==0x11) {
5108 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5109 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5110 emit_vldr(temp,7);
5111 emit_vstr(7,HOST_TEMPREG);
5112 }
5113 }
5114 return;
5115 }
5116
5117 if((source[i]&0x3f)>3)
5118 {
5119 if(opcode2[i]==0x10) {
5120 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5121 emit_flds(temp,15);
5122 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5123 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5124 }
5125 if((source[i]&0x3f)==4) // sqrt
5126 emit_fsqrts(15,15);
5127 if((source[i]&0x3f)==5) // abs
5128 emit_fabss(15,15);
5129 if((source[i]&0x3f)==7) // neg
5130 emit_fnegs(15,15);
5131 emit_fsts(15,temp);
5132 }
5133 if(opcode2[i]==0x11) {
5134 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5135 emit_vldr(temp,7);
5136 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5137 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5138 }
5139 if((source[i]&0x3f)==4) // sqrt
5140 emit_fsqrtd(7,7);
5141 if((source[i]&0x3f)==5) // abs
5142 emit_fabsd(7,7);
5143 if((source[i]&0x3f)==7) // neg
5144 emit_fnegd(7,7);
5145 emit_vstr(7,temp);
5146 }
5147 return;
5148 }
5149 if((source[i]&0x3f)<4)
5150 {
5151 if(opcode2[i]==0x10) {
5152 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5153 }
5154 if(opcode2[i]==0x11) {
5155 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5156 }
5157 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5158 if(opcode2[i]==0x10) {
5159 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5160 emit_flds(temp,15);
5161 emit_flds(HOST_TEMPREG,13);
5162 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5163 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5164 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5165 }
5166 }
5167 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5168 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5169 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5170 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5171 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5172 emit_fsts(15,HOST_TEMPREG);
5173 }else{
5174 emit_fsts(15,temp);
5175 }
5176 }
5177 else if(opcode2[i]==0x11) {
5178 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5179 emit_vldr(temp,7);
5180 emit_vldr(HOST_TEMPREG,6);
5181 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5182 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5183 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5184 }
5185 }
5186 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5187 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5188 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5189 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5190 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5191 emit_vstr(7,HOST_TEMPREG);
5192 }else{
5193 emit_vstr(7,temp);
5194 }
5195 }
5196 }
5197 else {
5198 if(opcode2[i]==0x10) {
5199 emit_flds(temp,15);
5200 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5201 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5202 }
5203 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5204 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5205 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5206 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5207 emit_fsts(15,temp);
5208 }
5209 else if(opcode2[i]==0x11) {
5210 emit_vldr(temp,7);
5211 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5212 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5213 }
5214 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5215 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5216 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5217 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5218 emit_vstr(7,temp);
5219 }
5220 }
5221 return;
5222 }
5223 #endif
5224
5225 u_int hr,reglist=0;
5226 for(hr=0;hr<HOST_REGS;hr++) {
5227 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5228 }
5229 if(opcode2[i]==0x10) { // Single precision
5230 save_regs(reglist);
5231 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5232 if((source[i]&0x3f)<4) {
5233 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5234 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5235 }else{
5236 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5237 }
5238 switch(source[i]&0x3f)
5239 {
5240 case 0x00: emit_call((int)add_s);break;
5241 case 0x01: emit_call((int)sub_s);break;
5242 case 0x02: emit_call((int)mul_s);break;
5243 case 0x03: emit_call((int)div_s);break;
5244 case 0x04: emit_call((int)sqrt_s);break;
5245 case 0x05: emit_call((int)abs_s);break;
5246 case 0x06: emit_call((int)mov_s);break;
5247 case 0x07: emit_call((int)neg_s);break;
5248 }
5249 restore_regs(reglist);
5250 }
5251 if(opcode2[i]==0x11) { // Double precision
5252 save_regs(reglist);
5253 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5254 if((source[i]&0x3f)<4) {
5255 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5256 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5257 }else{
5258 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5259 }
5260 switch(source[i]&0x3f)
5261 {
5262 case 0x00: emit_call((int)add_d);break;
5263 case 0x01: emit_call((int)sub_d);break;
5264 case 0x02: emit_call((int)mul_d);break;
5265 case 0x03: emit_call((int)div_d);break;
5266 case 0x04: emit_call((int)sqrt_d);break;
5267 case 0x05: emit_call((int)abs_d);break;
5268 case 0x06: emit_call((int)mov_d);break;
5269 case 0x07: emit_call((int)neg_d);break;
5270 }
5271 restore_regs(reglist);
5272 }
3d624f89 5273#else
5274 cop1_unusable(i, i_regs);
5275#endif
57871462 5276}
5277
5278void multdiv_assemble_arm(int i,struct regstat *i_regs)
5279{
5280 // case 0x18: MULT
5281 // case 0x19: MULTU
5282 // case 0x1A: DIV
5283 // case 0x1B: DIVU
5284 // case 0x1C: DMULT
5285 // case 0x1D: DMULTU
5286 // case 0x1E: DDIV
5287 // case 0x1F: DDIVU
5288 if(rs1[i]&&rs2[i])
5289 {
5290 if((opcode2[i]&4)==0) // 32-bit
5291 {
5292 if(opcode2[i]==0x18) // MULT
5293 {
5294 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5295 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5296 signed char hi=get_reg(i_regs->regmap,HIREG);
5297 signed char lo=get_reg(i_regs->regmap,LOREG);
5298 assert(m1>=0);
5299 assert(m2>=0);
5300 assert(hi>=0);
5301 assert(lo>=0);
5302 emit_smull(m1,m2,hi,lo);
5303 }
5304 if(opcode2[i]==0x19) // MULTU
5305 {
5306 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5307 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5308 signed char hi=get_reg(i_regs->regmap,HIREG);
5309 signed char lo=get_reg(i_regs->regmap,LOREG);
5310 assert(m1>=0);
5311 assert(m2>=0);
5312 assert(hi>=0);
5313 assert(lo>=0);
5314 emit_umull(m1,m2,hi,lo);
5315 }
5316 if(opcode2[i]==0x1A) // DIV
5317 {
5318 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5319 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5320 assert(d1>=0);
5321 assert(d2>=0);
5322 signed char quotient=get_reg(i_regs->regmap,LOREG);
5323 signed char remainder=get_reg(i_regs->regmap,HIREG);
5324 assert(quotient>=0);
5325 assert(remainder>=0);
5326 emit_movs(d1,remainder);
44a80f6a 5327 emit_movimm(0xffffffff,quotient);
5328 emit_negmi(quotient,quotient); // .. quotient and ..
5329 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5330 emit_movs(d2,HOST_TEMPREG);
5331 emit_jeq((int)out+52); // Division by zero
82336ba3 5332 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 5333#ifdef HAVE_ARMV5
57871462 5334 emit_clz(HOST_TEMPREG,quotient);
5335 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 5336#else
5337 emit_movimm(0,quotient);
5338 emit_addpl_imm(quotient,1,quotient);
5339 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
5340 emit_jns((int)out-2*4);
5341#endif
57871462 5342 emit_orimm(quotient,1<<31,quotient);
5343 emit_shr(quotient,quotient,quotient);
5344 emit_cmp(remainder,HOST_TEMPREG);
5345 emit_subcs(remainder,HOST_TEMPREG,remainder);
5346 emit_adcs(quotient,quotient,quotient);
5347 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5348 emit_jcc((int)out-16); // -4
5349 emit_teq(d1,d2);
5350 emit_negmi(quotient,quotient);
5351 emit_test(d1,d1);
5352 emit_negmi(remainder,remainder);
5353 }
5354 if(opcode2[i]==0x1B) // DIVU
5355 {
5356 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5357 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5358 assert(d1>=0);
5359 assert(d2>=0);
5360 signed char quotient=get_reg(i_regs->regmap,LOREG);
5361 signed char remainder=get_reg(i_regs->regmap,HIREG);
5362 assert(quotient>=0);
5363 assert(remainder>=0);
44a80f6a 5364 emit_mov(d1,remainder);
5365 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5366 emit_test(d2,d2);
44a80f6a 5367 emit_jeq((int)out+40); // Division by zero
665f33e1 5368#ifdef HAVE_ARMV5
57871462 5369 emit_clz(d2,HOST_TEMPREG);
5370 emit_movimm(1<<31,quotient);
5371 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 5372#else
5373 emit_movimm(0,HOST_TEMPREG);
82336ba3 5374 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
5375 emit_lslpls_imm(d2,1,d2);
665f33e1 5376 emit_jns((int)out-2*4);
5377 emit_movimm(1<<31,quotient);
5378#endif
57871462 5379 emit_shr(quotient,HOST_TEMPREG,quotient);
5380 emit_cmp(remainder,d2);
5381 emit_subcs(remainder,d2,remainder);
5382 emit_adcs(quotient,quotient,quotient);
5383 emit_shrcc_imm(d2,1,d2);
5384 emit_jcc((int)out-16); // -4
5385 }
5386 }
5387 else // 64-bit
4600ba03 5388#ifndef FORCE32
57871462 5389 {
5390 if(opcode2[i]==0x1C) // DMULT
5391 {
5392 assert(opcode2[i]!=0x1C);
5393 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5394 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5395 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5396 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5397 assert(m1h>=0);
5398 assert(m2h>=0);
5399 assert(m1l>=0);
5400 assert(m2l>=0);
5401 emit_pushreg(m2h);
5402 emit_pushreg(m2l);
5403 emit_pushreg(m1h);
5404 emit_pushreg(m1l);
5405 emit_call((int)&mult64);
5406 emit_popreg(m1l);
5407 emit_popreg(m1h);
5408 emit_popreg(m2l);
5409 emit_popreg(m2h);
5410 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5411 signed char hil=get_reg(i_regs->regmap,HIREG);
5412 if(hih>=0) emit_loadreg(HIREG|64,hih);
5413 if(hil>=0) emit_loadreg(HIREG,hil);
5414 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5415 signed char lol=get_reg(i_regs->regmap,LOREG);
5416 if(loh>=0) emit_loadreg(LOREG|64,loh);
5417 if(lol>=0) emit_loadreg(LOREG,lol);
5418 }
5419 if(opcode2[i]==0x1D) // DMULTU
5420 {
5421 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5422 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5423 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5424 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5425 assert(m1h>=0);
5426 assert(m2h>=0);
5427 assert(m1l>=0);
5428 assert(m2l>=0);
4d646738 5429 save_regs(CALLER_SAVE_REGS);
57871462 5430 if(m1l!=0) emit_mov(m1l,0);
5431 if(m1h==0) emit_readword((int)&dynarec_local,1);
5432 else if(m1h>1) emit_mov(m1h,1);
5433 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5434 else if(m2l>2) emit_mov(m2l,2);
5435 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5436 else if(m2h>3) emit_mov(m2h,3);
5437 emit_call((int)&multu64);
4d646738 5438 restore_regs(CALLER_SAVE_REGS);
57871462 5439 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5440 signed char hil=get_reg(i_regs->regmap,HIREG);
5441 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5442 signed char lol=get_reg(i_regs->regmap,LOREG);
5443 /*signed char temp=get_reg(i_regs->regmap,-1);
5444 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5445 signed char rl=get_reg(i_regs->regmap,HIREG);
5446 assert(m1h>=0);
5447 assert(m2h>=0);
5448 assert(m1l>=0);
5449 assert(m2l>=0);
5450 assert(temp>=0);
5451 //emit_mov(m1l,EAX);
5452 //emit_mul(m2l);
5453 emit_umull(rl,rh,m1l,m2l);
5454 emit_storereg(LOREG,rl);
5455 emit_mov(rh,temp);
5456 //emit_mov(m1h,EAX);
5457 //emit_mul(m2l);
5458 emit_umull(rl,rh,m1h,m2l);
5459 emit_adds(rl,temp,temp);
5460 emit_adcimm(rh,0,rh);
5461 emit_storereg(HIREG,rh);
5462 //emit_mov(m2h,EAX);
5463 //emit_mul(m1l);
5464 emit_umull(rl,rh,m1l,m2h);
5465 emit_adds(rl,temp,temp);
5466 emit_adcimm(rh,0,rh);
5467 emit_storereg(LOREG|64,temp);
5468 emit_mov(rh,temp);
5469 //emit_mov(m2h,EAX);
5470 //emit_mul(m1h);
5471 emit_umull(rl,rh,m1h,m2h);
5472 emit_adds(rl,temp,rl);
5473 emit_loadreg(HIREG,temp);
5474 emit_adcimm(rh,0,rh);
5475 emit_adds(rl,temp,rl);
5476 emit_adcimm(rh,0,rh);
5477 // DEBUG
5478 /*
5479 emit_pushreg(m2h);
5480 emit_pushreg(m2l);
5481 emit_pushreg(m1h);
5482 emit_pushreg(m1l);
5483 emit_call((int)&multu64);
5484 emit_popreg(m1l);
5485 emit_popreg(m1h);
5486 emit_popreg(m2l);
5487 emit_popreg(m2h);
5488 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5489 signed char hil=get_reg(i_regs->regmap,HIREG);
5490 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5491 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5492 */
5493 // Shouldn't be necessary
5494 //char loh=get_reg(i_regs->regmap,LOREG|64);
5495 //char lol=get_reg(i_regs->regmap,LOREG);
5496 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5497 //if(lol>=0) emit_loadreg(LOREG,lol);
5498 }
5499 if(opcode2[i]==0x1E) // DDIV
5500 {
5501 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5502 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5503 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5504 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5505 assert(d1h>=0);
5506 assert(d2h>=0);
5507 assert(d1l>=0);
5508 assert(d2l>=0);
4d646738 5509 save_regs(CALLER_SAVE_REGS);
57871462 5510 if(d1l!=0) emit_mov(d1l,0);
5511 if(d1h==0) emit_readword((int)&dynarec_local,1);
5512 else if(d1h>1) emit_mov(d1h,1);
5513 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5514 else if(d2l>2) emit_mov(d2l,2);
5515 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5516 else if(d2h>3) emit_mov(d2h,3);
5517 emit_call((int)&div64);
4d646738 5518 restore_regs(CALLER_SAVE_REGS);
57871462 5519 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5520 signed char hil=get_reg(i_regs->regmap,HIREG);
5521 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5522 signed char lol=get_reg(i_regs->regmap,LOREG);
5523 if(hih>=0) emit_loadreg(HIREG|64,hih);
5524 if(hil>=0) emit_loadreg(HIREG,hil);
5525 if(loh>=0) emit_loadreg(LOREG|64,loh);
5526 if(lol>=0) emit_loadreg(LOREG,lol);
5527 }
5528 if(opcode2[i]==0x1F) // DDIVU
5529 {
5530 //u_int hr,reglist=0;
5531 //for(hr=0;hr<HOST_REGS;hr++) {
5532 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5533 //}
5534 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5535 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5536 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5537 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5538 assert(d1h>=0);
5539 assert(d2h>=0);
5540 assert(d1l>=0);
5541 assert(d2l>=0);
4d646738 5542 save_regs(CALLER_SAVE_REGS);
57871462 5543 if(d1l!=0) emit_mov(d1l,0);
5544 if(d1h==0) emit_readword((int)&dynarec_local,1);
5545 else if(d1h>1) emit_mov(d1h,1);
5546 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5547 else if(d2l>2) emit_mov(d2l,2);
5548 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5549 else if(d2h>3) emit_mov(d2h,3);
5550 emit_call((int)&divu64);
4d646738 5551 restore_regs(CALLER_SAVE_REGS);
57871462 5552 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5553 signed char hil=get_reg(i_regs->regmap,HIREG);
5554 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5555 signed char lol=get_reg(i_regs->regmap,LOREG);
5556 if(hih>=0) emit_loadreg(HIREG|64,hih);
5557 if(hil>=0) emit_loadreg(HIREG,hil);
5558 if(loh>=0) emit_loadreg(LOREG|64,loh);
5559 if(lol>=0) emit_loadreg(LOREG,lol);
5560 }
5561 }
4600ba03 5562#else
5563 assert(0);
5564#endif
57871462 5565 }
5566 else
5567 {
5568 // Multiply by zero is zero.
5569 // MIPS does not have a divide by zero exception.
5570 // The result is undefined, we return zero.
5571 signed char hr=get_reg(i_regs->regmap,HIREG);
5572 signed char lr=get_reg(i_regs->regmap,LOREG);
5573 if(hr>=0) emit_zeroreg(hr);
5574 if(lr>=0) emit_zeroreg(lr);
5575 }
5576}
5577#define multdiv_assemble multdiv_assemble_arm
5578
// Intentionally emits nothing on ARM.  The x86 backend uses this hook to
// load the value 0xf8 into a register; on ARM the hash is computed with a
// single instruction in do_rhash(), so no preload is needed.
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
5583
5584void do_preload_rhtbl(int ht) {
5585 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5586}
5587
// Emit the hash computation: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5591
5592void do_miniht_load(int ht,int rh) {
5593 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5594 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5595}
5596
5597void do_miniht_jump(int rs,int rh,int ht) {
5598 emit_cmp(rh,rs);
5599 emit_ldreq_indexed(ht,4,15);
5600 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5601 emit_mov(rs,7);
5602 emit_jmp(jump_vaddr_reg[7]);
5603 #else
5604 emit_jmp(jump_vaddr_reg[rs]);
5605 #endif
5606}
5607
5608void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 5609 #ifndef HAVE_ARMV7
57871462 5610 emit_movimm(return_address,rt); // PC into link register
5611 add_to_linker((int)out,return_address,1);
5612 emit_pcreladdr(temp);
5613 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5614 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5615 #else
5616 emit_movw(return_address&0x0000FFFF,rt);
5617 add_to_linker((int)out,return_address,1);
5618 emit_pcreladdr(temp);
5619 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5620 emit_movt(return_address&0xFFFF0000,rt);
5621 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5622 #endif
5623}
5624
5625// Sign-extend to 64 bits and write out upper half of a register
5626// This is useful where we have a 32-bit value in a register, and want to
5627// keep it in a 32-bit register, but can't guarantee that it won't be read
5628// as a 64-bit value later.
5629void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5630{
24385cae 5631#ifndef FORCE32
57871462 5632 if(is32_pre==is32) return;
5633 int hr,reg;
5634 for(hr=0;hr<HOST_REGS;hr++) {
5635 if(hr!=EXCLUDE_REG) {
5636 //if(pre[hr]==entry[hr]) {
5637 if((reg=pre[hr])>=0) {
5638 if((dirty>>hr)&1) {
5639 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5640 emit_sarimm(hr,31,HOST_TEMPREG);
5641 emit_storereg(reg|64,HOST_TEMPREG);
5642 }
5643 }
5644 }
5645 //}
5646 }
5647 }
24385cae 5648#endif
57871462 5649}
5650
5651void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5652{
5653 //if(dirty_pre==dirty) return;
5654 int hr,reg,new_hr;
5655 for(hr=0;hr<HOST_REGS;hr++) {
5656 if(hr!=EXCLUDE_REG) {
5657 reg=pre[hr];
5658 if(((~u)>>(reg&63))&1) {
f776eb14 5659 if(reg>0) {
57871462 5660 if(((dirty_pre&~dirty)>>hr)&1) {
5661 if(reg>0&&reg<34) {
5662 emit_storereg(reg,hr);
5663 if( ((is32_pre&~uu)>>reg)&1 ) {
5664 emit_sarimm(hr,31,HOST_TEMPREG);
5665 emit_storereg(reg|64,HOST_TEMPREG);
5666 }
5667 }
5668 else if(reg>=64) {
5669 emit_storereg(reg,hr);
5670 }
5671 }
5672 }
57871462 5673 }
5674 }
5675 }
5676}
5677
5678
5679/* using strd could possibly help but you'd have to allocate registers in pairs
5680void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5681{
5682 int hr;
5683 int wrote=-1;
5684 for(hr=HOST_REGS-1;hr>=0;hr--) {
5685 if(hr!=EXCLUDE_REG) {
5686 if(pre[hr]!=entry[hr]) {
5687 if(pre[hr]>=0) {
5688 if((dirty>>hr)&1) {
5689 if(get_reg(entry,pre[hr])<0) {
5690 if(pre[hr]<64) {
5691 if(!((u>>pre[hr])&1)) {
5692 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5693 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5694 emit_sarimm(hr,31,hr+1);
5695 emit_strdreg(pre[hr],hr);
5696 }
5697 else
5698 emit_storereg(pre[hr],hr);
5699 }else{
5700 emit_storereg(pre[hr],hr);
5701 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5702 emit_sarimm(hr,31,hr);
5703 emit_storereg(pre[hr]|64,hr);
5704 }
5705 }
5706 }
5707 }else{
5708 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5709 emit_storereg(pre[hr],hr);
5710 }
5711 }
5712 wrote=hr;
5713 }
5714 }
5715 }
5716 }
5717 }
5718 }
5719 for(hr=0;hr<HOST_REGS;hr++) {
5720 if(hr!=EXCLUDE_REG) {
5721 if(pre[hr]!=entry[hr]) {
5722 if(pre[hr]>=0) {
5723 int nr;
5724 if((nr=get_reg(entry,pre[hr]))>=0) {
5725 emit_mov(hr,nr);
5726 }
5727 }
5728 }
5729 }
5730 }
5731}
5732#define wb_invalidate wb_invalidate_arm
5733*/
5734
dd3a91a1 5735// Clearing the cache is rather slow on ARM Linux, so mark the areas
5736// that need to be cleared, and then only clear these areas once.
5737void do_clear_cache()
5738{
5739 int i,j;
5740 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5741 {
5742 u_int bitmap=needs_clear_cache[i];
5743 if(bitmap) {
5744 u_int start,end;
5745 for(j=0;j<32;j++)
5746 {
5747 if(bitmap&(1<<j)) {
bdeade46 5748 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 5749 end=start+4095;
5750 j++;
5751 while(j<32) {
5752 if(bitmap&(1<<j)) {
5753 end+=4096;
5754 j++;
5755 }else{
5756 __clear_cache((void *)start,(void *)end);
5757 break;
5758 }
5759 }
5760 }
5761 }
5762 needs_clear_cache[i]=0;
5763 }
5764 }
5765}
5766
57871462 5767// CPU-architecture-specific initialization
5768void arch_init() {
3d624f89 5769#ifndef DISABLE_COP1
57871462 5770 rounding_modes[0]=0x0<<22; // round
5771 rounding_modes[1]=0x3<<22; // trunc
5772 rounding_modes[2]=0x1<<22; // ceil
5773 rounding_modes[3]=0x2<<22; // floor
3d624f89 5774#endif
57871462 5775}
b9b61529 5776
5777// vim:shiftwidth=2:expandtab