drc: try to avoid full constant reload
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
23#include "../gte_arm.h"
24#include "../gte_neon.h"
25#include "pcnt.h"
26#endif
27
57871462 28extern int cycle_count;
29extern int last_count;
30extern int pcaddr;
31extern int pending_exception;
32extern int branch_target;
33extern uint64_t readmem_dword;
3d624f89 34#ifdef MUPEN64
57871462 35extern precomp_instr fake_pc;
3d624f89 36#endif
57871462 37extern void *dynarec_local;
38extern u_int memory_map[1048576];
39extern u_int mini_ht[32][2];
40extern u_int rounding_modes[4];
41
42void indirect_jump_indexed();
43void indirect_jump();
44void do_interrupt();
45void jump_vaddr_r0();
46void jump_vaddr_r1();
47void jump_vaddr_r2();
48void jump_vaddr_r3();
49void jump_vaddr_r4();
50void jump_vaddr_r5();
51void jump_vaddr_r6();
52void jump_vaddr_r7();
53void jump_vaddr_r8();
54void jump_vaddr_r9();
55void jump_vaddr_r10();
56void jump_vaddr_r12();
57
58const u_int jump_vaddr_reg[16] = {
59 (int)jump_vaddr_r0,
60 (int)jump_vaddr_r1,
61 (int)jump_vaddr_r2,
62 (int)jump_vaddr_r3,
63 (int)jump_vaddr_r4,
64 (int)jump_vaddr_r5,
65 (int)jump_vaddr_r6,
66 (int)jump_vaddr_r7,
67 (int)jump_vaddr_r8,
68 (int)jump_vaddr_r9,
69 (int)jump_vaddr_r10,
70 0,
71 (int)jump_vaddr_r12,
72 0,
73 0,
74 0};
75
0bbd1454 76void invalidate_addr_r0();
77void invalidate_addr_r1();
78void invalidate_addr_r2();
79void invalidate_addr_r3();
80void invalidate_addr_r4();
81void invalidate_addr_r5();
82void invalidate_addr_r6();
83void invalidate_addr_r7();
84void invalidate_addr_r8();
85void invalidate_addr_r9();
86void invalidate_addr_r10();
87void invalidate_addr_r12();
88
89const u_int invalidate_addr_reg[16] = {
90 (int)invalidate_addr_r0,
91 (int)invalidate_addr_r1,
92 (int)invalidate_addr_r2,
93 (int)invalidate_addr_r3,
94 (int)invalidate_addr_r4,
95 (int)invalidate_addr_r5,
96 (int)invalidate_addr_r6,
97 (int)invalidate_addr_r7,
98 (int)invalidate_addr_r8,
99 (int)invalidate_addr_r9,
100 (int)invalidate_addr_r10,
101 0,
102 (int)invalidate_addr_r12,
103 0,
104 0,
105 0};
106
57871462 107#include "fpu.h"
108
dd3a91a1 109unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
110
57871462 111/* Linker */
112
113void set_jump_target(int addr,u_int target)
114{
115 u_char *ptr=(u_char *)addr;
116 u_int *ptr2=(u_int *)ptr;
117 if(ptr[3]==0xe2) {
118 assert((target-(u_int)ptr2-8)<1024);
119 assert((addr&3)==0);
120 assert((target&3)==0);
121 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
122 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
123 }
124 else if(ptr[3]==0x72) {
125 // generated by emit_jno_unlikely
126 if((target-(u_int)ptr2-8)<1024) {
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 }
131 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
132 assert((addr&3)==0);
133 assert((target&3)==0);
134 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
135 }
136 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
137 }
138 else {
139 assert((ptr[3]&0x0e)==0xa);
140 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
141 }
142}
143
144// This optionally copies the instruction from the target of the branch into
145// the space before the branch. Works, but the difference in speed is
146// usually insignificant.
147void set_jump_target_fillslot(int addr,u_int target,int copy)
148{
149 u_char *ptr=(u_char *)addr;
150 u_int *ptr2=(u_int *)ptr;
151 assert(!copy||ptr2[-1]==0xe28dd000);
152 if(ptr[3]==0xe2) {
153 assert(!copy);
154 assert((target-(u_int)ptr2-8)<4096);
155 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 u_int target_insn=*(u_int *)target;
160 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
161 copy=0;
162 }
163 if((target_insn&0x0c100000)==0x04100000) { // Load
164 copy=0;
165 }
166 if(target_insn&0x08000000) {
167 copy=0;
168 }
169 if(copy) {
170 ptr2[-1]=target_insn;
171 target+=4;
172 }
173 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
174 }
175}
176
177/* Literal pool */
178add_literal(int addr,int val)
179{
15776b68 180 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 181 literals[literalcount][0]=addr;
182 literals[literalcount][1]=val;
183 literalcount++;
184}
185
f76eeef9 186void *kill_pointer(void *stub)
57871462 187{
188 int *ptr=(int *)(stub+4);
189 assert((*ptr&0x0ff00000)==0x05900000);
190 u_int offset=*ptr&0xfff;
191 int **l_ptr=(void *)ptr+offset+8;
192 int *i_ptr=*l_ptr;
193 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 194 return i_ptr;
57871462 195}
196
f968d35d 197// find where external branch is liked to using addr of it's stub:
198// get address that insn one after stub loads (dyna_linker arg1),
199// treat it as a pointer to branch insn,
200// return addr where that branch jumps to
57871462 201int get_pointer(void *stub)
202{
203 //printf("get_pointer(%x)\n",(int)stub);
204 int *ptr=(int *)(stub+4);
f968d35d 205 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 206 u_int offset=*ptr&0xfff;
207 int **l_ptr=(void *)ptr+offset+8;
208 int *i_ptr=*l_ptr;
209 assert((*i_ptr&0x0f000000)==0x0a000000);
210 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
211}
212
213// Find the "clean" entry point from a "dirty" entry point
214// by skipping past the call to verify_code
215u_int get_clean_addr(int addr)
216{
217 int *ptr=(int *)addr;
218 #ifdef ARMv5_ONLY
219 ptr+=4;
220 #else
221 ptr+=6;
222 #endif
223 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
224 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
225 ptr++;
226 if((*ptr&0xFF000000)==0xea000000) {
227 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
228 }
229 return (u_int)ptr;
230}
231
232int verify_dirty(int addr)
233{
234 u_int *ptr=(u_int *)addr;
235 #ifdef ARMv5_ONLY
236 // get from literal pool
15776b68 237 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 238 u_int offset=*ptr&0xfff;
239 u_int *l_ptr=(void *)ptr+offset+8;
240 u_int source=l_ptr[0];
241 u_int copy=l_ptr[1];
242 u_int len=l_ptr[2];
243 ptr+=4;
244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 254#ifndef DISABLE_TLB
cfcba99a 255 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 256 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
257 unsigned int page=source>>12;
258 unsigned int map_value=memory_map[page];
259 if(map_value>=0x80000000) return 0;
260 while(page<((source+len-1)>>12)) {
261 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
262 }
263 source = source+(map_value<<2);
264 }
63cb0298 265#endif
57871462 266 //printf("verify_dirty: %x %x %x\n",source,copy,len);
267 return !memcmp((void *)source,(void *)copy,len);
268}
269
270// This doesn't necessarily find all clean entry points, just
271// guarantees that it's not dirty
272int isclean(int addr)
273{
274 #ifdef ARMv5_ONLY
275 int *ptr=((u_int *)addr)+4;
276 #else
277 int *ptr=((u_int *)addr)+6;
278 #endif
279 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
280 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
281 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
282 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
284 return 1;
285}
286
287void get_bounds(int addr,u_int *start,u_int *end)
288{
289 u_int *ptr=(u_int *)addr;
290 #ifdef ARMv5_ONLY
291 // get from literal pool
15776b68 292 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 293 u_int offset=*ptr&0xfff;
294 u_int *l_ptr=(void *)ptr+offset+8;
295 u_int source=l_ptr[0];
296 //u_int copy=l_ptr[1];
297 u_int len=l_ptr[2];
298 ptr+=4;
299 #else
300 // ARMv7 movw/movt
301 assert((*ptr&0xFFF00000)==0xe3000000);
302 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
303 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
304 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
305 ptr+=6;
306 #endif
307 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
308 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 309#ifndef DISABLE_TLB
cfcba99a 310 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 311 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
312 if(memory_map[source>>12]>=0x80000000) source = 0;
313 else source = source+(memory_map[source>>12]<<2);
314 }
63cb0298 315#endif
57871462 316 *start=source;
317 *end=source+len;
318}
319
320/* Register allocation */
321
322// Note: registers are allocated clean (unmodified state)
323// if you intend to modify the register, you must call dirty_reg().
324void alloc_reg(struct regstat *cur,int i,signed char reg)
325{
326 int r,hr;
327 int preferred_reg = (reg&7);
328 if(reg==CCREG) preferred_reg=HOST_CCREG;
329 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
330
331 // Don't allocate unused registers
332 if((cur->u>>reg)&1) return;
333
334 // see if it's already allocated
335 for(hr=0;hr<HOST_REGS;hr++)
336 {
337 if(cur->regmap[hr]==reg) return;
338 }
339
340 // Keep the same mapping if the register was already allocated in a loop
341 preferred_reg = loop_reg(i,reg,preferred_reg);
342
343 // Try to allocate the preferred register
344 if(cur->regmap[preferred_reg]==-1) {
345 cur->regmap[preferred_reg]=reg;
346 cur->dirty&=~(1<<preferred_reg);
347 cur->isconst&=~(1<<preferred_reg);
348 return;
349 }
350 r=cur->regmap[preferred_reg];
351 if(r<64&&((cur->u>>r)&1)) {
352 cur->regmap[preferred_reg]=reg;
353 cur->dirty&=~(1<<preferred_reg);
354 cur->isconst&=~(1<<preferred_reg);
355 return;
356 }
357 if(r>=64&&((cur->uu>>(r&63))&1)) {
358 cur->regmap[preferred_reg]=reg;
359 cur->dirty&=~(1<<preferred_reg);
360 cur->isconst&=~(1<<preferred_reg);
361 return;
362 }
363
364 // Clear any unneeded registers
365 // We try to keep the mapping consistent, if possible, because it
366 // makes branches easier (especially loops). So we try to allocate
367 // first (see above) before removing old mappings. If this is not
368 // possible then go ahead and clear out the registers that are no
369 // longer needed.
370 for(hr=0;hr<HOST_REGS;hr++)
371 {
372 r=cur->regmap[hr];
373 if(r>=0) {
374 if(r<64) {
375 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
376 }
377 else
378 {
379 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
380 }
381 }
382 }
383 // Try to allocate any available register, but prefer
384 // registers that have not been used recently.
385 if(i>0) {
386 for(hr=0;hr<HOST_REGS;hr++) {
387 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
388 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
389 cur->regmap[hr]=reg;
390 cur->dirty&=~(1<<hr);
391 cur->isconst&=~(1<<hr);
392 return;
393 }
394 }
395 }
396 }
397 // Try to allocate any available register
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 cur->regmap[hr]=reg;
401 cur->dirty&=~(1<<hr);
402 cur->isconst&=~(1<<hr);
403 return;
404 }
405 }
406
407 // Ok, now we have to evict someone
408 // Pick a register we hopefully won't need soon
409 u_char hsn[MAXREG+1];
410 memset(hsn,10,sizeof(hsn));
411 int j;
412 lsn(hsn,i,&preferred_reg);
413 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
414 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
415 if(i>0) {
416 // Don't evict the cycle count at entry points, otherwise the entry
417 // stub will have to write it.
418 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
419 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
420 for(j=10;j>=3;j--)
421 {
422 // Alloc preferred register if available
423 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
424 for(hr=0;hr<HOST_REGS;hr++) {
425 // Evict both parts of a 64-bit register
426 if((cur->regmap[hr]&63)==r) {
427 cur->regmap[hr]=-1;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 }
431 }
432 cur->regmap[preferred_reg]=reg;
433 return;
434 }
435 for(r=1;r<=MAXREG;r++)
436 {
437 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r+64) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 for(hr=0;hr<HOST_REGS;hr++) {
449 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
450 if(cur->regmap[hr]==r) {
451 cur->regmap[hr]=reg;
452 cur->dirty&=~(1<<hr);
453 cur->isconst&=~(1<<hr);
454 return;
455 }
456 }
457 }
458 }
459 }
460 }
461 }
462 for(j=10;j>=0;j--)
463 {
464 for(r=1;r<=MAXREG;r++)
465 {
466 if(hsn[r]==j) {
467 for(hr=0;hr<HOST_REGS;hr++) {
468 if(cur->regmap[hr]==r+64) {
469 cur->regmap[hr]=reg;
470 cur->dirty&=~(1<<hr);
471 cur->isconst&=~(1<<hr);
472 return;
473 }
474 }
475 for(hr=0;hr<HOST_REGS;hr++) {
476 if(cur->regmap[hr]==r) {
477 cur->regmap[hr]=reg;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 }
484 }
485 }
486 printf("This shouldn't happen (alloc_reg)");exit(1);
487}
488
489void alloc_reg64(struct regstat *cur,int i,signed char reg)
490{
491 int preferred_reg = 8+(reg&1);
492 int r,hr;
493
494 // allocate the lower 32 bits
495 alloc_reg(cur,i,reg);
496
497 // Don't allocate unused registers
498 if((cur->uu>>reg)&1) return;
499
500 // see if the upper half is already allocated
501 for(hr=0;hr<HOST_REGS;hr++)
502 {
503 if(cur->regmap[hr]==reg+64) return;
504 }
505
506 // Keep the same mapping if the register was already allocated in a loop
507 preferred_reg = loop_reg(i,reg,preferred_reg);
508
509 // Try to allocate the preferred register
510 if(cur->regmap[preferred_reg]==-1) {
511 cur->regmap[preferred_reg]=reg|64;
512 cur->dirty&=~(1<<preferred_reg);
513 cur->isconst&=~(1<<preferred_reg);
514 return;
515 }
516 r=cur->regmap[preferred_reg];
517 if(r<64&&((cur->u>>r)&1)) {
518 cur->regmap[preferred_reg]=reg|64;
519 cur->dirty&=~(1<<preferred_reg);
520 cur->isconst&=~(1<<preferred_reg);
521 return;
522 }
523 if(r>=64&&((cur->uu>>(r&63))&1)) {
524 cur->regmap[preferred_reg]=reg|64;
525 cur->dirty&=~(1<<preferred_reg);
526 cur->isconst&=~(1<<preferred_reg);
527 return;
528 }
529
530 // Clear any unneeded registers
531 // We try to keep the mapping consistent, if possible, because it
532 // makes branches easier (especially loops). So we try to allocate
533 // first (see above) before removing old mappings. If this is not
534 // possible then go ahead and clear out the registers that are no
535 // longer needed.
536 for(hr=HOST_REGS-1;hr>=0;hr--)
537 {
538 r=cur->regmap[hr];
539 if(r>=0) {
540 if(r<64) {
541 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
542 }
543 else
544 {
545 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
546 }
547 }
548 }
549 // Try to allocate any available register, but prefer
550 // registers that have not been used recently.
551 if(i>0) {
552 for(hr=0;hr<HOST_REGS;hr++) {
553 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
554 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
555 cur->regmap[hr]=reg|64;
556 cur->dirty&=~(1<<hr);
557 cur->isconst&=~(1<<hr);
558 return;
559 }
560 }
561 }
562 }
563 // Try to allocate any available register
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 cur->regmap[hr]=reg|64;
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
569 return;
570 }
571 }
572
573 // Ok, now we have to evict someone
574 // Pick a register we hopefully won't need soon
575 u_char hsn[MAXREG+1];
576 memset(hsn,10,sizeof(hsn));
577 int j;
578 lsn(hsn,i,&preferred_reg);
579 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
580 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
581 if(i>0) {
582 // Don't evict the cycle count at entry points, otherwise the entry
583 // stub will have to write it.
584 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
585 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
586 for(j=10;j>=3;j--)
587 {
588 // Alloc preferred register if available
589 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
590 for(hr=0;hr<HOST_REGS;hr++) {
591 // Evict both parts of a 64-bit register
592 if((cur->regmap[hr]&63)==r) {
593 cur->regmap[hr]=-1;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 }
597 }
598 cur->regmap[preferred_reg]=reg|64;
599 return;
600 }
601 for(r=1;r<=MAXREG;r++)
602 {
603 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r+64) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 for(hr=0;hr<HOST_REGS;hr++) {
615 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
616 if(cur->regmap[hr]==r) {
617 cur->regmap[hr]=reg|64;
618 cur->dirty&=~(1<<hr);
619 cur->isconst&=~(1<<hr);
620 return;
621 }
622 }
623 }
624 }
625 }
626 }
627 }
628 for(j=10;j>=0;j--)
629 {
630 for(r=1;r<=MAXREG;r++)
631 {
632 if(hsn[r]==j) {
633 for(hr=0;hr<HOST_REGS;hr++) {
634 if(cur->regmap[hr]==r+64) {
635 cur->regmap[hr]=reg|64;
636 cur->dirty&=~(1<<hr);
637 cur->isconst&=~(1<<hr);
638 return;
639 }
640 }
641 for(hr=0;hr<HOST_REGS;hr++) {
642 if(cur->regmap[hr]==r) {
643 cur->regmap[hr]=reg|64;
644 cur->dirty&=~(1<<hr);
645 cur->isconst&=~(1<<hr);
646 return;
647 }
648 }
649 }
650 }
651 }
652 printf("This shouldn't happen");exit(1);
653}
654
655// Allocate a temporary register. This is done without regard to
656// dirty status or whether the register we request is on the unneeded list
657// Note: This will only allocate one register, even if called multiple times
658void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
659{
660 int r,hr;
661 int preferred_reg = -1;
662
663 // see if it's already allocated
664 for(hr=0;hr<HOST_REGS;hr++)
665 {
666 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
667 }
668
669 // Try to allocate any available register
670 for(hr=HOST_REGS-1;hr>=0;hr--) {
671 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
672 cur->regmap[hr]=reg;
673 cur->dirty&=~(1<<hr);
674 cur->isconst&=~(1<<hr);
675 return;
676 }
677 }
678
679 // Find an unneeded register
680 for(hr=HOST_REGS-1;hr>=0;hr--)
681 {
682 r=cur->regmap[hr];
683 if(r>=0) {
684 if(r<64) {
685 if((cur->u>>r)&1) {
686 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
687 cur->regmap[hr]=reg;
688 cur->dirty&=~(1<<hr);
689 cur->isconst&=~(1<<hr);
690 return;
691 }
692 }
693 }
694 else
695 {
696 if((cur->uu>>(r&63))&1) {
697 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
698 cur->regmap[hr]=reg;
699 cur->dirty&=~(1<<hr);
700 cur->isconst&=~(1<<hr);
701 return;
702 }
703 }
704 }
705 }
706 }
707
708 // Ok, now we have to evict someone
709 // Pick a register we hopefully won't need soon
710 // TODO: we might want to follow unconditional jumps here
711 // TODO: get rid of dupe code and make this into a function
712 u_char hsn[MAXREG+1];
713 memset(hsn,10,sizeof(hsn));
714 int j;
715 lsn(hsn,i,&preferred_reg);
716 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
717 if(i>0) {
718 // Don't evict the cycle count at entry points, otherwise the entry
719 // stub will have to write it.
720 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
721 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
722 for(j=10;j>=3;j--)
723 {
724 for(r=1;r<=MAXREG;r++)
725 {
726 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r+64) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 for(hr=0;hr<HOST_REGS;hr++) {
738 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
739 if(cur->regmap[hr]==r) {
740 cur->regmap[hr]=reg;
741 cur->dirty&=~(1<<hr);
742 cur->isconst&=~(1<<hr);
743 return;
744 }
745 }
746 }
747 }
748 }
749 }
750 }
751 for(j=10;j>=0;j--)
752 {
753 for(r=1;r<=MAXREG;r++)
754 {
755 if(hsn[r]==j) {
756 for(hr=0;hr<HOST_REGS;hr++) {
757 if(cur->regmap[hr]==r+64) {
758 cur->regmap[hr]=reg;
759 cur->dirty&=~(1<<hr);
760 cur->isconst&=~(1<<hr);
761 return;
762 }
763 }
764 for(hr=0;hr<HOST_REGS;hr++) {
765 if(cur->regmap[hr]==r) {
766 cur->regmap[hr]=reg;
767 cur->dirty&=~(1<<hr);
768 cur->isconst&=~(1<<hr);
769 return;
770 }
771 }
772 }
773 }
774 }
775 printf("This shouldn't happen");exit(1);
776}
777// Allocate a specific ARM register.
778void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
779{
780 int n;
f776eb14 781 int dirty=0;
57871462 782
783 // see if it's already allocated (and dealloc it)
784 for(n=0;n<HOST_REGS;n++)
785 {
f776eb14 786 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
787 dirty=(cur->dirty>>n)&1;
788 cur->regmap[n]=-1;
789 }
57871462 790 }
791
792 cur->regmap[hr]=reg;
793 cur->dirty&=~(1<<hr);
f776eb14 794 cur->dirty|=dirty<<hr;
57871462 795 cur->isconst&=~(1<<hr);
796}
797
798// Alloc cycle count into dedicated register
799alloc_cc(struct regstat *cur,int i)
800{
801 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
802}
803
804/* Special alloc */
805
806
807/* Assembler */
808
// Printable names of the 16 ARM registers, indexed by register number
// (used by the assem_debug traces).
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12","sp", "lr",  "pc"
};
826
827void output_byte(u_char byte)
828{
829 *(out++)=byte;
830}
831void output_modrm(u_char mod,u_char rm,u_char ext)
832{
833 assert(mod<4);
834 assert(rm<8);
835 assert(ext<8);
836 u_char byte=(mod<<6)|(ext<<3)|rm;
837 *(out++)=byte;
838}
839void output_sib(u_char scale,u_char index,u_char base)
840{
841 assert(scale<4);
842 assert(index<8);
843 assert(base<8);
844 u_char byte=(scale<<6)|(index<<3)|base;
845 *(out++)=byte;
846}
847void output_w32(u_int word)
848{
849 *((u_int *)out)=word;
850 out+=4;
851}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate with a rotation field
// derived from an even left shift amount:
// rn in bits 16-19, rd in bits 12-15, ((32-shift)&30)<<7 and imm in the low bits.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success stores the 12-bit operand (rotation field in bits 8-11, value
// in bits 0-7) in *encoded and returns 1.  On failure returns 0 and leaves
// *encoded at 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // rotate imm right two bits at a time until it fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 882void genimm_checked(u_int imm,u_int *encoded)
883{
884 u_int ret=genimm(imm,encoded);
885 assert(ret);
886}
57871462 887u_int genjmp(u_int addr)
888{
889 int offset=addr-(int)out-8;
e80343e2 890 if(offset<-33554432||offset>=33554432) {
891 if (addr>2) {
892 printf("genjmp: out of range: %08x\n", offset);
893 exit(1);
894 }
895 return 0;
896 }
57871462 897 return ((u_int)offset>>2)&0xffffff;
898}
899
900void emit_mov(int rs,int rt)
901{
902 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
903 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
904}
905
906void emit_movs(int rs,int rt)
907{
908 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
909 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
910}
911
912void emit_add(int rs1,int rs2,int rt)
913{
914 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
915 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
916}
917
918void emit_adds(int rs1,int rs2,int rt)
919{
920 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
921 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
922}
923
924void emit_adcs(int rs1,int rs2,int rt)
925{
926 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
927 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
928}
929
930void emit_sbc(int rs1,int rs2,int rt)
931{
932 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
933 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
934}
935
936void emit_sbcs(int rs1,int rs2,int rt)
937{
938 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
939 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
940}
941
942void emit_neg(int rs, int rt)
943{
944 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
945 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
946}
947
948void emit_negs(int rs, int rt)
949{
950 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
951 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
952}
953
954void emit_sub(int rs1,int rs2,int rt)
955{
956 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
957 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
958}
959
960void emit_subs(int rs1,int rs2,int rt)
961{
962 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
963 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
964}
965
966void emit_zeroreg(int rt)
967{
968 assem_debug("mov %s,#0\n",regname[rt]);
969 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
970}
971
790ee18e 972void emit_loadlp(u_int imm,u_int rt)
973{
974 add_literal((int)out,imm);
975 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
976 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
977}
978void emit_movw(u_int imm,u_int rt)
979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
984void emit_movt(u_int imm,u_int rt)
985{
986 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
987 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
988}
989void emit_movimm(u_int imm,u_int rt)
990{
991 u_int armval;
992 if(genimm(imm,&armval)) {
993 assem_debug("mov %s,#%d\n",regname[rt],imm);
994 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
995 }else if(genimm(~imm,&armval)) {
996 assem_debug("mvn %s,#%d\n",regname[rt],imm);
997 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
998 }else if(imm<65536) {
999 #ifdef ARMv5_ONLY
1000 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1001 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1002 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1003 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1004 #else
1005 emit_movw(imm,rt);
1006 #endif
1007 }else{
1008 #ifdef ARMv5_ONLY
1009 emit_loadlp(imm,rt);
1010 #else
1011 emit_movw(imm&0x0000FFFF,rt);
1012 emit_movt(imm&0xFFFF0000,rt);
1013 #endif
1014 }
1015}
1016void emit_pcreladdr(u_int rt)
1017{
1018 assem_debug("add %s,pc,#?\n",regname[rt]);
1019 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1020}
1021
57871462 1022void emit_loadreg(int r, int hr)
1023{
3d624f89 1024#ifdef FORCE32
1025 if(r&64) {
1026 printf("64bit load in 32bit mode!\n");
7f2607ea 1027 assert(0);
1028 return;
3d624f89 1029 }
1030#endif
57871462 1031 if((r&63)==0)
1032 emit_zeroreg(hr);
1033 else {
3d624f89 1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==CSREG) addr=(int)&Status;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 if(r==INVCP) addr=(int)&invc_ptr;
1041 u_int offset = addr-(u_int)&dynarec_local;
1042 assert(offset<4096);
1043 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1044 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1045 }
1046}
1047void emit_storereg(int r, int hr)
1048{
3d624f89 1049#ifdef FORCE32
1050 if(r&64) {
1051 printf("64bit store in 32bit mode!\n");
7f2607ea 1052 assert(0);
1053 return;
3d624f89 1054 }
1055#endif
1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
1067void emit_test(int rs, int rt)
1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
1073void emit_testimm(int rs,int imm)
1074{
1075 u_int armval;
5a05d80c 1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1077 genimm_checked(imm,&armval);
57871462 1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
b9b61529 1081void emit_testeqimm(int rs,int imm)
1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
b9b61529 1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
57871462 1089void emit_not(int rs,int rt)
1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
b9b61529 1095void emit_mvnmi(int rs,int rt)
1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
57871462 1101void emit_and(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
1107void emit_or(u_int rs1,u_int rs2,u_int rt)
1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
1112void emit_or_and_set_flags(int rs1,int rs2,int rt)
1113{
1114 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
f70d384d 1118void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 assert(imm<32);
1123 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1124 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1125}
1126
576bbd8f 1127void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1128{
1129 assert(rs<16);
1130 assert(rt<16);
1131 assert(imm<32);
1132 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1133 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1134}
1135
57871462 1136void emit_xor(u_int rs1,u_int rs2,u_int rt)
1137{
1138 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1139 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1140}
1141
57871462 1142void emit_addimm(u_int rs,int imm,u_int rt)
1143{
1144 assert(rs<16);
1145 assert(rt<16);
1146 if(imm!=0) {
57871462 1147 u_int armval;
1148 if(genimm(imm,&armval)) {
1149 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1151 }else if(genimm(-imm,&armval)) {
8a0a8423 1152 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1153 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1154 }else if(imm<0) {
ffb0b9e0 1155 assert(imm>-65536);
57871462 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1157 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1158 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1159 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1160 }else{
ffb0b9e0 1161 assert(imm<65536);
57871462 1162 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1163 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1164 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1165 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1166 }
1167 }
1168 else if(rs!=rt) emit_mov(rs,rt);
1169}
1170
1171void emit_addimm_and_set_flags(int imm,int rt)
1172{
1173 assert(imm>-65536&&imm<65536);
1174 u_int armval;
1175 if(genimm(imm,&armval)) {
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1177 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1178 }else if(genimm(-imm,&armval)) {
1179 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1180 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1181 }else if(imm<0) {
1182 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1183 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1184 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1185 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1186 }else{
1187 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1188 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1189 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1190 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1191 }
1192}
1193void emit_addimm_no_flags(u_int imm,u_int rt)
1194{
1195 emit_addimm(rt,imm,rt);
1196}
1197
1198void emit_addnop(u_int r)
1199{
1200 assert(r<16);
1201 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1202 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1203}
1204
1205void emit_adcimm(u_int rs,int imm,u_int rt)
1206{
1207 u_int armval;
cfbd3c6e 1208 genimm_checked(imm,&armval);
57871462 1209 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1210 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1211}
1212/*void emit_sbcimm(int imm,u_int rt)
1213{
1214 u_int armval;
cfbd3c6e 1215 genimm_checked(imm,&armval);
57871462 1216 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1217 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1218}*/
1219void emit_sbbimm(int imm,u_int rt)
1220{
1221 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1222 assert(rt<8);
1223 if(imm<128&&imm>=-128) {
1224 output_byte(0x83);
1225 output_modrm(3,rt,3);
1226 output_byte(imm);
1227 }
1228 else
1229 {
1230 output_byte(0x81);
1231 output_modrm(3,rt,3);
1232 output_w32(imm);
1233 }
1234}
1235void emit_rscimm(int rs,int imm,u_int rt)
1236{
1237 assert(0);
1238 u_int armval;
cfbd3c6e 1239 genimm_checked(imm,&armval);
57871462 1240 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1241 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1242}
1243
1244void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1245{
1246 // TODO: if(genimm(imm,&armval)) ...
1247 // else
1248 emit_movimm(imm,HOST_TEMPREG);
1249 emit_adds(HOST_TEMPREG,rsl,rtl);
1250 emit_adcimm(rsh,0,rth);
1251}
1252
1253void emit_sbb(int rs1,int rs2)
1254{
1255 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1256 output_byte(0x19);
1257 output_modrm(3,rs1,rs2);
1258}
1259
1260void emit_andimm(int rs,int imm,int rt)
1261{
1262 u_int armval;
790ee18e 1263 if(imm==0) {
1264 emit_zeroreg(rt);
1265 }else if(genimm(imm,&armval)) {
57871462 1266 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1267 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1268 }else if(genimm(~imm,&armval)) {
1269 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1270 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1271 }else if(imm==65535) {
1272 #ifdef ARMv5_ONLY
1273 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1275 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1276 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1277 #else
1278 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1279 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1280 #endif
1281 }else{
1282 assert(imm>0&&imm<65535);
1283 #ifdef ARMv5_ONLY
1284 assem_debug("mov r14,#%d\n",imm&0xFF00);
1285 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1286 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1287 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1288 #else
1289 emit_movw(imm,HOST_TEMPREG);
1290 #endif
1291 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1292 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1293 }
1294}
1295
1296void emit_orimm(int rs,int imm,int rt)
1297{
1298 u_int armval;
790ee18e 1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
57871462 1302 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
1305 assert(imm>0&&imm<65536);
1306 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
1313void emit_xorimm(int rs,int imm,int rt)
1314{
57871462 1315 u_int armval;
790ee18e 1316 if(imm==0) {
1317 if(rs!=rt) emit_mov(rs,rt);
1318 }else if(genimm(imm,&armval)) {
57871462 1319 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1321 }else{
514ed0d9 1322 assert(imm>0&&imm<65536);
57871462 1323 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1324 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1325 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1326 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1327 }
1328}
1329
1330void emit_shlimm(int rs,u_int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 //if(imm==1) ...
1335 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1337}
1338
c6c3b1b3 1339void emit_lsls_imm(int rs,int imm,int rt)
1340{
1341 assert(imm>0);
1342 assert(imm<32);
1343 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1345}
1346
57871462 1347void emit_shrimm(int rs,u_int imm,int rt)
1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1353}
1354
1355void emit_sarimm(int rs,u_int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1361}
1362
1363void emit_rorimm(int rs,u_int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1369}
1370
1371void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1372{
1373 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1374 assert(imm>0);
1375 assert(imm<32);
1376 //if(imm==1) ...
1377 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1378 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1379 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1380 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1381}
1382
1383void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1384{
1385 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1386 assert(imm>0);
1387 assert(imm<32);
1388 //if(imm==1) ...
1389 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1390 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1391 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1392 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1393}
1394
b9b61529 1395void emit_signextend16(int rs,int rt)
1396{
1397 #ifdef ARMv5_ONLY
1398 emit_shlimm(rs,16,rt);
1399 emit_sarimm(rt,16,rt);
1400 #else
1401 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1402 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1403 #endif
1404}
1405
c6c3b1b3 1406void emit_signextend8(int rs,int rt)
1407{
1408 #ifdef ARMv5_ONLY
1409 emit_shlimm(rs,24,rt);
1410 emit_sarimm(rt,24,rt);
1411 #else
1412 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1413 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1414 #endif
1415}
1416
57871462 1417void emit_shl(u_int rs,u_int shift,u_int rt)
1418{
1419 assert(rs<16);
1420 assert(rt<16);
1421 assert(shift<16);
1422 //if(imm==1) ...
1423 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1424 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1425}
1426void emit_shr(u_int rs,u_int shift,u_int rt)
1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1433}
1434void emit_sar(u_int rs,u_int shift,u_int rt)
1435{
1436 assert(rs<16);
1437 assert(rt<16);
1438 assert(shift<16);
1439 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1440 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1441}
1442void emit_shlcl(int r)
1443{
1444 assem_debug("shl %%%s,%%cl\n",regname[r]);
1445 assert(0);
1446}
1447void emit_shrcl(int r)
1448{
1449 assem_debug("shr %%%s,%%cl\n",regname[r]);
1450 assert(0);
1451}
1452void emit_sarcl(int r)
1453{
1454 assem_debug("sar %%%s,%%cl\n",regname[r]);
1455 assert(0);
1456}
1457
1458void emit_shldcl(int r1,int r2)
1459{
1460 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1461 assert(0);
1462}
1463void emit_shrdcl(int r1,int r2)
1464{
1465 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1466 assert(0);
1467}
1468void emit_orrshl(u_int rs,u_int shift,u_int rt)
1469{
1470 assert(rs<16);
1471 assert(rt<16);
1472 assert(shift<16);
1473 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1474 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1475}
1476void emit_orrshr(u_int rs,u_int shift,u_int rt)
1477{
1478 assert(rs<16);
1479 assert(rt<16);
1480 assert(shift<16);
1481 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1482 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1483}
1484
1485void emit_cmpimm(int rs,int imm)
1486{
1487 u_int armval;
1488 if(genimm(imm,&armval)) {
5a05d80c 1489 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1490 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1491 }else if(genimm(-imm,&armval)) {
5a05d80c 1492 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1493 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1494 }else if(imm>0) {
1495 assert(imm<65536);
1496 #ifdef ARMv5_ONLY
1497 emit_movimm(imm,HOST_TEMPREG);
1498 #else
1499 emit_movw(imm,HOST_TEMPREG);
1500 #endif
1501 assem_debug("cmp %s,r14\n",regname[rs]);
1502 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1503 }else{
1504 assert(imm>-65536);
1505 #ifdef ARMv5_ONLY
1506 emit_movimm(-imm,HOST_TEMPREG);
1507 #else
1508 emit_movw(-imm,HOST_TEMPREG);
1509 #endif
1510 assem_debug("cmn %s,r14\n",regname[rs]);
1511 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1512 }
1513}
1514
1515void emit_cmovne(u_int *addr,int rt)
1516{
1517 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1518 assert(0);
1519}
1520void emit_cmovl(u_int *addr,int rt)
1521{
1522 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1523 assert(0);
1524}
1525void emit_cmovs(u_int *addr,int rt)
1526{
1527 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1528 assert(0);
1529}
1530void emit_cmovne_imm(int imm,int rt)
1531{
1532 assem_debug("movne %s,#%d\n",regname[rt],imm);
1533 u_int armval;
cfbd3c6e 1534 genimm_checked(imm,&armval);
57871462 1535 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1536}
1537void emit_cmovl_imm(int imm,int rt)
1538{
1539 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1540 u_int armval;
cfbd3c6e 1541 genimm_checked(imm,&armval);
57871462 1542 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1543}
1544void emit_cmovb_imm(int imm,int rt)
1545{
1546 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1547 u_int armval;
cfbd3c6e 1548 genimm_checked(imm,&armval);
57871462 1549 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1550}
1551void emit_cmovs_imm(int imm,int rt)
1552{
1553 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1554 u_int armval;
cfbd3c6e 1555 genimm_checked(imm,&armval);
57871462 1556 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1557}
1558void emit_cmove_reg(int rs,int rt)
1559{
1560 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1561 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1562}
1563void emit_cmovne_reg(int rs,int rt)
1564{
1565 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1566 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1567}
1568void emit_cmovl_reg(int rs,int rt)
1569{
1570 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1571 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1572}
1573void emit_cmovs_reg(int rs,int rt)
1574{
1575 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1576 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1577}
1578
// rt = (rs < imm) ? 1 : 0, using a conditional "movlt".
// When rt aliases rs the zeroing must happen after the compare.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) ? 1 : 0, using a conditional "movcc".
// When rt aliases rs the zeroing must happen after the compare.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-if-less-than-immediate for a 64-bit value held in rsh:rsl, with a
// 32-bit result in rt.  The low-word result from emit_slti32 is then
// corrected according to the high word.  rsh must not alias rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // negative immediate: the high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // non-negative immediate: the high word is tested against zero
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
// Variant of emit_slti64_32 built on emit_sltiu32: the low-word result is
// corrected according to the high word.  rsh must not alias rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // negative immediate: a high word other than -1 forces the result to 1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    // non-negative immediate: a non-zero high word forces the result to 0
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1625
1626void emit_cmp(int rs,int rt)
1627{
1628 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1629 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1630}
// rt = (rs > 0) ? 1 : 0 for a 32-bit value:
// compare against 1, preload 1, then clear on "less than".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);      // flags for rs - 1
  emit_movimm(1,rt);      // assume true
  emit_cmovl_imm(0,rt);   // rs < 1 -> false
}
// rt = (rs != 0) ? 1 : 0 for a 32-bit value.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);     // in place: only the flags are needed
  }else{
    emit_movs(rs,rt);     // copy and set flags in one go
  }
  emit_cmovne_imm(1,rt);
}
// rt = (rsh:rsl > 0) ? 1 : 0 for a 64-bit value: start from the low-word
// answer, then let the high word override it.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);  // non-zero high word -> 1 ...
  emit_cmovs_imm(0,rt);   // ... unless the high word is negative
}
// rt = (rsh:rsl != 0) ? 1 : 0: OR both halves with flags, then movne.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);  // rt = rsh|rsl, flags updated
  emit_cmovne_imm(1,rt);
}
// rt = (rs1 < rs2) ? 1 : 0, using a conditional "movlt".
// If rt aliases a source it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2) ? 1 : 0, using a conditional "movcc".
// If rt aliases a source it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1675void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1676{
1677 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1678 assert(u1!=rt);
1679 assert(u2!=rt);
1680 emit_cmp(l1,l2);
1681 emit_movimm(0,rt);
1682 emit_sbcs(u1,u2,HOST_TEMPREG);
1683 emit_cmovl_imm(1,rt);
1684}
1685void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1686{
1687 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1688 assert(u1!=rt);
1689 assert(u2!=rt);
1690 emit_cmp(l1,l2);
1691 emit_movimm(0,rt);
1692 emit_sbcs(u1,u2,HOST_TEMPREG);
1693 emit_cmovb_imm(1,rt);
1694}
1695
1696void emit_call(int a)
1697{
1698 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1699 u_int offset=genjmp(a);
1700 output_w32(0xeb000000|offset);
1701}
1702void emit_jmp(int a)
1703{
1704 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1705 u_int offset=genjmp(a);
1706 output_w32(0xea000000|offset);
1707}
// Emit "bne a".
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a".
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a".
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a".
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a".
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a".
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a".
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a".
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a".
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1762
// Unimplemented stub: traces the request, then aborts via assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented stub: traces the request, then aborts via assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented stub: traces the request, then aborts via assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1778void emit_pushreg(u_int r)
1779{
1780 assem_debug("push %%%s\n",regname[r]);
1781 assert(0);
1782}
1783void emit_popreg(u_int r)
1784{
1785 assem_debug("pop %%%s\n",regname[r]);
1786 assert(0);
1787}
1788void emit_callreg(u_int r)
1789{
c6c3b1b3 1790 assert(r<15);
1791 assem_debug("blx %s\n",regname[r]);
1792 output_w32(0xe12fff30|r);
57871462 1793}
1794void emit_jmpreg(u_int r)
1795{
1796 assem_debug("mov pc,%s\n",regname[r]);
1797 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1798}
1799
1800void emit_readword_indexed(int offset, int rs, int rt)
1801{
1802 assert(offset>-4096&&offset<4096);
1803 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1804 if(offset>=0) {
1805 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1806 }else{
1807 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1808 }
1809}
1810void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1811{
1812 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1813 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1814}
c6c3b1b3 1815void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1816{
1817 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1819}
1820void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1824}
1825void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1826{
1827 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1828 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1829}
1830void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1831{
1832 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1833 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1834}
1835void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1836{
1837 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1838 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1839}
// Word load that optionally goes through a map register.
// map < 0: plain offset load.  Otherwise addr must be 0 and the load uses
// rs + (map << 2).
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (optional, skipped when rh < 0) and rl.
// Without a map the two words are read at addr and addr+4.  With a map,
// the map register is incremented by one between the two loads, so it is
// modified by the emitted code.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1860void emit_movsbl_indexed(int offset, int rs, int rt)
1861{
1862 assert(offset>-256&&offset<256);
1863 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1864 if(offset>=0) {
1865 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1866 }else{
1867 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1868 }
1869}
1870void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1871{
1872 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1873 else {
1874 if(addr==0) {
1875 emit_shlimm(map,2,map);
1876 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1877 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1878 }else{
1879 assert(addr>-256&&addr<256);
1880 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1881 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1882 emit_movsbl_indexed(addr, rt, rt);
1883 }
1884 }
1885}
1886void emit_movswl_indexed(int offset, int rs, int rt)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_movzbl_indexed(int offset, int rs, int rt)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1907{
1908 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
// Unsigned byte load that optionally goes through a map register.
// With a map and a non-zero addr, rt is used as scratch for rs+addr
// before the mapped load.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1923void emit_movzwl_indexed(int offset, int rs, int rt)
1924{
1925 assert(offset>-256&&offset<256);
1926 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1927 if(offset>=0) {
1928 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1929 }else{
1930 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1931 }
1932}
054175e9 1933static void emit_ldrd(int offset, int rs, int rt)
1934{
1935 assert(offset>-256&&offset<256);
1936 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1937 if(offset>=0) {
1938 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1939 }else{
1940 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1941 }
1942}
57871462 1943void emit_readword(int addr, int rt)
1944{
1945 u_int offset = addr-(u_int)&dynarec_local;
1946 assert(offset<4096);
1947 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1948 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1949}
1950void emit_movsbl(int addr, int rt)
1951{
1952 u_int offset = addr-(u_int)&dynarec_local;
1953 assert(offset<256);
1954 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1955 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1956}
1957void emit_movswl(int addr, int rt)
1958{
1959 u_int offset = addr-(u_int)&dynarec_local;
1960 assert(offset<256);
1961 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1962 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1963}
1964void emit_movzbl(int addr, int rt)
1965{
1966 u_int offset = addr-(u_int)&dynarec_local;
1967 assert(offset<4096);
1968 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1969 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1970}
1971void emit_movzwl(int addr, int rt)
1972{
1973 u_int offset = addr-(u_int)&dynarec_local;
1974 assert(offset<256);
1975 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1976 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1977}
1978void emit_movzwl_reg(int rs, int rt)
1979{
1980 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1981 assert(0);
1982}
1983
1984void emit_xchg(int rs, int rt)
1985{
1986 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1987 assert(0);
1988}
1989void emit_writeword_indexed(int rt, int offset, int rs)
1990{
1991 assert(offset>-4096&&offset<4096);
1992 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1993 if(offset>=0) {
1994 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1995 }else{
1996 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1997 }
1998}
1999void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2000{
2001 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2002 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2003}
// Word store that optionally goes through a map register.
// map < 0: plain offset store.  Otherwise addr must be 0 and the store
// targets rs + (map << 2).  The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh:rl.
//  - map < 0: two offset stores at addr (rh, optional) and addr+4 (rl).
//  - map >= 0 (rh required): rh is stored through map; rl is stored either
//    through temp = map+1, or - when temp aliases rs - after advancing rs
//    by 4, in which case rs is modified by the emitted code.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2028void emit_writehword_indexed(int rt, int offset, int rs)
2029{
2030 assert(offset>-256&&offset<256);
2031 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2032 if(offset>=0) {
2033 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2034 }else{
2035 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2036 }
2037}
2038void emit_writebyte_indexed(int rt, int offset, int rs)
2039{
2040 assert(offset>-4096&&offset<4096);
2041 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2042 if(offset>=0) {
2043 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2044 }else{
2045 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2046 }
2047}
2048void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2049{
2050 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2051 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2052}
// Byte store that optionally goes through a map register.
// With a map and a non-zero addr, temp receives rs+addr and is used as the
// base of the mapped store.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
b96d3df7 2065void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2066{
2067 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2068 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2069}
2070void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2071{
2072 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2073 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2074}
2075void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2076{
2077 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2079}
57871462 2080void emit_writeword(int rt, int addr)
2081{
2082 u_int offset = addr-(u_int)&dynarec_local;
2083 assert(offset<4096);
2084 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2085 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2086}
2087void emit_writehword(int rt, int addr)
2088{
2089 u_int offset = addr-(u_int)&dynarec_local;
2090 assert(offset<256);
2091 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2092 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2093}
2094void emit_writebyte(int rt, int addr)
2095{
2096 u_int offset = addr-(u_int)&dynarec_local;
2097 assert(offset<4096);
74426039 2098 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2099 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2100}
// Unimplemented stub: traces the request, then aborts via assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented stub: traces the request, then aborts via assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2111
2112void emit_mul(int rs)
2113{
2114 assem_debug("mul %%%s\n",regname[rs]);
2115 assert(0);
2116}
2117void emit_imul(int rs)
2118{
2119 assem_debug("imul %%%s\n",regname[rs]);
2120 assert(0);
2121}
2122void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2123{
2124 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2125 assert(rs1<16);
2126 assert(rs2<16);
2127 assert(hi<16);
2128 assert(lo<16);
2129 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2130}
2131void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2132{
2133 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2134 assert(rs1<16);
2135 assert(rs2<16);
2136 assert(hi<16);
2137 assert(lo<16);
2138 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2139}
2140
2141void emit_div(int rs)
2142{
2143 assem_debug("div %%%s\n",regname[rs]);
2144 assert(0);
2145}
2146void emit_idiv(int rs)
2147{
2148 assem_debug("idiv %%%s\n",regname[rs]);
2149 assert(0);
2150}
// Not implemented in this backend: logs the request and trips an assert.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2156
2157void emit_clz(int rs,int rt)
2158{
2159 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2160 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2161}
2162
2163void emit_subcs(int rs1,int rs2,int rt)
2164{
2165 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2166 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2167}
2168
2169void emit_shrcc_imm(int rs,u_int imm,int rt)
2170{
2171 assert(imm>0);
2172 assert(imm<32);
2173 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2174 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2175}
2176
b1be1eee 2177void emit_shrne_imm(int rs,u_int imm,int rt)
2178{
2179 assert(imm>0);
2180 assert(imm<32);
2181 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2182 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2183}
2184
57871462 2185void emit_negmi(int rs, int rt)
2186{
2187 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2188 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2189}
2190
2191void emit_negsmi(int rs, int rt)
2192{
2193 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2194 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2195}
2196
2197void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2198{
2199 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2200 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2201}
2202
2203void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2204{
2205 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2206 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2207}
2208
2209void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2210{
2211 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2212 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2213}
2214
2215void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2216{
2217 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2218 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2219}
2220
2221void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2222{
2223 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2224 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2225}
2226
2227void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2228{
2229 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2230 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2231}
2232
2233void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2234{
2235 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2236 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2237}
2238
2239void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2240{
2241 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2242 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2243}
2244
2245void emit_teq(int rs, int rt)
2246{
2247 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2248 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2249}
2250
2251void emit_rsbimm(int rs, int imm, int rt)
2252{
2253 u_int armval;
cfbd3c6e 2254 genimm_checked(imm,&armval);
57871462 2255 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2256 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2257}
2258
2259// Load 2 immediates optimizing for small code size
2260void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2261{
2262 emit_movimm(imm1,rt1);
2263 u_int armval;
2264 if(genimm(imm2-imm1,&armval)) {
2265 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2266 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2267 }else if(genimm(imm1-imm2,&armval)) {
2268 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2269 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2270 }
2271 else emit_movimm(imm2,rt2);
2272}
2273
2274// Conditionally select one of two immediates, optimizing for small code size
2275// This will only be called if HAVE_CMOV_IMM is defined
2276void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2277{
2278 u_int armval;
2279 if(genimm(imm2-imm1,&armval)) {
2280 emit_movimm(imm1,rt);
2281 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2282 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2283 }else if(genimm(imm1-imm2,&armval)) {
2284 emit_movimm(imm1,rt);
2285 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2286 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2287 }
2288 else {
2289 #ifdef ARMv5_ONLY
2290 emit_movimm(imm1,rt);
2291 add_literal((int)out,imm2);
2292 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2293 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2294 #else
2295 emit_movw(imm1&0x0000FFFF,rt);
2296 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2297 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2298 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2299 }
2300 emit_movt(imm1&0xFFFF0000,rt);
2301 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2302 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2303 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2304 }
2305 #endif
2306 }
2307}
2308
// Special case for checking invalid_code.
// Not implemented in this backend: always trips an assert (see the _reg
// variant below for the form that is implemented).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2314
2315// special case for checking invalid_code
2316void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2317{
2318 assert(imm<128&&imm>=0);
2319 assert(r>=0&&r<16);
2320 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2321 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2322 emit_cmpimm(HOST_TEMPREG,imm);
2323}
2324
2325// special case for tlb mapping
2326void emit_addsr12(int rs1,int rs2,int rt)
2327{
2328 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2329 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2330}
2331
0bbd1454 2332void emit_callne(int a)
2333{
2334 assem_debug("blne %x\n",a);
2335 u_int offset=genjmp(a);
2336 output_w32(0x1b000000|offset);
2337}
2338
// Used to preload hash table entries.
// Absolute-address prefetch is not implemented in this backend.  The
// previous body wrote x86 opcode bytes (0x0F 0x18 + modrm + 32-bit address)
// into the ARM instruction stream, which would corrupt and misalign all
// following code.  It now behaves like the other unimplemented emitters:
// log the request and trip an assert.  Use emit_prefetchreg() instead.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2348void emit_prefetchreg(int r)
2349{
2350 assem_debug("pld %s\n",regname[r]);
2351 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2352}
2353
2354// Special case for mini_ht
2355void emit_ldreq_indexed(int rs, u_int offset, int rt)
2356{
2357 assert(offset<4096);
2358 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2359 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2360}
2361
2362void emit_flds(int r,int sr)
2363{
2364 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2365 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2366}
2367
2368void emit_vldr(int r,int vr)
2369{
2370 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2371 output_w32(0xed900b00|(vr<<12)|(r<<16));
2372}
2373
2374void emit_fsts(int sr,int r)
2375{
2376 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2377 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2378}
2379
2380void emit_vstr(int vr,int r)
2381{
2382 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2383 output_w32(0xed800b00|(vr<<12)|(r<<16));
2384}
2385
2386void emit_ftosizs(int s,int d)
2387{
2388 assem_debug("ftosizs s%d,s%d\n",d,s);
2389 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2390}
2391
2392void emit_ftosizd(int s,int d)
2393{
2394 assem_debug("ftosizd s%d,d%d\n",d,s);
2395 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2396}
2397
2398void emit_fsitos(int s,int d)
2399{
2400 assem_debug("fsitos s%d,s%d\n",d,s);
2401 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2402}
2403
2404void emit_fsitod(int s,int d)
2405{
2406 assem_debug("fsitod d%d,s%d\n",d,s);
2407 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2408}
2409
2410void emit_fcvtds(int s,int d)
2411{
2412 assem_debug("fcvtds d%d,s%d\n",d,s);
2413 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2414}
2415
2416void emit_fcvtsd(int s,int d)
2417{
2418 assem_debug("fcvtsd s%d,d%d\n",d,s);
2419 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2420}
2421
// Emit "fsqrts s<d>,s<s>" (single-precision source and destination).
// The debug text previously printed the destination as a 'd' register
// although the encoding uses a single-precision register.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2427
// Emit "fsqrtd d<d>,d<s>" (double-precision source and destination).
// The debug text previously printed the destination as an 's' register
// although the encoding uses a double-precision register.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2433
// Emit "fabss s<d>,s<s>" (single-precision source and destination).
// The debug text previously printed the destination as a 'd' register
// although the encoding uses a single-precision register.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2439
// Emit "fabsd d<d>,d<s>" (double-precision source and destination).
// The debug text previously printed the destination as an 's' register
// although the encoding uses a double-precision register.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2445
// Emit "fnegs s<d>,s<s>" (single-precision source and destination).
// The debug text previously printed the destination as a 'd' register
// although the encoding uses a single-precision register.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2451
// Emit "fnegd d<d>,d<s>" (double-precision source and destination).
// The debug text previously printed the destination as an 's' register
// although the encoding uses a double-precision register.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2457
2458void emit_fadds(int s1,int s2,int d)
2459{
2460 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2461 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2462}
2463
2464void emit_faddd(int s1,int s2,int d)
2465{
2466 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2467 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2468}
2469
2470void emit_fsubs(int s1,int s2,int d)
2471{
2472 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2473 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2474}
2475
2476void emit_fsubd(int s1,int s2,int d)
2477{
2478 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2479 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2480}
2481
2482void emit_fmuls(int s1,int s2,int d)
2483{
2484 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2485 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2486}
2487
2488void emit_fmuld(int s1,int s2,int d)
2489{
2490 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2491 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2492}
2493
2494void emit_fdivs(int s1,int s2,int d)
2495{
2496 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2497 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2498}
2499
2500void emit_fdivd(int s1,int s2,int d)
2501{
2502 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2503 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2504}
2505
2506void emit_fcmps(int x,int y)
2507{
2508 assem_debug("fcmps s14, s15\n");
2509 output_w32(0xeeb47a67);
2510}
2511
2512void emit_fcmpd(int x,int y)
2513{
2514 assem_debug("fcmpd d6, d7\n");
2515 output_w32(0xeeb46b47);
2516}
2517
2518void emit_fmstat()
2519{
2520 assem_debug("fmstat\n");
2521 output_w32(0xeef1fa10);
2522}
2523
2524void emit_bicne_imm(int rs,int imm,int rt)
2525{
2526 u_int armval;
cfbd3c6e 2527 genimm_checked(imm,&armval);
57871462 2528 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2529 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2530}
2531
2532void emit_biccs_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
cfbd3c6e 2535 genimm_checked(imm,&armval);
57871462 2536 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_bicvc_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_bichi_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
57871462 2552 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_orrvs_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
57871462 2560 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
b9b61529 2564void emit_orrne_imm(int rs,int imm,int rt)
2565{
2566 u_int armval;
cfbd3c6e 2567 genimm_checked(imm,&armval);
b9b61529 2568 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2569 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2570}
2571
2572void emit_andne_imm(int rs,int imm,int rt)
2573{
2574 u_int armval;
cfbd3c6e 2575 genimm_checked(imm,&armval);
b9b61529 2576 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2577 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2578}
2579
57871462 2580void emit_jno_unlikely(int a)
2581{
2582 //emit_jno(a);
2583 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2584 output_w32(0x72800000|rd_rn_rm(15,15,0));
2585}
2586
054175e9 2587static void save_regs_all(u_int reglist)
57871462 2588{
054175e9 2589 int i;
57871462 2590 if(!reglist) return;
2591 assem_debug("stmia fp,{");
054175e9 2592 for(i=0;i<16;i++)
2593 if(reglist&(1<<i))
2594 assem_debug("r%d,",i);
57871462 2595 assem_debug("}\n");
2596 output_w32(0xe88b0000|reglist);
2597}
054175e9 2598static void restore_regs_all(u_int reglist)
57871462 2599{
054175e9 2600 int i;
57871462 2601 if(!reglist) return;
2602 assem_debug("ldmia fp,{");
054175e9 2603 for(i=0;i<16;i++)
2604 if(reglist&(1<<i))
2605 assem_debug("r%d,",i);
57871462 2606 assem_debug("}\n");
2607 output_w32(0xe89b0000|reglist);
2608}
054175e9 2609// Save registers before function call
2610static void save_regs(u_int reglist)
2611{
2612 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2613 save_regs_all(reglist);
2614}
2615// Restore registers after function call
2616static void restore_regs(u_int reglist)
2617{
2618 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2619 restore_regs_all(reglist);
2620}
57871462 2621
2622// Write back consts using r14 so we don't disturb the other registers
2623void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2624{
2625 int hr;
2626 for(hr=0;hr<HOST_REGS;hr++) {
2627 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2628 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2629 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2630 int value=constmap[i][hr];
2631 if(value==0) {
2632 emit_zeroreg(HOST_TEMPREG);
2633 }
2634 else {
2635 emit_movimm(value,HOST_TEMPREG);
2636 }
2637 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2638#ifndef FORCE32
57871462 2639 if((i_is32>>i_regmap[hr])&1) {
2640 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2641 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2642 }
24385cae 2643#endif
57871462 2644 }
2645 }
2646 }
2647 }
2648}
2649
2650/* Stubs/epilogue */
2651
2652void literal_pool(int n)
2653{
2654 if(!literalcount) return;
2655 if(n) {
2656 if((int)out-literals[0][0]<4096-n) return;
2657 }
2658 u_int *ptr;
2659 int i;
2660 for(i=0;i<literalcount;i++)
2661 {
77750690 2662 u_int l_addr=(u_int)out;
2663 int j;
2664 for(j=0;j<i;j++) {
2665 if(literals[j][1]==literals[i][1]) {
2666 //printf("dup %08x\n",literals[i][1]);
2667 l_addr=literals[j][0];
2668 break;
2669 }
2670 }
57871462 2671 ptr=(u_int *)literals[i][0];
77750690 2672 u_int offset=l_addr-(u_int)ptr-8;
57871462 2673 assert(offset<4096);
2674 assert(!(offset&3));
2675 *ptr|=offset;
77750690 2676 if(l_addr==(u_int)out) {
2677 literals[i][0]=l_addr; // remember for dupes
2678 output_w32(literals[i][1]);
2679 }
57871462 2680 }
2681 literalcount=0;
2682}
2683
2684void literal_pool_jumpover(int n)
2685{
2686 if(!literalcount) return;
2687 if(n) {
2688 if((int)out-literals[0][0]<4096-n) return;
2689 }
2690 int jaddr=(int)out;
2691 emit_jmp(0);
2692 literal_pool(0);
2693 set_jump_target(jaddr,(int)out);
2694}
2695
2696emit_extjump2(int addr, int target, int linker)
2697{
2698 u_char *ptr=(u_char *)addr;
2699 assert((ptr[3]&0x0e)==0xa);
2700 emit_loadlp(target,0);
2701 emit_loadlp(addr,1);
24385cae 2702 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2703 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2704//DEBUG >
2705#ifdef DEBUG_CYCLE_COUNT
2706 emit_readword((int)&last_count,ECX);
2707 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2708 emit_readword((int)&next_interupt,ECX);
2709 emit_writeword(HOST_CCREG,(int)&Count);
2710 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2711 emit_writeword(ECX,(int)&last_count);
2712#endif
2713//DEBUG <
2714 emit_jmp(linker);
2715}
2716
2717emit_extjump(int addr, int target)
2718{
2719 emit_extjump2(addr, target, (int)dyna_linker);
2720}
2721emit_extjump_ds(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker_ds);
2724}
2725
13e35c04 2726// put rt_val into rt, potentially making use of rs with value rs_val
2727static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2728{
8575a877 2729 u_int armval;
2730 int diff;
2731 if(genimm(rt_val,&armval)) {
2732 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2733 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2734 return;
2735 }
2736 if(genimm(~rt_val,&armval)) {
2737 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2738 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2739 return;
2740 }
2741 diff=rt_val-rs_val;
2742 if(genimm(diff,&armval)) {
2743 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2744 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2745 return;
2746 }else if(genimm(-diff,&armval)) {
2747 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2748 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2749 return;
2750 }
2751 emit_movimm(rt_val,rt);
2752}
2753
// Return 1 if the function above can do its job cheaply, i.e. v2 equals v1
// or differs from it (in either direction) by an amount that, after
// dropping trailing pairs of zero bits, fits in 8 bits.  Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta,pass;
  if(v1==v2) return 1;
  delta=v2-v1;
  // pass 0 checks +delta (add), pass 1 checks -delta (sub)
  for(pass=0;pass<2;pass++) {
    u_int bits=pass?-delta:delta;
    while(bits!=0&&(bits&3)==0)
      bits>>=2;
    if(bits<0x100) return 1;
  }
  return 0;
}
cbbab9cd 2769
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "no value to move".  Trashes r2 (used
// as scratch when the two values have to be swapped).
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(1,2); emit_mov(0,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // second argument lives in r0: move it out before r0 is overwritten
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2786
b1be1eee 2787static void mov_loadtype_adj(int type,int rs,int rt)
2788{
2789 switch(type) {
2790 case LOADB_STUB: emit_signextend8(rs,rt); break;
2791 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2792 case LOADH_STUB: emit_signextend16(rs,rt); break;
2793 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2794 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2795 default: assert(0);
2796 }
2797}
2798
2799#ifdef PCSX
2800#include "pcsxmem.h"
2801#include "pcsxmem_inline.c"
2802#endif
2803
57871462 2804do_readstub(int n)
2805{
2806 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2807 literal_pool(256);
2808 set_jump_target(stubs[n][1],(int)out);
2809 int type=stubs[n][0];
2810 int i=stubs[n][3];
2811 int rs=stubs[n][4];
2812 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2813 u_int reglist=stubs[n][7];
2814 signed char *i_regmap=i_regs->regmap;
2815 int addr=get_reg(i_regmap,AGEN1+(i&1));
2816 int rth,rt;
2817 int ds;
b9b61529 2818 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2819 rth=get_reg(i_regmap,FTEMP|64);
2820 rt=get_reg(i_regmap,FTEMP);
2821 }else{
2822 rth=get_reg(i_regmap,rt1[i]|64);
2823 rt=get_reg(i_regmap,rt1[i]);
2824 }
2825 assert(rs>=0);
c6c3b1b3 2826#ifdef PCSX
2827 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2828 reglist|=(1<<rs);
2829 for(r=0;r<=12;r++) {
2830 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2831 temp=r; break;
2832 }
2833 }
2834 if(rt>=0)
2835 reglist&=~(1<<rt);
2836 if(temp==-1) {
2837 save_regs(reglist);
2838 regs_saved=1;
2839 temp=(rs==0)?2:0;
2840 }
2841 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2842 temp2=1;
2843 emit_readword((int)&mem_rtab,temp);
2844 emit_shrimm(rs,12,temp2);
2845 emit_readword_dualindexedx4(temp,temp2,temp2);
2846 emit_lsls_imm(temp2,1,temp2);
2847 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2848 switch(type) {
2849 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2850 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2851 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2852 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2853 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2854 }
2855 }
2856 if(regs_saved) {
2857 restore_jump=(int)out;
2858 emit_jcc(0); // jump to reg restore
2859 }
2860 else
2861 emit_jcc(stubs[n][2]); // return address
2862
2863 if(!regs_saved)
2864 save_regs(reglist);
2865 int handler=0;
2866 if(type==LOADB_STUB||type==LOADBU_STUB)
2867 handler=(int)jump_handler_read8;
2868 if(type==LOADH_STUB||type==LOADHU_STUB)
2869 handler=(int)jump_handler_read16;
2870 if(type==LOADW_STUB)
2871 handler=(int)jump_handler_read32;
2872 assert(handler!=0);
b96d3df7 2873 pass_args(rs,temp2);
c6c3b1b3 2874 int cc=get_reg(i_regmap,CCREG);
2875 if(cc<0)
2876 emit_loadreg(CCREG,2);
2573466a 2877 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2878 emit_call(handler);
2879 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2880 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2881 }
2882 if(restore_jump)
2883 set_jump_target(restore_jump,(int)out);
2884 restore_regs(reglist);
2885 emit_jmp(stubs[n][2]); // return address
2886#else // !PCSX
57871462 2887 if(addr<0) addr=rt;
535d208a 2888 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2889 assert(addr>=0);
2890 int ftable=0;
2891 if(type==LOADB_STUB||type==LOADBU_STUB)
2892 ftable=(int)readmemb;
2893 if(type==LOADH_STUB||type==LOADHU_STUB)
2894 ftable=(int)readmemh;
2895 if(type==LOADW_STUB)
2896 ftable=(int)readmem;
24385cae 2897#ifndef FORCE32
57871462 2898 if(type==LOADD_STUB)
2899 ftable=(int)readmemd;
24385cae 2900#endif
2901 assert(ftable!=0);
57871462 2902 emit_writeword(rs,(int)&address);
2903 //emit_pusha();
2904 save_regs(reglist);
97a238a6 2905#ifndef PCSX
57871462 2906 ds=i_regs!=&regs[i];
2907 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2908 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2909 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2910 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2911 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2912#endif
57871462 2913 emit_shrimm(rs,16,1);
2914 int cc=get_reg(i_regmap,CCREG);
2915 if(cc<0) {
2916 emit_loadreg(CCREG,2);
2917 }
2918 emit_movimm(ftable,0);
2919 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2920#ifndef PCSX
57871462 2921 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2922#endif
57871462 2923 //emit_readword((int)&last_count,temp);
2924 //emit_add(cc,temp,cc);
2925 //emit_writeword(cc,(int)&Count);
2926 //emit_mov(15,14);
2927 emit_call((int)&indirect_jump_indexed);
2928 //emit_callreg(rs);
2929 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2930#ifndef PCSX
57871462 2931 // We really shouldn't need to update the count here,
2932 // but not doing so causes random crashes...
2933 emit_readword((int)&Count,HOST_TEMPREG);
2934 emit_readword((int)&next_interupt,2);
2935 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2936 emit_writeword(2,(int)&last_count);
2937 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2938 if(cc<0) {
2939 emit_storereg(CCREG,HOST_TEMPREG);
2940 }
f51dc36c 2941#endif
57871462 2942 //emit_popa();
2943 restore_regs(reglist);
2944 //if((cc=get_reg(regmap,CCREG))>=0) {
2945 // emit_loadreg(CCREG,cc);
2946 //}
f18c0f46 2947 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2948 assert(rt>=0);
2949 if(type==LOADB_STUB)
2950 emit_movsbl((int)&readmem_dword,rt);
2951 if(type==LOADBU_STUB)
2952 emit_movzbl((int)&readmem_dword,rt);
2953 if(type==LOADH_STUB)
2954 emit_movswl((int)&readmem_dword,rt);
2955 if(type==LOADHU_STUB)
2956 emit_movzwl((int)&readmem_dword,rt);
2957 if(type==LOADW_STUB)
2958 emit_readword((int)&readmem_dword,rt);
2959 if(type==LOADD_STUB) {
2960 emit_readword((int)&readmem_dword,rt);
2961 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2962 }
57871462 2963 }
2964 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2965#endif // !PCSX
57871462 2966}
2967
c6c3b1b3 2968#ifdef PCSX
2969// return memhandler, or get directly accessable address and return 0
2970u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2971{
2972 u_int l1,l2=0;
2973 l1=((u_int *)table)[addr>>12];
2974 if((l1&(1<<31))==0) {
2975 u_int v=l1<<1;
2976 *addr_host=v+addr;
2977 return 0;
2978 }
2979 else {
2980 l1<<=1;
2981 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2982 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2983 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2984 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2985 else
2986 l2=((u_int *)l1)[(addr&0xfff)/4];
2987 if((l2&(1<<31))==0) {
2988 u_int v=l2<<1;
2989 *addr_host=v+(addr&0xfff);
2990 return 0;
2991 }
2992 return l2<<1;
2993 }
2994}
2995#endif
2996
57871462 2997inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2998{
2999 int rs=get_reg(regmap,target);
3000 int rth=get_reg(regmap,target|64);
3001 int rt=get_reg(regmap,target);
535d208a 3002 if(rs<0) rs=get_reg(regmap,-1);
57871462 3003 assert(rs>=0);
c6c3b1b3 3004#ifdef PCSX
b1be1eee 3005 u_int handler,host_addr=0,is_dynamic,far_call=0;
3006 int cc=get_reg(regmap,CCREG);
3007 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
3008 return;
c6c3b1b3 3009 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
3010 if (handler==0) {
3011 if(rt<0)
3012 return;
13e35c04 3013 if(addr!=host_addr)
3014 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 3015 switch(type) {
3016 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
3017 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
3018 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
3019 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
3020 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
3021 default: assert(0);
3022 }
3023 return;
3024 }
b1be1eee 3025 is_dynamic=pcsxmem_is_handler_dynamic(addr);
3026 if(is_dynamic) {
3027 if(type==LOADB_STUB||type==LOADBU_STUB)
3028 handler=(int)jump_handler_read8;
3029 if(type==LOADH_STUB||type==LOADHU_STUB)
3030 handler=(int)jump_handler_read16;
3031 if(type==LOADW_STUB)
3032 handler=(int)jump_handler_read32;
3033 }
c6c3b1b3 3034
3035 // call a memhandler
3036 if(rt>=0)
3037 reglist&=~(1<<rt);
3038 save_regs(reglist);
3039 if(target==0)
3040 emit_movimm(addr,0);
3041 else if(rs!=0)
3042 emit_mov(rs,0);
c6c3b1b3 3043 int offset=(int)handler-(int)out-8;
3044 if(offset<-33554432||offset>=33554432) {
3045 // unreachable memhandler, a plugin func perhaps
b1be1eee 3046 emit_movimm(handler,12);
3047 far_call=1;
3048 }
3049 if(cc<0)
3050 emit_loadreg(CCREG,2);
3051 if(is_dynamic) {
3052 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3053 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3054 }
b1be1eee 3055 else {
3056 emit_readword((int)&last_count,3);
3057 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3058 emit_add(2,3,2);
3059 emit_writeword(2,(int)&Count);
3060 }
3061
3062 if(far_call)
3063 emit_callreg(12);
c6c3b1b3 3064 else
3065 emit_call(handler);
b1be1eee 3066
c6c3b1b3 3067 if(rt>=0) {
3068 switch(type) {
3069 case LOADB_STUB: emit_signextend8(0,rt); break;
3070 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3071 case LOADH_STUB: emit_signextend16(0,rt); break;
3072 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3073 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3074 default: assert(0);
3075 }
3076 }
3077 restore_regs(reglist);
3078#else // if !PCSX
57871462 3079 int ftable=0;
3080 if(type==LOADB_STUB||type==LOADBU_STUB)
3081 ftable=(int)readmemb;
3082 if(type==LOADH_STUB||type==LOADHU_STUB)
3083 ftable=(int)readmemh;
3084 if(type==LOADW_STUB)
3085 ftable=(int)readmem;
24385cae 3086#ifndef FORCE32
57871462 3087 if(type==LOADD_STUB)
3088 ftable=(int)readmemd;
24385cae 3089#endif
3090 assert(ftable!=0);
fd99c415 3091 if(target==0)
3092 emit_movimm(addr,rs);
57871462 3093 emit_writeword(rs,(int)&address);
3094 //emit_pusha();
3095 save_regs(reglist);
0c1fe38b 3096#ifndef PCSX
3097 if((signed int)addr>=(signed int)0xC0000000) {
3098 // Theoretically we can have a pagefault here, if the TLB has never
3099 // been enabled and the address is outside the range 80000000..BFFFFFFF
3100 // Write out the registers so the pagefault can be handled. This is
3101 // a very rare case and likely represents a bug.
3102 int ds=regmap!=regs[i].regmap;
3103 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3104 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3105 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3106 }
3107#endif
57871462 3108 //emit_shrimm(rs,16,1);
3109 int cc=get_reg(regmap,CCREG);
3110 if(cc<0) {
3111 emit_loadreg(CCREG,2);
3112 }
3113 //emit_movimm(ftable,0);
3114 emit_movimm(((u_int *)ftable)[addr>>16],0);
3115 //emit_readword((int)&last_count,12);
2573466a 3116 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3117#ifndef PCSX
57871462 3118 if((signed int)addr>=(signed int)0xC0000000) {
3119 // Pagefault address
3120 int ds=regmap!=regs[i].regmap;
3121 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3122 }
f51dc36c 3123#endif
57871462 3124 //emit_add(12,2,2);
3125 //emit_writeword(2,(int)&Count);
3126 //emit_call(((u_int *)ftable)[addr>>16]);
3127 emit_call((int)&indirect_jump);
f51dc36c 3128#ifndef PCSX
57871462 3129 // We really shouldn't need to update the count here,
3130 // but not doing so causes random crashes...
3131 emit_readword((int)&Count,HOST_TEMPREG);
3132 emit_readword((int)&next_interupt,2);
2573466a 3133 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3134 emit_writeword(2,(int)&last_count);
3135 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3136 if(cc<0) {
3137 emit_storereg(CCREG,HOST_TEMPREG);
3138 }
f51dc36c 3139#endif
57871462 3140 //emit_popa();
3141 restore_regs(reglist);
fd99c415 3142 if(rt>=0) {
3143 if(type==LOADB_STUB)
3144 emit_movsbl((int)&readmem_dword,rt);
3145 if(type==LOADBU_STUB)
3146 emit_movzbl((int)&readmem_dword,rt);
3147 if(type==LOADH_STUB)
3148 emit_movswl((int)&readmem_dword,rt);
3149 if(type==LOADHU_STUB)
3150 emit_movzwl((int)&readmem_dword,rt);
3151 if(type==LOADW_STUB)
3152 emit_readword((int)&readmem_dword,rt);
3153 if(type==LOADD_STUB) {
3154 emit_readword((int)&readmem_dword,rt);
3155 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3156 }
57871462 3157 }
c6c3b1b3 3158#endif // !PCSX
57871462 3159}
3160
3161do_writestub(int n)
3162{
3163 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3164 literal_pool(256);
3165 set_jump_target(stubs[n][1],(int)out);
3166 int type=stubs[n][0];
3167 int i=stubs[n][3];
3168 int rs=stubs[n][4];
3169 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3170 u_int reglist=stubs[n][7];
3171 signed char *i_regmap=i_regs->regmap;
3172 int addr=get_reg(i_regmap,AGEN1+(i&1));
3173 int rth,rt,r;
3174 int ds;
b9b61529 3175 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3176 rth=get_reg(i_regmap,FTEMP|64);
3177 rt=get_reg(i_regmap,r=FTEMP);
3178 }else{
3179 rth=get_reg(i_regmap,rs2[i]|64);
3180 rt=get_reg(i_regmap,r=rs2[i]);
3181 }
3182 assert(rs>=0);
3183 assert(rt>=0);
b96d3df7 3184#ifdef PCSX
3185 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3186 int reglist2=reglist|(1<<rs)|(1<<rt);
3187 for(rtmp=0;rtmp<=12;rtmp++) {
3188 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3189 temp=rtmp; break;
3190 }
3191 }
3192 if(temp==-1) {
3193 save_regs(reglist);
3194 regs_saved=1;
3195 for(rtmp=0;rtmp<=3;rtmp++)
3196 if(rtmp!=rs&&rtmp!=rt)
3197 {temp=rtmp;break;}
3198 }
3199 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3200 temp2=3;
3201 emit_readword((int)&mem_wtab,temp);
3202 emit_shrimm(rs,12,temp2);
3203 emit_readword_dualindexedx4(temp,temp2,temp2);
3204 emit_lsls_imm(temp2,1,temp2);
3205 switch(type) {
3206 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3207 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3208 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3209 default: assert(0);
3210 }
3211 if(regs_saved) {
3212 restore_jump=(int)out;
3213 emit_jcc(0); // jump to reg restore
3214 }
3215 else
3216 emit_jcc(stubs[n][2]); // return address (invcode check)
3217
3218 if(!regs_saved)
3219 save_regs(reglist);
3220 int handler=0;
3221 switch(type) {
3222 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3223 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3224 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3225 }
3226 assert(handler!=0);
3227 pass_args(rs,rt);
3228 if(temp2!=3)
3229 emit_mov(temp2,3);
3230 int cc=get_reg(i_regmap,CCREG);
3231 if(cc<0)
3232 emit_loadreg(CCREG,2);
2573466a 3233 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3234 // returns new cycle_count
3235 emit_call(handler);
2573466a 3236 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3237 if(cc<0)
3238 emit_storereg(CCREG,2);
3239 if(restore_jump)
3240 set_jump_target(restore_jump,(int)out);
3241 restore_regs(reglist);
3242 ra=stubs[n][2];
b96d3df7 3243 emit_jmp(ra);
3244#else // if !PCSX
57871462 3245 if(addr<0) addr=get_reg(i_regmap,-1);
3246 assert(addr>=0);
3247 int ftable=0;
3248 if(type==STOREB_STUB)
3249 ftable=(int)writememb;
3250 if(type==STOREH_STUB)
3251 ftable=(int)writememh;
3252 if(type==STOREW_STUB)
3253 ftable=(int)writemem;
24385cae 3254#ifndef FORCE32
57871462 3255 if(type==STORED_STUB)
3256 ftable=(int)writememd;
24385cae 3257#endif
3258 assert(ftable!=0);
57871462 3259 emit_writeword(rs,(int)&address);
3260 //emit_shrimm(rs,16,rs);
3261 //emit_movmem_indexedx4(ftable,rs,rs);
3262 if(type==STOREB_STUB)
3263 emit_writebyte(rt,(int)&byte);
3264 if(type==STOREH_STUB)
3265 emit_writehword(rt,(int)&hword);
3266 if(type==STOREW_STUB)
3267 emit_writeword(rt,(int)&word);
3268 if(type==STORED_STUB) {
3d624f89 3269#ifndef FORCE32
57871462 3270 emit_writeword(rt,(int)&dword);
3271 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3272#else
3273 printf("STORED_STUB\n");
3274#endif
57871462 3275 }
3276 //emit_pusha();
3277 save_regs(reglist);
97a238a6 3278#ifndef PCSX
57871462 3279 ds=i_regs!=&regs[i];
3280 int real_rs=get_reg(i_regmap,rs1[i]);
3281 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3282 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3283 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3284 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3285#endif
57871462 3286 emit_shrimm(rs,16,1);
3287 int cc=get_reg(i_regmap,CCREG);
3288 if(cc<0) {
3289 emit_loadreg(CCREG,2);
3290 }
3291 emit_movimm(ftable,0);
3292 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3293#ifndef PCSX
57871462 3294 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3295#endif
57871462 3296 //emit_readword((int)&last_count,temp);
3297 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3298 //emit_add(cc,temp,cc);
3299 //emit_writeword(cc,(int)&Count);
3300 emit_call((int)&indirect_jump_indexed);
3301 //emit_callreg(rs);
3302 emit_readword((int)&Count,HOST_TEMPREG);
3303 emit_readword((int)&next_interupt,2);
3304 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3305 emit_writeword(2,(int)&last_count);
3306 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3307 if(cc<0) {
3308 emit_storereg(CCREG,HOST_TEMPREG);
3309 }
3310 //emit_popa();
3311 restore_regs(reglist);
3312 //if((cc=get_reg(regmap,CCREG))>=0) {
3313 // emit_loadreg(CCREG,cc);
3314 //}
3315 emit_jmp(stubs[n][2]); // return address
b96d3df7 3316#endif // !PCSX
57871462 3317}
3318
3319inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3320{
3321 int rs=get_reg(regmap,-1);
3322 int rth=get_reg(regmap,target|64);
3323 int rt=get_reg(regmap,target);
3324 assert(rs>=0);
3325 assert(rt>=0);
cbbab9cd 3326#ifdef PCSX
b96d3df7 3327 u_int handler,host_addr=0;
b96d3df7 3328 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3329 if (handler==0) {
13e35c04 3330 if(addr!=host_addr)
3331 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3332 switch(type) {
3333 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3334 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3335 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3336 default: assert(0);
3337 }
3338 return;
3339 }
3340
3341 // call a memhandler
3342 save_regs(reglist);
13e35c04 3343 pass_args(rs,rt);
b96d3df7 3344 int cc=get_reg(regmap,CCREG);
3345 if(cc<0)
3346 emit_loadreg(CCREG,2);
2573466a 3347 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3348 emit_movimm(handler,3);
3349 // returns new cycle_count
3350 emit_call((int)jump_handler_write_h);
2573466a 3351 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3352 if(cc<0)
3353 emit_storereg(CCREG,2);
3354 restore_regs(reglist);
3355#else // if !pcsx
57871462 3356 int ftable=0;
3357 if(type==STOREB_STUB)
3358 ftable=(int)writememb;
3359 if(type==STOREH_STUB)
3360 ftable=(int)writememh;
3361 if(type==STOREW_STUB)
3362 ftable=(int)writemem;
24385cae 3363#ifndef FORCE32
57871462 3364 if(type==STORED_STUB)
3365 ftable=(int)writememd;
24385cae 3366#endif
3367 assert(ftable!=0);
57871462 3368 emit_writeword(rs,(int)&address);
3369 //emit_shrimm(rs,16,rs);
3370 //emit_movmem_indexedx4(ftable,rs,rs);
3371 if(type==STOREB_STUB)
3372 emit_writebyte(rt,(int)&byte);
3373 if(type==STOREH_STUB)
3374 emit_writehword(rt,(int)&hword);
3375 if(type==STOREW_STUB)
3376 emit_writeword(rt,(int)&word);
3377 if(type==STORED_STUB) {
3d624f89 3378#ifndef FORCE32
57871462 3379 emit_writeword(rt,(int)&dword);
3380 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3381#else
3382 printf("STORED_STUB\n");
3383#endif
57871462 3384 }
3385 //emit_pusha();
3386 save_regs(reglist);
0c1fe38b 3387#ifndef PCSX
3388 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3389 if((signed int)addr>=(signed int)0xC0000000) {
3390 // Theoretically we can have a pagefault here, if the TLB has never
3391 // been enabled and the address is outside the range 80000000..BFFFFFFF
3392 // Write out the registers so the pagefault can be handled. This is
3393 // a very rare case and likely represents a bug.
3394 int ds=regmap!=regs[i].regmap;
3395 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3396 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3397 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3398 }
3399#endif
57871462 3400 //emit_shrimm(rs,16,1);
3401 int cc=get_reg(regmap,CCREG);
3402 if(cc<0) {
3403 emit_loadreg(CCREG,2);
3404 }
3405 //emit_movimm(ftable,0);
3406 emit_movimm(((u_int *)ftable)[addr>>16],0);
3407 //emit_readword((int)&last_count,12);
2573466a 3408 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3409#ifndef PCSX
57871462 3410 if((signed int)addr>=(signed int)0xC0000000) {
3411 // Pagefault address
3412 int ds=regmap!=regs[i].regmap;
3413 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3414 }
f51dc36c 3415#endif
57871462 3416 //emit_add(12,2,2);
3417 //emit_writeword(2,(int)&Count);
3418 //emit_call(((u_int *)ftable)[addr>>16]);
3419 emit_call((int)&indirect_jump);
3420 emit_readword((int)&Count,HOST_TEMPREG);
3421 emit_readword((int)&next_interupt,2);
2573466a 3422 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3423 emit_writeword(2,(int)&last_count);
3424 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3425 if(cc<0) {
3426 emit_storereg(CCREG,HOST_TEMPREG);
3427 }
3428 //emit_popa();
3429 restore_regs(reglist);
b96d3df7 3430#endif
57871462 3431}
3432
3433do_unalignedwritestub(int n)
3434{
b7918751 3435 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3436 literal_pool(256);
57871462 3437 set_jump_target(stubs[n][1],(int)out);
b7918751 3438
3439 int i=stubs[n][3];
3440 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3441 int addr=stubs[n][5];
3442 u_int reglist=stubs[n][7];
3443 signed char *i_regmap=i_regs->regmap;
3444 int temp2=get_reg(i_regmap,FTEMP);
3445 int rt;
3446 int ds, real_rs;
3447 rt=get_reg(i_regmap,rs2[i]);
3448 assert(rt>=0);
3449 assert(addr>=0);
3450 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3451 reglist|=(1<<addr);
3452 reglist&=~(1<<temp2);
3453
b96d3df7 3454#if 1
3455 // don't bother with it and call write handler
3456 save_regs(reglist);
3457 pass_args(addr,rt);
3458 int cc=get_reg(i_regmap,CCREG);
3459 if(cc<0)
3460 emit_loadreg(CCREG,2);
2573466a 3461 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3462 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3463 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3464 if(cc<0)
3465 emit_storereg(CCREG,2);
3466 restore_regs(reglist);
3467 emit_jmp(stubs[n][2]); // return address
3468#else
b7918751 3469 emit_andimm(addr,0xfffffffc,temp2);
3470 emit_writeword(temp2,(int)&address);
3471
3472 save_regs(reglist);
97a238a6 3473#ifndef PCSX
b7918751 3474 ds=i_regs!=&regs[i];
3475 real_rs=get_reg(i_regmap,rs1[i]);
3476 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3477 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3478 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3479 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3480#endif
b7918751 3481 emit_shrimm(addr,16,1);
3482 int cc=get_reg(i_regmap,CCREG);
3483 if(cc<0) {
3484 emit_loadreg(CCREG,2);
3485 }
3486 emit_movimm((u_int)readmem,0);
3487 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3488#ifndef PCSX
3489 // pagefault address
3490 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3491#endif
b7918751 3492 emit_call((int)&indirect_jump_indexed);
3493 restore_regs(reglist);
3494
3495 emit_readword((int)&readmem_dword,temp2);
3496 int temp=addr; //hmh
3497 emit_shlimm(addr,3,temp);
3498 emit_andimm(temp,24,temp);
3499#ifdef BIG_ENDIAN_MIPS
3500 if (opcode[i]==0x2e) // SWR
3501#else
3502 if (opcode[i]==0x2a) // SWL
3503#endif
3504 emit_xorimm(temp,24,temp);
3505 emit_movimm(-1,HOST_TEMPREG);
55439448 3506 if (opcode[i]==0x2a) { // SWL
b7918751 3507 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3508 emit_orrshr(rt,temp,temp2);
3509 }else{
3510 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3511 emit_orrshl(rt,temp,temp2);
3512 }
3513 emit_readword((int)&address,addr);
3514 emit_writeword(temp2,(int)&word);
3515 //save_regs(reglist); // don't need to, no state changes
3516 emit_shrimm(addr,16,1);
3517 emit_movimm((u_int)writemem,0);
3518 //emit_call((int)&indirect_jump_indexed);
3519 emit_mov(15,14);
3520 emit_readword_dualindexedx4(0,1,15);
3521 emit_readword((int)&Count,HOST_TEMPREG);
3522 emit_readword((int)&next_interupt,2);
3523 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3524 emit_writeword(2,(int)&last_count);
3525 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3526 if(cc<0) {
3527 emit_storereg(CCREG,HOST_TEMPREG);
3528 }
3529 restore_regs(reglist);
57871462 3530 emit_jmp(stubs[n][2]); // return address
b96d3df7 3531#endif
57871462 3532}
3533
/*
 * Debug helper: print the register values passed in, in the order
 * a b c d ebp esi edi, followed in parentheses by the word located just
 * below the first parameter in memory.  esp is accepted but not printed.
 *
 * NOTE(review): reading (&edi)[-1] reaches outside the parameter object;
 * what it yields depends on the calling convention -- confirm before
 * relying on the last value.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_param=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,first_param[-1]);
}
3538
3539do_invstub(int n)
3540{
3541 literal_pool(20);
3542 u_int reglist=stubs[n][3];
3543 set_jump_target(stubs[n][1],(int)out);
3544 save_regs(reglist);
3545 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3546 emit_call((int)&invalidate_addr);
3547 restore_regs(reglist);
3548 emit_jmp(stubs[n][2]); // return address
3549}
3550
3551int do_dirty_stub(int i)
3552{
3553 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3554 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3555 #ifdef PCSX
3556 addr=(u_int)source;
3557 #endif
57871462 3558 // Careful about the code output here, verify_dirty needs to parse it.
3559 #ifdef ARMv5_ONLY
ac545b3a 3560 emit_loadlp(addr,1);
57871462 3561 emit_loadlp((int)copy,2);
3562 emit_loadlp(slen*4,3);
3563 #else
ac545b3a 3564 emit_movw(addr&0x0000FFFF,1);
57871462 3565 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3566 emit_movt(addr&0xFFFF0000,1);
57871462 3567 emit_movt(((u_int)copy)&0xFFFF0000,2);
3568 emit_movw(slen*4,3);
3569 #endif
3570 emit_movimm(start+i*4,0);
3571 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3572 int entry=(int)out;
3573 load_regs_entry(i);
3574 if(entry==(int)out) entry=instr_addr[i];
3575 emit_jmp(instr_addr[i]);
3576 return entry;
3577}
3578
3579void do_dirty_stub_ds()
3580{
3581 // Careful about the code output here, verify_dirty needs to parse it.
3582 #ifdef ARMv5_ONLY
3583 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3584 emit_loadlp((int)copy,2);
3585 emit_loadlp(slen*4,3);
3586 #else
3587 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3588 emit_movw(((u_int)copy)&0x0000FFFF,2);
3589 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3590 emit_movt(((u_int)copy)&0xFFFF0000,2);
3591 emit_movw(slen*4,3);
3592 #endif
3593 emit_movimm(start+1,0);
3594 emit_call((int)&verify_code_ds);
3595}
3596
3597do_cop1stub(int n)
3598{
3599 literal_pool(256);
3600 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3601 set_jump_target(stubs[n][1],(int)out);
3602 int i=stubs[n][3];
3d624f89 3603// int rs=stubs[n][4];
57871462 3604 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3605 int ds=stubs[n][6];
3606 if(!ds) {
3607 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3608 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3609 }
3610 //else {printf("fp exception in delay slot\n");}
3611 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3612 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3613 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3614 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3615 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3616}
3617
63cb0298 3618#ifndef DISABLE_TLB
3619
57871462 3620/* TLB */
3621
3622int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3623{
3624 if(c) {
3625 if((signed int)addr>=(signed int)0xC0000000) {
3626 // address_generation already loaded the const
3627 emit_readword_dualindexedx4(FP,map,map);
3628 }
3629 else
3630 return -1; // No mapping
3631 }
3632 else {
3633 assert(s!=map);
3634 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3635 emit_addsr12(map,s,map);
3636 // Schedule this while we wait on the load
3637 //if(x) emit_xorimm(s,x,ar);
3638 if(shift>=0) emit_shlimm(s,3,shift);
3639 if(~a) emit_andimm(s,a,ar);
3640 emit_readword_dualindexedx4(FP,map,map);
3641 }
3642 return map;
3643}
3644int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3645{
3646 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3647 emit_test(map,map);
3648 *jaddr=(int)out;
3649 emit_js(0);
3650 }
3651 return map;
3652}
3653
3654int gen_tlb_addr_r(int ar, int map) {
3655 if(map>=0) {
3656 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3657 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3658 }
3659}
3660
3661int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3662{
3663 if(c) {
3664 if(addr<0x80800000||addr>=0xC0000000) {
3665 // address_generation already loaded the const
3666 emit_readword_dualindexedx4(FP,map,map);
3667 }
3668 else
3669 return -1; // No mapping
3670 }
3671 else {
3672 assert(s!=map);
3673 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3674 emit_addsr12(map,s,map);
3675 // Schedule this while we wait on the load
3676 //if(x) emit_xorimm(s,x,ar);
3677 emit_readword_dualindexedx4(FP,map,map);
3678 }
3679 return map;
3680}
3681int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3682{
3683 if(!c||addr<0x80800000||addr>=0xC0000000) {
3684 emit_testimm(map,0x40000000);
3685 *jaddr=(int)out;
3686 emit_jne(0);
3687 }
3688}
3689
3690int gen_tlb_addr_w(int ar, int map) {
3691 if(map>=0) {
3692 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3693 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3694 }
3695}
3696
3697// Generate the address of the memory_map entry, relative to dynarec_local
3698generate_map_const(u_int addr,int reg) {
3699 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3700 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3701}
3702
63cb0298 3703#else
3704
/*
 * TLB support compiled out: every TLB helper collapses to a stub that
 * emits nothing and returns 0.  The parameter lists are deliberately left
 * empty so existing call sites can keep passing their arguments.
 */
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3711
3712#endif // DISABLE_TLB
3713
57871462 3714/* Special assem */
3715
3716void shift_assemble_arm(int i,struct regstat *i_regs)
3717{
3718 if(rt1[i]) {
3719 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3720 {
3721 signed char s,t,shift;
3722 t=get_reg(i_regs->regmap,rt1[i]);
3723 s=get_reg(i_regs->regmap,rs1[i]);
3724 shift=get_reg(i_regs->regmap,rs2[i]);
3725 if(t>=0){
3726 if(rs1[i]==0)
3727 {
3728 emit_zeroreg(t);
3729 }
3730 else if(rs2[i]==0)
3731 {
3732 assert(s>=0);
3733 if(s!=t) emit_mov(s,t);
3734 }
3735 else
3736 {
3737 emit_andimm(shift,31,HOST_TEMPREG);
3738 if(opcode2[i]==4) // SLLV
3739 {
3740 emit_shl(s,HOST_TEMPREG,t);
3741 }
3742 if(opcode2[i]==6) // SRLV
3743 {
3744 emit_shr(s,HOST_TEMPREG,t);
3745 }
3746 if(opcode2[i]==7) // SRAV
3747 {
3748 emit_sar(s,HOST_TEMPREG,t);
3749 }
3750 }
3751 }
3752 } else { // DSLLV/DSRLV/DSRAV
3753 signed char sh,sl,th,tl,shift;
3754 th=get_reg(i_regs->regmap,rt1[i]|64);
3755 tl=get_reg(i_regs->regmap,rt1[i]);
3756 sh=get_reg(i_regs->regmap,rs1[i]|64);
3757 sl=get_reg(i_regs->regmap,rs1[i]);
3758 shift=get_reg(i_regs->regmap,rs2[i]);
3759 if(tl>=0){
3760 if(rs1[i]==0)
3761 {
3762 emit_zeroreg(tl);
3763 if(th>=0) emit_zeroreg(th);
3764 }
3765 else if(rs2[i]==0)
3766 {
3767 assert(sl>=0);
3768 if(sl!=tl) emit_mov(sl,tl);
3769 if(th>=0&&sh!=th) emit_mov(sh,th);
3770 }
3771 else
3772 {
3773 // FIXME: What if shift==tl ?
3774 assert(shift!=tl);
3775 int temp=get_reg(i_regs->regmap,-1);
3776 int real_th=th;
3777 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3778 assert(sl>=0);
3779 assert(sh>=0);
3780 emit_andimm(shift,31,HOST_TEMPREG);
3781 if(opcode2[i]==0x14) // DSLLV
3782 {
3783 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3784 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3785 emit_orrshr(sl,HOST_TEMPREG,th);
3786 emit_andimm(shift,31,HOST_TEMPREG);
3787 emit_testimm(shift,32);
3788 emit_shl(sl,HOST_TEMPREG,tl);
3789 if(th>=0) emit_cmovne_reg(tl,th);
3790 emit_cmovne_imm(0,tl);
3791 }
3792 if(opcode2[i]==0x16) // DSRLV
3793 {
3794 assert(th>=0);
3795 emit_shr(sl,HOST_TEMPREG,tl);
3796 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3797 emit_orrshl(sh,HOST_TEMPREG,tl);
3798 emit_andimm(shift,31,HOST_TEMPREG);
3799 emit_testimm(shift,32);
3800 emit_shr(sh,HOST_TEMPREG,th);
3801 emit_cmovne_reg(th,tl);
3802 if(real_th>=0) emit_cmovne_imm(0,th);
3803 }
3804 if(opcode2[i]==0x17) // DSRAV
3805 {
3806 assert(th>=0);
3807 emit_shr(sl,HOST_TEMPREG,tl);
3808 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3809 if(real_th>=0) {
3810 assert(temp>=0);
3811 emit_sarimm(th,31,temp);
3812 }
3813 emit_orrshl(sh,HOST_TEMPREG,tl);
3814 emit_andimm(shift,31,HOST_TEMPREG);
3815 emit_testimm(shift,32);
3816 emit_sar(sh,HOST_TEMPREG,th);
3817 emit_cmovne_reg(th,tl);
3818 if(real_th>=0) emit_cmovne_reg(temp,th);
3819 }
3820 }
3821 }
3822 }
3823 }
3824}
ffb0b9e0 3825
3826#ifdef PCSX
3827static void speculate_mov(int rs,int rt)
3828{
3829 if(rt!=0) {
3830 smrv_strong_next|=1<<rt;
3831 smrv[rt]=smrv[rs];
3832 }
3833}
3834
3835static void speculate_mov_weak(int rs,int rt)
3836{
3837 if(rt!=0) {
3838 smrv_weak_next|=1<<rt;
3839 smrv[rt]=smrv[rs];
3840 }
3841}
3842
3843static void speculate_register_values(int i)
3844{
3845 if(i==0) {
3846 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3847 // gp,sp are likely to stay the same throughout the block
3848 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3849 smrv_weak_next=~smrv_strong_next;
3850 //printf(" llr %08x\n", smrv[4]);
3851 }
3852 smrv_strong=smrv_strong_next;
3853 smrv_weak=smrv_weak_next;
3854 switch(itype[i]) {
3855 case ALU:
3856 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3857 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3858 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3859 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3860 else {
3861 smrv_strong_next&=~(1<<rt1[i]);
3862 smrv_weak_next&=~(1<<rt1[i]);
3863 }
3864 break;
3865 case SHIFTIMM:
3866 smrv_strong_next&=~(1<<rt1[i]);
3867 smrv_weak_next&=~(1<<rt1[i]);
3868 // fallthrough
3869 case IMM16:
3870 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3871 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3872 if(hr>=0) {
3873 if(get_final_value(hr,i,&value))
3874 smrv[rt1[i]]=value;
3875 else smrv[rt1[i]]=constmap[i][hr];
3876 smrv_strong_next|=1<<rt1[i];
3877 }
3878 }
3879 else {
3880 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3881 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3882 }
3883 break;
3884 case LOAD:
3885 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3886 // special case for BIOS
3887 smrv[rt1[i]]=0xa0000000;
3888 smrv_strong_next|=1<<rt1[i];
3889 break;
3890 }
3891 // fallthrough
3892 case SHIFT:
3893 case LOADLR:
3894 case MOV:
3895 smrv_strong_next&=~(1<<rt1[i]);
3896 smrv_weak_next&=~(1<<rt1[i]);
3897 break;
3898 case COP0:
3899 case COP2:
3900 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3901 smrv_strong_next&=~(1<<rt1[i]);
3902 smrv_weak_next&=~(1<<rt1[i]);
3903 }
3904 break;
3905 case C2LS:
3906 if (opcode[i]==0x32) { // LWC2
3907 smrv_strong_next&=~(1<<rt1[i]);
3908 smrv_weak_next&=~(1<<rt1[i]);
3909 }
3910 break;
3911 }
3912#if 0
3913 int r=4;
3914 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3915 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3916#endif
3917}
3918
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default class; also used for anything not listed below */
  MTYPE_8020 = 1, /* 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* below 0x00200000 */
  MTYPE_A000 = 3, /* 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4, /* 0x1f800000..0x1f800fff */
};
3926
3927static int get_ptr_mem_type(u_int a)
3928{
3929 if(a < 0x00200000) {
3930 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3931 // return wrong, must use memhandler for BIOS self-test to pass
3932 // 007 does similar stuff from a00 mirror, weird stuff
3933 return MTYPE_8000;
3934 return MTYPE_0000;
3935 }
3936 if(0x1f800000 <= a && a < 0x1f801000)
3937 return MTYPE_1F80;
3938 if(0x80200000 <= a && a < 0x80800000)
3939 return MTYPE_8020;
3940 if(0xa0000000 <= a && a < 0xa0200000)
3941 return MTYPE_A000;
3942 return MTYPE_8000;
3943}
3944#endif
3945
3946static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3947{
3948 int jaddr,type=0;
3949
3950#ifdef PCSX
3951 int mr=rs1[i];
3952 if(((smrv_strong|smrv_weak)>>mr)&1) {
3953 type=get_ptr_mem_type(smrv[mr]);
3954 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3955 }
3956 else {
3957 // use the mirror we are running on
3958 type=get_ptr_mem_type(start);
3959 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3960 }
3961
3962 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3963 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3964 addr=*addr_reg_override=HOST_TEMPREG;
3965 type=0;
3966 }
3967 else if(type==MTYPE_0000) { // RAM 0 mirror
3968 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3969 addr=*addr_reg_override=HOST_TEMPREG;
3970 type=0;
3971 }
3972 else if(type==MTYPE_A000) { // RAM A mirror
3973 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3974 addr=*addr_reg_override=HOST_TEMPREG;
3975 type=0;
3976 }
3977 else if(type==MTYPE_1F80) { // scratchpad
3978 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3979 emit_cmpimm(HOST_TEMPREG,0x1000);
3980 jaddr=(int)out;
3981 emit_jc(0);
3982 }
3983#endif
3984
3985 if(type==0)
3986 {
3987 emit_cmpimm(addr,RAM_SIZE);
3988 jaddr=(int)out;
3989 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3990 // Hint to branch predictor that the branch is unlikely to be taken
3991 if(rs1[i]>=28)
3992 emit_jno_unlikely(0);
3993 else
3994 #endif
3995 emit_jno(0);
3996 }
3997
3998 return jaddr;
3999}
4000
57871462 4001#define shift_assemble shift_assemble_arm
4002
4003void loadlr_assemble_arm(int i,struct regstat *i_regs)
4004{
4005 int s,th,tl,temp,temp2,addr,map=-1;
4006 int offset;
4007 int jaddr=0;
af4ee1fe 4008 int memtarget=0,c=0;
ffb0b9e0 4009 int fastload_reg_override=0;
57871462 4010 u_int hr,reglist=0;
4011 th=get_reg(i_regs->regmap,rt1[i]|64);
4012 tl=get_reg(i_regs->regmap,rt1[i]);
4013 s=get_reg(i_regs->regmap,rs1[i]);
4014 temp=get_reg(i_regs->regmap,-1);
4015 temp2=get_reg(i_regs->regmap,FTEMP);
4016 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4017 assert(addr<0);
4018 offset=imm[i];
4019 for(hr=0;hr<HOST_REGS;hr++) {
4020 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4021 }
4022 reglist|=1<<temp;
4023 if(offset||s<0||c) addr=temp2;
4024 else addr=s;
4025 if(s>=0) {
4026 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4027 if(c) {
4028 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4029 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4030 }
57871462 4031 }
535d208a 4032 if(!using_tlb) {
4033 if(!c) {
4034 #ifdef RAM_OFFSET
4035 map=get_reg(i_regs->regmap,ROREG);
4036 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4037 #endif
4038 emit_shlimm(addr,3,temp);
4039 if (opcode[i]==0x22||opcode[i]==0x26) {
4040 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4041 }else{
535d208a 4042 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4043 }
ffb0b9e0 4044 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4045 }
4046 else {
4047 if (opcode[i]==0x22||opcode[i]==0x26) {
4048 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4049 }else{
4050 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4051 }
57871462 4052 }
535d208a 4053 }else{ // using tlb
4054 int a;
4055 if(c) {
4056 a=-1;
4057 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4058 a=0xFFFFFFFC; // LWL/LWR
4059 }else{
4060 a=0xFFFFFFF8; // LDL/LDR
4061 }
4062 map=get_reg(i_regs->regmap,TLREG);
4063 assert(map>=0);
ea3d2e6e 4064 reglist&=~(1<<map);
535d208a 4065 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4066 if(c) {
4067 if (opcode[i]==0x22||opcode[i]==0x26) {
4068 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4069 }else{
4070 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4071 }
535d208a 4072 }
4073 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4074 }
4075 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4076 if(!c||memtarget) {
ffb0b9e0 4077 int a=temp2;
4078 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4079 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4080 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4081 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4082 }
4083 else
4084 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4085 if(rt1[i]) {
4086 assert(tl>=0);
57871462 4087 emit_andimm(temp,24,temp);
2002a1db 4088#ifdef BIG_ENDIAN_MIPS
4089 if (opcode[i]==0x26) // LWR
4090#else
4091 if (opcode[i]==0x22) // LWL
4092#endif
4093 emit_xorimm(temp,24,temp);
57871462 4094 emit_movimm(-1,HOST_TEMPREG);
4095 if (opcode[i]==0x26) {
4096 emit_shr(temp2,temp,temp2);
4097 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4098 }else{
4099 emit_shl(temp2,temp,temp2);
4100 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4101 }
4102 emit_or(temp2,tl,tl);
57871462 4103 }
535d208a 4104 //emit_storereg(rt1[i],tl); // DEBUG
4105 }
4106 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4107 // FIXME: little endian, fastload_reg_override
535d208a 4108 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4109 if(!c||memtarget) {
4110 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4111 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4112 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4113 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4114 }
4115 else
4116 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4117 if(rt1[i]) {
4118 assert(th>=0);
4119 assert(tl>=0);
57871462 4120 emit_testimm(temp,32);
4121 emit_andimm(temp,24,temp);
4122 if (opcode[i]==0x1A) { // LDL
4123 emit_rsbimm(temp,32,HOST_TEMPREG);
4124 emit_shl(temp2h,temp,temp2h);
4125 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4126 emit_movimm(-1,HOST_TEMPREG);
4127 emit_shl(temp2,temp,temp2);
4128 emit_cmove_reg(temp2h,th);
4129 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4130 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4131 emit_orreq(temp2,tl,tl);
4132 emit_orrne(temp2,th,th);
4133 }
4134 if (opcode[i]==0x1B) { // LDR
4135 emit_xorimm(temp,24,temp);
4136 emit_rsbimm(temp,32,HOST_TEMPREG);
4137 emit_shr(temp2,temp,temp2);
4138 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4139 emit_movimm(-1,HOST_TEMPREG);
4140 emit_shr(temp2h,temp,temp2h);
4141 emit_cmovne_reg(temp2,tl);
4142 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4143 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4144 emit_orrne(temp2h,th,th);
4145 emit_orreq(temp2h,tl,tl);
4146 }
4147 }
4148 }
4149}
4150#define loadlr_assemble loadlr_assemble_arm
4151
4152void cop0_assemble(int i,struct regstat *i_regs)
4153{
4154 if(opcode2[i]==0) // MFC0
4155 {
4156 signed char t=get_reg(i_regs->regmap,rt1[i]);
4157 char copr=(source[i]>>11)&0x1f;
4158 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4159 if(t>=0&&rt1[i]!=0) {
7139f3c8 4160#ifdef MUPEN64
57871462 4161 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4162 emit_movimm((source[i]>>11)&0x1f,1);
4163 emit_writeword(0,(int)&PC);
4164 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4165 if(copr==9) {
4166 emit_readword((int)&last_count,ECX);
4167 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4168 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4169 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4170 emit_writeword(HOST_CCREG,(int)&Count);
4171 }
4172 emit_call((int)MFC0);
4173 emit_readword((int)&readmem_dword,t);
7139f3c8 4174#else
4175 emit_readword((int)&reg_cop0+copr*4,t);
4176#endif
57871462 4177 }
4178 }
4179 else if(opcode2[i]==4) // MTC0
4180 {
4181 signed char s=get_reg(i_regs->regmap,rs1[i]);
4182 char copr=(source[i]>>11)&0x1f;
4183 assert(s>=0);
63cb0298 4184#ifdef MUPEN64
57871462 4185 emit_writeword(s,(int)&readmem_dword);
4186 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4187 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4188 emit_movimm((source[i]>>11)&0x1f,1);
4189 emit_writeword(0,(int)&PC);
4190 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4191#else
4192 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4193#endif
4194 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4195 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4196 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4197 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4198 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4199 emit_writeword(HOST_CCREG,(int)&Count);
4200 }
4201 // What a mess. The status register (12) can enable interrupts,
4202 // so needs a special case to handle a pending interrupt.
4203 // The interrupt must be taken immediately, because a subsequent
4204 // instruction might disable interrupts again.
7139f3c8 4205 if(copr==12||copr==13) {
fca1aef2 4206#ifdef PCSX
4207 if (is_delayslot) {
4208 // burn cycles to cause cc_interrupt, which will
4209 // reschedule next_interupt. Relies on CCREG from above.
4210 assem_debug("MTC0 DS %d\n", copr);
4211 emit_writeword(HOST_CCREG,(int)&last_count);
4212 emit_movimm(0,HOST_CCREG);
4213 emit_storereg(CCREG,HOST_CCREG);
63cb0298 4214 if(s!=1)
4215 emit_mov(s,1);
fca1aef2 4216 emit_movimm(copr,0);
4217 emit_call((int)pcsx_mtc0_ds);
4218 return;
4219 }
4220#endif
63cb0298 4221 emit_movimm(start+i*4+4,HOST_TEMPREG);
4222 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4223 emit_movimm(0,HOST_TEMPREG);
4224 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4225 }
4226 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4227 //else
fca1aef2 4228#ifdef PCSX
63cb0298 4229 if(s!=1)
4230 emit_mov(s,1);
fca1aef2 4231 emit_movimm(copr,0);
4232 emit_call((int)pcsx_mtc0);
4233#else
57871462 4234 emit_call((int)MTC0);
fca1aef2 4235#endif
7139f3c8 4236 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4237 emit_readword((int)&Count,HOST_CCREG);
4238 emit_readword((int)&next_interupt,ECX);
2573466a 4239 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4240 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4241 emit_writeword(ECX,(int)&last_count);
4242 emit_storereg(CCREG,HOST_CCREG);
4243 }
7139f3c8 4244 if(copr==12||copr==13) {
57871462 4245 assert(!is_delayslot);
4246 emit_readword((int)&pending_exception,14);
4247 }
4248 emit_loadreg(rs1[i],s);
4249 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4250 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4251 if(copr==12||copr==13) {
57871462 4252 emit_test(14,14);
4253 emit_jne((int)&do_interrupt);
4254 }
4255 cop1_usable=0;
4256 }
4257 else
4258 {
4259 assert(opcode2[i]==0x10);
3d624f89 4260#ifndef DISABLE_TLB
57871462 4261 if((source[i]&0x3f)==0x01) // TLBR
4262 emit_call((int)TLBR);
4263 if((source[i]&0x3f)==0x02) // TLBWI
4264 emit_call((int)TLBWI_new);
4265 if((source[i]&0x3f)==0x06) { // TLBWR
4266 // The TLB entry written by TLBWR is dependent on the count,
4267 // so update the cycle count
4268 emit_readword((int)&last_count,ECX);
4269 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4270 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4271 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4272 emit_writeword(HOST_CCREG,(int)&Count);
4273 emit_call((int)TLBWR_new);
4274 }
4275 if((source[i]&0x3f)==0x08) // TLBP
4276 emit_call((int)TLBP);
3d624f89 4277#endif
576bbd8f 4278#ifdef PCSX
4279 if((source[i]&0x3f)==0x10) // RFE
4280 {
4281 emit_readword((int)&Status,0);
4282 emit_andimm(0,0x3c,1);
4283 emit_andimm(0,~0xf,0);
4284 emit_orrshr_imm(1,2,0);
4285 emit_writeword(0,(int)&Status);
4286 }
4287#else
57871462 4288 if((source[i]&0x3f)==0x18) // ERET
4289 {
4290 int count=ccadj[i];
4291 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4292 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4293 emit_jmp((int)jump_eret);
4294 }
576bbd8f 4295#endif
57871462 4296 }
4297}
4298
b9b61529 4299static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4300{
4301 switch (copr) {
4302 case 1:
4303 case 3:
4304 case 5:
4305 case 8:
4306 case 9:
4307 case 10:
4308 case 11:
4309 emit_readword((int)&reg_cop2d[copr],tl);
4310 emit_signextend16(tl,tl);
4311 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4312 break;
4313 case 7:
4314 case 16:
4315 case 17:
4316 case 18:
4317 case 19:
4318 emit_readword((int)&reg_cop2d[copr],tl);
4319 emit_andimm(tl,0xffff,tl);
4320 emit_writeword(tl,(int)&reg_cop2d[copr]);
4321 break;
4322 case 15:
4323 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4324 emit_writeword(tl,(int)&reg_cop2d[copr]);
4325 break;
4326 case 28:
b9b61529 4327 case 29:
4328 emit_readword((int)&reg_cop2d[9],temp);
4329 emit_testimm(temp,0x8000); // do we need this?
4330 emit_andimm(temp,0xf80,temp);
4331 emit_andne_imm(temp,0,temp);
f70d384d 4332 emit_shrimm(temp,7,tl);
b9b61529 4333 emit_readword((int)&reg_cop2d[10],temp);
4334 emit_testimm(temp,0x8000);
4335 emit_andimm(temp,0xf80,temp);
4336 emit_andne_imm(temp,0,temp);
f70d384d 4337 emit_orrshr_imm(temp,2,tl);
b9b61529 4338 emit_readword((int)&reg_cop2d[11],temp);
4339 emit_testimm(temp,0x8000);
4340 emit_andimm(temp,0xf80,temp);
4341 emit_andne_imm(temp,0,temp);
f70d384d 4342 emit_orrshl_imm(temp,3,tl);
b9b61529 4343 emit_writeword(tl,(int)&reg_cop2d[copr]);
4344 break;
4345 default:
4346 emit_readword((int)&reg_cop2d[copr],tl);
4347 break;
4348 }
4349}
4350
4351static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4352{
4353 switch (copr) {
4354 case 15:
4355 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4356 emit_writeword(sl,(int)&reg_cop2d[copr]);
4357 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4358 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4359 emit_writeword(sl,(int)&reg_cop2d[14]);
4360 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4361 break;
4362 case 28:
4363 emit_andimm(sl,0x001f,temp);
f70d384d 4364 emit_shlimm(temp,7,temp);
b9b61529 4365 emit_writeword(temp,(int)&reg_cop2d[9]);
4366 emit_andimm(sl,0x03e0,temp);
f70d384d 4367 emit_shlimm(temp,2,temp);
b9b61529 4368 emit_writeword(temp,(int)&reg_cop2d[10]);
4369 emit_andimm(sl,0x7c00,temp);
f70d384d 4370 emit_shrimm(temp,3,temp);
b9b61529 4371 emit_writeword(temp,(int)&reg_cop2d[11]);
4372 emit_writeword(sl,(int)&reg_cop2d[28]);
4373 break;
4374 case 30:
4375 emit_movs(sl,temp);
4376 emit_mvnmi(temp,temp);
4377 emit_clz(temp,temp);
4378 emit_writeword(sl,(int)&reg_cop2d[30]);
4379 emit_writeword(temp,(int)&reg_cop2d[31]);
4380 break;
b9b61529 4381 case 31:
4382 break;
4383 default:
4384 emit_writeword(sl,(int)&reg_cop2d[copr]);
4385 break;
4386 }
4387}
4388
4389void cop2_assemble(int i,struct regstat *i_regs)
4390{
4391 u_int copr=(source[i]>>11)&0x1f;
4392 signed char temp=get_reg(i_regs->regmap,-1);
4393 if (opcode2[i]==0) { // MFC2
4394 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4395 if(tl>=0&&rt1[i]!=0)
b9b61529 4396 cop2_get_dreg(copr,tl,temp);
4397 }
4398 else if (opcode2[i]==4) { // MTC2
4399 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4400 cop2_put_dreg(copr,sl,temp);
4401 }
4402 else if (opcode2[i]==2) // CFC2
4403 {
4404 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4405 if(tl>=0&&rt1[i]!=0)
b9b61529 4406 emit_readword((int)&reg_cop2c[copr],tl);
4407 }
4408 else if (opcode2[i]==6) // CTC2
4409 {
4410 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4411 switch(copr) {
4412 case 4:
4413 case 12:
4414 case 20:
4415 case 26:
4416 case 27:
4417 case 29:
4418 case 30:
4419 emit_signextend16(sl,temp);
4420 break;
4421 case 31:
4422 //value = value & 0x7ffff000;
4423 //if (value & 0x7f87e000) value |= 0x80000000;
4424 emit_shrimm(sl,12,temp);
4425 emit_shlimm(temp,12,temp);
4426 emit_testimm(temp,0x7f000000);
4427 emit_testeqimm(temp,0x00870000);
4428 emit_testeqimm(temp,0x0000e000);
4429 emit_orrne_imm(temp,0x80000000,temp);
4430 break;
4431 default:
4432 temp=sl;
4433 break;
4434 }
4435 emit_writeword(temp,(int)&reg_cop2c[copr]);
4436 assert(sl>=0);
4437 }
4438}
4439
054175e9 4440static void c2op_prologue(u_int op,u_int reglist)
4441{
4442 save_regs_all(reglist);
82ed88eb 4443#ifdef PCNT
4444 emit_movimm(op,0);
4445 emit_call((int)pcnt_gte_start);
4446#endif
054175e9 4447 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4448}
4449
4450static void c2op_epilogue(u_int op,u_int reglist)
4451{
82ed88eb 4452#ifdef PCNT
4453 emit_movimm(op,0);
4454 emit_call((int)pcnt_gte_end);
4455#endif
054175e9 4456 restore_regs_all(reglist);
4457}
4458
4459static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4460{
4461 signed char temp=get_reg(i_regs->regmap,-1);
4462 u_int c2op=source[i]&0x3f;
4463 u_int hr,reglist=0;
054175e9 4464 int need_flags,need_ir;
b9b61529 4465 for(hr=0;hr<HOST_REGS;hr++) {
4466 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4467 }
b9b61529 4468
4469 if (gte_handlers[c2op]!=NULL) {
bedfea38 4470 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4471 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
4472 assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n",
4473 gte_unneeded[i+1],need_flags,need_ir);
bedfea38 4474#ifdef ARMv5_ONLY
4475 // let's take more risk here
4476 need_flags=need_flags&&gte_reads_flags;
4477#endif
054175e9 4478 switch(c2op) {
4479 case GTE_MVMVA: {
4480 int shift = (source[i] >> 19) & 1;
4481 int v = (source[i] >> 15) & 3;
4482 int cv = (source[i] >> 13) & 3;
4483 int mx = (source[i] >> 17) & 3;
4484 int lm = (source[i] >> 10) & 1;
4485 reglist&=0x10ff; // +{r4-r7}
4486 c2op_prologue(c2op,reglist);
4487 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4488 if(v<3)
4489 emit_ldrd(v*8,0,4);
4490 else {
4491 emit_movzwl_indexed(9*4,0,4); // gteIR
4492 emit_movzwl_indexed(10*4,0,6);
4493 emit_movzwl_indexed(11*4,0,5);
4494 emit_orrshl_imm(6,16,4);
4495 }
4496 if(mx<3)
4497 emit_addimm(0,32*4+mx*8*4,6);
4498 else
4499 emit_readword((int)&zeromem_ptr,6);
4500 if(cv<3)
4501 emit_addimm(0,32*4+(cv*8+5)*4,7);
4502 else
4503 emit_readword((int)&zeromem_ptr,7);
4504#ifdef __ARM_NEON__
4505 emit_movimm(source[i],1); // opcode
4506 emit_call((int)gteMVMVA_part_neon);
4507 if(need_flags) {
4508 emit_movimm(lm,1);
4509 emit_call((int)gteMACtoIR_flags_neon);
4510 }
4511#else
4512 if(cv==3&&shift)
4513 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4514 else {
4515 emit_movimm(shift,1);
4516 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4517 }
4518 if(need_flags||need_ir) {
4519 if(need_flags)
4520 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4521 else
4522 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked
4523 }
4524#endif
4525 break;
4526 }
b9b61529 4527
054175e9 4528 default:
4529 reglist&=0x100f;
4530 c2op_prologue(c2op,reglist);
4531 emit_movimm(source[i],1); // opcode
4532 emit_writeword(1,(int)&psxRegs.code);
4533 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4534 break;
4535 }
4536 c2op_epilogue(c2op,reglist);
4537 }
b9b61529 4538}
4539
4540void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4541{
4542 // XXX: should just just do the exception instead
4543 if(!cop1_usable) {
4544 int jaddr=(int)out;
4545 emit_jmp(0);
4546 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4547 cop1_usable=1;
4548 }
4549}
4550
57871462 4551void cop1_assemble(int i,struct regstat *i_regs)
4552{
3d624f89 4553#ifndef DISABLE_COP1
57871462 4554 // Check cop1 unusable
4555 if(!cop1_usable) {
4556 signed char rs=get_reg(i_regs->regmap,CSREG);
4557 assert(rs>=0);
4558 emit_testimm(rs,0x20000000);
4559 int jaddr=(int)out;
4560 emit_jeq(0);
4561 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4562 cop1_usable=1;
4563 }
4564 if (opcode2[i]==0) { // MFC1
4565 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4566 if(tl>=0) {
4567 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4568 emit_readword_indexed(0,tl,tl);
4569 }
4570 }
4571 else if (opcode2[i]==1) { // DMFC1
4572 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4573 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4574 if(tl>=0) {
4575 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4576 if(th>=0) emit_readword_indexed(4,tl,th);
4577 emit_readword_indexed(0,tl,tl);
4578 }
4579 }
4580 else if (opcode2[i]==4) { // MTC1
4581 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4582 signed char temp=get_reg(i_regs->regmap,-1);
4583 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4584 emit_writeword_indexed(sl,0,temp);
4585 }
4586 else if (opcode2[i]==5) { // DMTC1
4587 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4588 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4589 signed char temp=get_reg(i_regs->regmap,-1);
4590 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4591 emit_writeword_indexed(sh,4,temp);
4592 emit_writeword_indexed(sl,0,temp);
4593 }
4594 else if (opcode2[i]==2) // CFC1
4595 {
4596 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4597 if(tl>=0) {
4598 u_int copr=(source[i]>>11)&0x1f;
4599 if(copr==0) emit_readword((int)&FCR0,tl);
4600 if(copr==31) emit_readword((int)&FCR31,tl);
4601 }
4602 }
4603 else if (opcode2[i]==6) // CTC1
4604 {
4605 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4606 u_int copr=(source[i]>>11)&0x1f;
4607 assert(sl>=0);
4608 if(copr==31)
4609 {
4610 emit_writeword(sl,(int)&FCR31);
4611 // Set the rounding mode
4612 //FIXME
4613 //char temp=get_reg(i_regs->regmap,-1);
4614 //emit_andimm(sl,3,temp);
4615 //emit_fldcw_indexed((int)&rounding_modes,temp);
4616 }
4617 }
3d624f89 4618#else
4619 cop1_unusable(i, i_regs);
4620#endif
57871462 4621}
4622
4623void fconv_assemble_arm(int i,struct regstat *i_regs)
4624{
3d624f89 4625#ifndef DISABLE_COP1
57871462 4626 signed char temp=get_reg(i_regs->regmap,-1);
4627 assert(temp>=0);
4628 // Check cop1 unusable
4629 if(!cop1_usable) {
4630 signed char rs=get_reg(i_regs->regmap,CSREG);
4631 assert(rs>=0);
4632 emit_testimm(rs,0x20000000);
4633 int jaddr=(int)out;
4634 emit_jeq(0);
4635 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4636 cop1_usable=1;
4637 }
4638
4639 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4640 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4641 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4642 emit_flds(temp,15);
4643 emit_ftosizs(15,15); // float->int, truncate
4644 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4645 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4646 emit_fsts(15,temp);
4647 return;
4648 }
4649 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4650 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4651 emit_vldr(temp,7);
4652 emit_ftosizd(7,13); // double->int, truncate
4653 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4654 emit_fsts(13,temp);
4655 return;
4656 }
4657
4658 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4659 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4660 emit_flds(temp,13);
4661 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4662 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4663 emit_fsitos(13,15);
4664 emit_fsts(15,temp);
4665 return;
4666 }
4667 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4668 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4669 emit_flds(temp,13);
4670 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4671 emit_fsitod(13,7);
4672 emit_vstr(7,temp);
4673 return;
4674 }
4675
4676 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4677 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4678 emit_flds(temp,13);
4679 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4680 emit_fcvtds(13,7);
4681 emit_vstr(7,temp);
4682 return;
4683 }
4684 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4685 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4686 emit_vldr(temp,7);
4687 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4688 emit_fcvtsd(7,13);
4689 emit_fsts(13,temp);
4690 return;
4691 }
4692 #endif
4693
4694 // C emulation code
4695
4696 u_int hr,reglist=0;
4697 for(hr=0;hr<HOST_REGS;hr++) {
4698 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4699 }
4700 save_regs(reglist);
4701
4702 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4703 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4704 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4705 emit_call((int)cvt_s_w);
4706 }
4707 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4708 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4709 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4710 emit_call((int)cvt_d_w);
4711 }
4712 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4713 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4714 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4715 emit_call((int)cvt_s_l);
4716 }
4717 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4718 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4719 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4720 emit_call((int)cvt_d_l);
4721 }
4722
4723 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4724 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4725 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4726 emit_call((int)cvt_d_s);
4727 }
4728 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4729 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4730 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4731 emit_call((int)cvt_w_s);
4732 }
4733 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4734 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4735 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4736 emit_call((int)cvt_l_s);
4737 }
4738
4739 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4740 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4741 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4742 emit_call((int)cvt_s_d);
4743 }
4744 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4745 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4746 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4747 emit_call((int)cvt_w_d);
4748 }
4749 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4750 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4751 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4752 emit_call((int)cvt_l_d);
4753 }
4754
4755 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4756 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4757 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4758 emit_call((int)round_l_s);
4759 }
4760 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4761 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4762 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4763 emit_call((int)trunc_l_s);
4764 }
4765 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4766 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4767 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4768 emit_call((int)ceil_l_s);
4769 }
4770 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4771 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4772 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4773 emit_call((int)floor_l_s);
4774 }
4775 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4776 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4777 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4778 emit_call((int)round_w_s);
4779 }
4780 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4781 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4782 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4783 emit_call((int)trunc_w_s);
4784 }
4785 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4786 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4787 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4788 emit_call((int)ceil_w_s);
4789 }
4790 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4791 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4792 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4793 emit_call((int)floor_w_s);
4794 }
4795
4796 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4797 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4798 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4799 emit_call((int)round_l_d);
4800 }
4801 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4802 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4803 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4804 emit_call((int)trunc_l_d);
4805 }
4806 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4807 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4808 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4809 emit_call((int)ceil_l_d);
4810 }
4811 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4812 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4813 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4814 emit_call((int)floor_l_d);
4815 }
4816 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4817 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4818 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4819 emit_call((int)round_w_d);
4820 }
4821 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4822 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4823 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4824 emit_call((int)trunc_w_d);
4825 }
4826 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4827 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4828 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4829 emit_call((int)ceil_w_d);
4830 }
4831 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4832 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4833 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4834 emit_call((int)floor_w_d);
4835 }
4836
4837 restore_regs(reglist);
3d624f89 4838#else
4839 cop1_unusable(i, i_regs);
4840#endif
57871462 4841}
4842#define fconv_assemble fconv_assemble_arm
4843
4844void fcomp_assemble(int i,struct regstat *i_regs)
4845{
3d624f89 4846#ifndef DISABLE_COP1
57871462 4847 signed char fs=get_reg(i_regs->regmap,FSREG);
4848 signed char temp=get_reg(i_regs->regmap,-1);
4849 assert(temp>=0);
4850 // Check cop1 unusable
4851 if(!cop1_usable) {
4852 signed char cs=get_reg(i_regs->regmap,CSREG);
4853 assert(cs>=0);
4854 emit_testimm(cs,0x20000000);
4855 int jaddr=(int)out;
4856 emit_jeq(0);
4857 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4858 cop1_usable=1;
4859 }
4860
4861 if((source[i]&0x3f)==0x30) {
4862 emit_andimm(fs,~0x800000,fs);
4863 return;
4864 }
4865
4866 if((source[i]&0x3e)==0x38) {
4867 // sf/ngle - these should throw exceptions for NaNs
4868 emit_andimm(fs,~0x800000,fs);
4869 return;
4870 }
4871
4872 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4873 if(opcode2[i]==0x10) {
4874 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4875 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4876 emit_orimm(fs,0x800000,fs);
4877 emit_flds(temp,14);
4878 emit_flds(HOST_TEMPREG,15);
4879 emit_fcmps(14,15);
4880 emit_fmstat();
4881 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4882 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4883 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4884 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4885 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4886 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4887 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4888 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4889 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4890 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4891 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4892 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4893 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4894 return;
4895 }
4896 if(opcode2[i]==0x11) {
4897 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4898 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4899 emit_orimm(fs,0x800000,fs);
4900 emit_vldr(temp,6);
4901 emit_vldr(HOST_TEMPREG,7);
4902 emit_fcmpd(6,7);
4903 emit_fmstat();
4904 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4905 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4906 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4907 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4908 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4909 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4910 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4911 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4912 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4913 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4914 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4915 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4916 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4917 return;
4918 }
4919 #endif
4920
4921 // C only
4922
4923 u_int hr,reglist=0;
4924 for(hr=0;hr<HOST_REGS;hr++) {
4925 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4926 }
4927 reglist&=~(1<<fs);
4928 save_regs(reglist);
4929 if(opcode2[i]==0x10) {
4930 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4931 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4932 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4933 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4934 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4935 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4936 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4937 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4938 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4939 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4940 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4941 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4942 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4943 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4944 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4945 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4946 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4947 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4948 }
4949 if(opcode2[i]==0x11) {
4950 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4951 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4952 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4953 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4954 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4955 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4956 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4957 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4958 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4959 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4960 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4961 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4962 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4963 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4964 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4965 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4966 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4967 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4968 }
4969 restore_regs(reglist);
4970 emit_loadreg(FSREG,fs);
3d624f89 4971#else
4972 cop1_unusable(i, i_regs);
4973#endif
57871462 4974}
4975
4976void float_assemble(int i,struct regstat *i_regs)
4977{
3d624f89 4978#ifndef DISABLE_COP1
57871462 4979 signed char temp=get_reg(i_regs->regmap,-1);
4980 assert(temp>=0);
4981 // Check cop1 unusable
4982 if(!cop1_usable) {
4983 signed char cs=get_reg(i_regs->regmap,CSREG);
4984 assert(cs>=0);
4985 emit_testimm(cs,0x20000000);
4986 int jaddr=(int)out;
4987 emit_jeq(0);
4988 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4989 cop1_usable=1;
4990 }
4991
4992 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4993 if((source[i]&0x3f)==6) // mov
4994 {
4995 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4996 if(opcode2[i]==0x10) {
4997 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4998 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4999 emit_readword_indexed(0,temp,temp);
5000 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5001 }
5002 if(opcode2[i]==0x11) {
5003 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5004 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5005 emit_vldr(temp,7);
5006 emit_vstr(7,HOST_TEMPREG);
5007 }
5008 }
5009 return;
5010 }
5011
5012 if((source[i]&0x3f)>3)
5013 {
5014 if(opcode2[i]==0x10) {
5015 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5016 emit_flds(temp,15);
5017 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5018 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5019 }
5020 if((source[i]&0x3f)==4) // sqrt
5021 emit_fsqrts(15,15);
5022 if((source[i]&0x3f)==5) // abs
5023 emit_fabss(15,15);
5024 if((source[i]&0x3f)==7) // neg
5025 emit_fnegs(15,15);
5026 emit_fsts(15,temp);
5027 }
5028 if(opcode2[i]==0x11) {
5029 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5030 emit_vldr(temp,7);
5031 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5032 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5033 }
5034 if((source[i]&0x3f)==4) // sqrt
5035 emit_fsqrtd(7,7);
5036 if((source[i]&0x3f)==5) // abs
5037 emit_fabsd(7,7);
5038 if((source[i]&0x3f)==7) // neg
5039 emit_fnegd(7,7);
5040 emit_vstr(7,temp);
5041 }
5042 return;
5043 }
5044 if((source[i]&0x3f)<4)
5045 {
5046 if(opcode2[i]==0x10) {
5047 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5048 }
5049 if(opcode2[i]==0x11) {
5050 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5051 }
5052 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5053 if(opcode2[i]==0x10) {
5054 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5055 emit_flds(temp,15);
5056 emit_flds(HOST_TEMPREG,13);
5057 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5058 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5059 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5060 }
5061 }
5062 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5063 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5064 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5065 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5066 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5067 emit_fsts(15,HOST_TEMPREG);
5068 }else{
5069 emit_fsts(15,temp);
5070 }
5071 }
5072 else if(opcode2[i]==0x11) {
5073 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5074 emit_vldr(temp,7);
5075 emit_vldr(HOST_TEMPREG,6);
5076 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5077 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5078 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5079 }
5080 }
5081 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5082 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5083 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5084 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5085 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5086 emit_vstr(7,HOST_TEMPREG);
5087 }else{
5088 emit_vstr(7,temp);
5089 }
5090 }
5091 }
5092 else {
5093 if(opcode2[i]==0x10) {
5094 emit_flds(temp,15);
5095 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5096 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5097 }
5098 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5099 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5100 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5101 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5102 emit_fsts(15,temp);
5103 }
5104 else if(opcode2[i]==0x11) {
5105 emit_vldr(temp,7);
5106 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5107 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5108 }
5109 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5110 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5111 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5112 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5113 emit_vstr(7,temp);
5114 }
5115 }
5116 return;
5117 }
5118 #endif
5119
5120 u_int hr,reglist=0;
5121 for(hr=0;hr<HOST_REGS;hr++) {
5122 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5123 }
5124 if(opcode2[i]==0x10) { // Single precision
5125 save_regs(reglist);
5126 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5127 if((source[i]&0x3f)<4) {
5128 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5129 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5130 }else{
5131 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5132 }
5133 switch(source[i]&0x3f)
5134 {
5135 case 0x00: emit_call((int)add_s);break;
5136 case 0x01: emit_call((int)sub_s);break;
5137 case 0x02: emit_call((int)mul_s);break;
5138 case 0x03: emit_call((int)div_s);break;
5139 case 0x04: emit_call((int)sqrt_s);break;
5140 case 0x05: emit_call((int)abs_s);break;
5141 case 0x06: emit_call((int)mov_s);break;
5142 case 0x07: emit_call((int)neg_s);break;
5143 }
5144 restore_regs(reglist);
5145 }
5146 if(opcode2[i]==0x11) { // Double precision
5147 save_regs(reglist);
5148 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5149 if((source[i]&0x3f)<4) {
5150 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5151 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5152 }else{
5153 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5154 }
5155 switch(source[i]&0x3f)
5156 {
5157 case 0x00: emit_call((int)add_d);break;
5158 case 0x01: emit_call((int)sub_d);break;
5159 case 0x02: emit_call((int)mul_d);break;
5160 case 0x03: emit_call((int)div_d);break;
5161 case 0x04: emit_call((int)sqrt_d);break;
5162 case 0x05: emit_call((int)abs_d);break;
5163 case 0x06: emit_call((int)mov_d);break;
5164 case 0x07: emit_call((int)neg_d);break;
5165 }
5166 restore_regs(reglist);
5167 }
3d624f89 5168#else
5169 cop1_unusable(i, i_regs);
5170#endif
57871462 5171}
5172
5173void multdiv_assemble_arm(int i,struct regstat *i_regs)
5174{
5175 // case 0x18: MULT
5176 // case 0x19: MULTU
5177 // case 0x1A: DIV
5178 // case 0x1B: DIVU
5179 // case 0x1C: DMULT
5180 // case 0x1D: DMULTU
5181 // case 0x1E: DDIV
5182 // case 0x1F: DDIVU
5183 if(rs1[i]&&rs2[i])
5184 {
5185 if((opcode2[i]&4)==0) // 32-bit
5186 {
5187 if(opcode2[i]==0x18) // MULT
5188 {
5189 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5190 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5191 signed char hi=get_reg(i_regs->regmap,HIREG);
5192 signed char lo=get_reg(i_regs->regmap,LOREG);
5193 assert(m1>=0);
5194 assert(m2>=0);
5195 assert(hi>=0);
5196 assert(lo>=0);
5197 emit_smull(m1,m2,hi,lo);
5198 }
5199 if(opcode2[i]==0x19) // MULTU
5200 {
5201 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5202 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5203 signed char hi=get_reg(i_regs->regmap,HIREG);
5204 signed char lo=get_reg(i_regs->regmap,LOREG);
5205 assert(m1>=0);
5206 assert(m2>=0);
5207 assert(hi>=0);
5208 assert(lo>=0);
5209 emit_umull(m1,m2,hi,lo);
5210 }
5211 if(opcode2[i]==0x1A) // DIV
5212 {
5213 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5214 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5215 assert(d1>=0);
5216 assert(d2>=0);
5217 signed char quotient=get_reg(i_regs->regmap,LOREG);
5218 signed char remainder=get_reg(i_regs->regmap,HIREG);
5219 assert(quotient>=0);
5220 assert(remainder>=0);
5221 emit_movs(d1,remainder);
44a80f6a 5222 emit_movimm(0xffffffff,quotient);
5223 emit_negmi(quotient,quotient); // .. quotient and ..
5224 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5225 emit_movs(d2,HOST_TEMPREG);
5226 emit_jeq((int)out+52); // Division by zero
5227 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5228 emit_clz(HOST_TEMPREG,quotient);
5229 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5230 emit_orimm(quotient,1<<31,quotient);
5231 emit_shr(quotient,quotient,quotient);
5232 emit_cmp(remainder,HOST_TEMPREG);
5233 emit_subcs(remainder,HOST_TEMPREG,remainder);
5234 emit_adcs(quotient,quotient,quotient);
5235 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5236 emit_jcc((int)out-16); // -4
5237 emit_teq(d1,d2);
5238 emit_negmi(quotient,quotient);
5239 emit_test(d1,d1);
5240 emit_negmi(remainder,remainder);
5241 }
5242 if(opcode2[i]==0x1B) // DIVU
5243 {
5244 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5245 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5246 assert(d1>=0);
5247 assert(d2>=0);
5248 signed char quotient=get_reg(i_regs->regmap,LOREG);
5249 signed char remainder=get_reg(i_regs->regmap,HIREG);
5250 assert(quotient>=0);
5251 assert(remainder>=0);
44a80f6a 5252 emit_mov(d1,remainder);
5253 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5254 emit_test(d2,d2);
44a80f6a 5255 emit_jeq((int)out+40); // Division by zero
57871462 5256 emit_clz(d2,HOST_TEMPREG);
5257 emit_movimm(1<<31,quotient);
5258 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5259 emit_shr(quotient,HOST_TEMPREG,quotient);
5260 emit_cmp(remainder,d2);
5261 emit_subcs(remainder,d2,remainder);
5262 emit_adcs(quotient,quotient,quotient);
5263 emit_shrcc_imm(d2,1,d2);
5264 emit_jcc((int)out-16); // -4
5265 }
5266 }
5267 else // 64-bit
4600ba03 5268#ifndef FORCE32
57871462 5269 {
5270 if(opcode2[i]==0x1C) // DMULT
5271 {
5272 assert(opcode2[i]!=0x1C);
5273 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5274 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5275 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5276 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5277 assert(m1h>=0);
5278 assert(m2h>=0);
5279 assert(m1l>=0);
5280 assert(m2l>=0);
5281 emit_pushreg(m2h);
5282 emit_pushreg(m2l);
5283 emit_pushreg(m1h);
5284 emit_pushreg(m1l);
5285 emit_call((int)&mult64);
5286 emit_popreg(m1l);
5287 emit_popreg(m1h);
5288 emit_popreg(m2l);
5289 emit_popreg(m2h);
5290 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5291 signed char hil=get_reg(i_regs->regmap,HIREG);
5292 if(hih>=0) emit_loadreg(HIREG|64,hih);
5293 if(hil>=0) emit_loadreg(HIREG,hil);
5294 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5295 signed char lol=get_reg(i_regs->regmap,LOREG);
5296 if(loh>=0) emit_loadreg(LOREG|64,loh);
5297 if(lol>=0) emit_loadreg(LOREG,lol);
5298 }
5299 if(opcode2[i]==0x1D) // DMULTU
5300 {
5301 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5302 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5303 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5304 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5305 assert(m1h>=0);
5306 assert(m2h>=0);
5307 assert(m1l>=0);
5308 assert(m2l>=0);
5309 save_regs(0x100f);
5310 if(m1l!=0) emit_mov(m1l,0);
5311 if(m1h==0) emit_readword((int)&dynarec_local,1);
5312 else if(m1h>1) emit_mov(m1h,1);
5313 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5314 else if(m2l>2) emit_mov(m2l,2);
5315 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5316 else if(m2h>3) emit_mov(m2h,3);
5317 emit_call((int)&multu64);
5318 restore_regs(0x100f);
5319 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5320 signed char hil=get_reg(i_regs->regmap,HIREG);
5321 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5322 signed char lol=get_reg(i_regs->regmap,LOREG);
5323 /*signed char temp=get_reg(i_regs->regmap,-1);
5324 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5325 signed char rl=get_reg(i_regs->regmap,HIREG);
5326 assert(m1h>=0);
5327 assert(m2h>=0);
5328 assert(m1l>=0);
5329 assert(m2l>=0);
5330 assert(temp>=0);
5331 //emit_mov(m1l,EAX);
5332 //emit_mul(m2l);
5333 emit_umull(rl,rh,m1l,m2l);
5334 emit_storereg(LOREG,rl);
5335 emit_mov(rh,temp);
5336 //emit_mov(m1h,EAX);
5337 //emit_mul(m2l);
5338 emit_umull(rl,rh,m1h,m2l);
5339 emit_adds(rl,temp,temp);
5340 emit_adcimm(rh,0,rh);
5341 emit_storereg(HIREG,rh);
5342 //emit_mov(m2h,EAX);
5343 //emit_mul(m1l);
5344 emit_umull(rl,rh,m1l,m2h);
5345 emit_adds(rl,temp,temp);
5346 emit_adcimm(rh,0,rh);
5347 emit_storereg(LOREG|64,temp);
5348 emit_mov(rh,temp);
5349 //emit_mov(m2h,EAX);
5350 //emit_mul(m1h);
5351 emit_umull(rl,rh,m1h,m2h);
5352 emit_adds(rl,temp,rl);
5353 emit_loadreg(HIREG,temp);
5354 emit_adcimm(rh,0,rh);
5355 emit_adds(rl,temp,rl);
5356 emit_adcimm(rh,0,rh);
5357 // DEBUG
5358 /*
5359 emit_pushreg(m2h);
5360 emit_pushreg(m2l);
5361 emit_pushreg(m1h);
5362 emit_pushreg(m1l);
5363 emit_call((int)&multu64);
5364 emit_popreg(m1l);
5365 emit_popreg(m1h);
5366 emit_popreg(m2l);
5367 emit_popreg(m2h);
5368 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5369 signed char hil=get_reg(i_regs->regmap,HIREG);
5370 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5371 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5372 */
5373 // Shouldn't be necessary
5374 //char loh=get_reg(i_regs->regmap,LOREG|64);
5375 //char lol=get_reg(i_regs->regmap,LOREG);
5376 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5377 //if(lol>=0) emit_loadreg(LOREG,lol);
5378 }
5379 if(opcode2[i]==0x1E) // DDIV
5380 {
5381 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5382 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5383 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5384 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5385 assert(d1h>=0);
5386 assert(d2h>=0);
5387 assert(d1l>=0);
5388 assert(d2l>=0);
5389 save_regs(0x100f);
5390 if(d1l!=0) emit_mov(d1l,0);
5391 if(d1h==0) emit_readword((int)&dynarec_local,1);
5392 else if(d1h>1) emit_mov(d1h,1);
5393 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5394 else if(d2l>2) emit_mov(d2l,2);
5395 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5396 else if(d2h>3) emit_mov(d2h,3);
5397 emit_call((int)&div64);
5398 restore_regs(0x100f);
5399 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5400 signed char hil=get_reg(i_regs->regmap,HIREG);
5401 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5402 signed char lol=get_reg(i_regs->regmap,LOREG);
5403 if(hih>=0) emit_loadreg(HIREG|64,hih);
5404 if(hil>=0) emit_loadreg(HIREG,hil);
5405 if(loh>=0) emit_loadreg(LOREG|64,loh);
5406 if(lol>=0) emit_loadreg(LOREG,lol);
5407 }
5408 if(opcode2[i]==0x1F) // DDIVU
5409 {
5410 //u_int hr,reglist=0;
5411 //for(hr=0;hr<HOST_REGS;hr++) {
5412 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5413 //}
5414 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5415 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5416 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5417 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5418 assert(d1h>=0);
5419 assert(d2h>=0);
5420 assert(d1l>=0);
5421 assert(d2l>=0);
5422 save_regs(0x100f);
5423 if(d1l!=0) emit_mov(d1l,0);
5424 if(d1h==0) emit_readword((int)&dynarec_local,1);
5425 else if(d1h>1) emit_mov(d1h,1);
5426 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5427 else if(d2l>2) emit_mov(d2l,2);
5428 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5429 else if(d2h>3) emit_mov(d2h,3);
5430 emit_call((int)&divu64);
5431 restore_regs(0x100f);
5432 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5433 signed char hil=get_reg(i_regs->regmap,HIREG);
5434 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5435 signed char lol=get_reg(i_regs->regmap,LOREG);
5436 if(hih>=0) emit_loadreg(HIREG|64,hih);
5437 if(hil>=0) emit_loadreg(HIREG,hil);
5438 if(loh>=0) emit_loadreg(LOREG|64,loh);
5439 if(lol>=0) emit_loadreg(LOREG,lol);
5440 }
5441 }
4600ba03 5442#else
5443 assert(0);
5444#endif
57871462 5445 }
5446 else
5447 {
5448 // Multiply by zero is zero.
5449 // MIPS does not have a divide by zero exception.
5450 // The result is undefined, we return zero.
5451 signed char hr=get_reg(i_regs->regmap,HIREG);
5452 signed char lr=get_reg(i_regs->regmap,LOREG);
5453 if(hr>=0) emit_zeroreg(hr);
5454 if(lr>=0) emit_zeroreg(lr);
5455 }
5456}
5457#define multdiv_assemble multdiv_assemble_arm
5458
// Return-hash constant preload hook: deliberately a no-op on ARM.
// The x86 backend uses this to put the value 0xf8 into register r; on ARM
// the hash is computed with a single instruction (see do_rhash), so no
// preload is needed and r is ignored.
void do_preload_rhash(int r) {
  (void)r;
}
5463
5464void do_preload_rhtbl(int ht) {
5465 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5466}
5467
// Emit rh = rs & 0xf8.  Bits 3..7 of the address give a byte offset that
// selects one of the 32 eight-byte entries of mini_ht.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5471
5472void do_miniht_load(int ht,int rh) {
5473 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5474 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5475}
5476
5477void do_miniht_jump(int rs,int rh,int ht) {
5478 emit_cmp(rh,rs);
5479 emit_ldreq_indexed(ht,4,15);
5480 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5481 emit_mov(rs,7);
5482 emit_jmp(jump_vaddr_reg[7]);
5483 #else
5484 emit_jmp(jump_vaddr_reg[rs]);
5485 #endif
5486}
5487
5488void do_miniht_insert(u_int return_address,int rt,int temp) {
5489 #ifdef ARMv5_ONLY
5490 emit_movimm(return_address,rt); // PC into link register
5491 add_to_linker((int)out,return_address,1);
5492 emit_pcreladdr(temp);
5493 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5494 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5495 #else
5496 emit_movw(return_address&0x0000FFFF,rt);
5497 add_to_linker((int)out,return_address,1);
5498 emit_pcreladdr(temp);
5499 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5500 emit_movt(return_address&0xFFFF0000,rt);
5501 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5502 #endif
5503}
5504
5505// Sign-extend to 64 bits and write out upper half of a register
5506// This is useful where we have a 32-bit value in a register, and want to
5507// keep it in a 32-bit register, but can't guarantee that it won't be read
5508// as a 64-bit value later.
5509void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5510{
24385cae 5511#ifndef FORCE32
57871462 5512 if(is32_pre==is32) return;
5513 int hr,reg;
5514 for(hr=0;hr<HOST_REGS;hr++) {
5515 if(hr!=EXCLUDE_REG) {
5516 //if(pre[hr]==entry[hr]) {
5517 if((reg=pre[hr])>=0) {
5518 if((dirty>>hr)&1) {
5519 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5520 emit_sarimm(hr,31,HOST_TEMPREG);
5521 emit_storereg(reg|64,HOST_TEMPREG);
5522 }
5523 }
5524 }
5525 //}
5526 }
5527 }
24385cae 5528#endif
57871462 5529}
5530
5531void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5532{
5533 //if(dirty_pre==dirty) return;
5534 int hr,reg,new_hr;
5535 for(hr=0;hr<HOST_REGS;hr++) {
5536 if(hr!=EXCLUDE_REG) {
5537 reg=pre[hr];
5538 if(((~u)>>(reg&63))&1) {
f776eb14 5539 if(reg>0) {
57871462 5540 if(((dirty_pre&~dirty)>>hr)&1) {
5541 if(reg>0&&reg<34) {
5542 emit_storereg(reg,hr);
5543 if( ((is32_pre&~uu)>>reg)&1 ) {
5544 emit_sarimm(hr,31,HOST_TEMPREG);
5545 emit_storereg(reg|64,HOST_TEMPREG);
5546 }
5547 }
5548 else if(reg>=64) {
5549 emit_storereg(reg,hr);
5550 }
5551 }
5552 }
57871462 5553 }
5554 }
5555 }
5556}
5557
5558
5559/* using strd could possibly help but you'd have to allocate registers in pairs
5560void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5561{
5562 int hr;
5563 int wrote=-1;
5564 for(hr=HOST_REGS-1;hr>=0;hr--) {
5565 if(hr!=EXCLUDE_REG) {
5566 if(pre[hr]!=entry[hr]) {
5567 if(pre[hr]>=0) {
5568 if((dirty>>hr)&1) {
5569 if(get_reg(entry,pre[hr])<0) {
5570 if(pre[hr]<64) {
5571 if(!((u>>pre[hr])&1)) {
5572 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5573 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5574 emit_sarimm(hr,31,hr+1);
5575 emit_strdreg(pre[hr],hr);
5576 }
5577 else
5578 emit_storereg(pre[hr],hr);
5579 }else{
5580 emit_storereg(pre[hr],hr);
5581 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5582 emit_sarimm(hr,31,hr);
5583 emit_storereg(pre[hr]|64,hr);
5584 }
5585 }
5586 }
5587 }else{
5588 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5589 emit_storereg(pre[hr],hr);
5590 }
5591 }
5592 wrote=hr;
5593 }
5594 }
5595 }
5596 }
5597 }
5598 }
5599 for(hr=0;hr<HOST_REGS;hr++) {
5600 if(hr!=EXCLUDE_REG) {
5601 if(pre[hr]!=entry[hr]) {
5602 if(pre[hr]>=0) {
5603 int nr;
5604 if((nr=get_reg(entry,pre[hr]))>=0) {
5605 emit_mov(hr,nr);
5606 }
5607 }
5608 }
5609 }
5610 }
5611}
5612#define wb_invalidate wb_invalidate_arm
5613*/
5614
dd3a91a1 5615// Clearing the cache is rather slow on ARM Linux, so mark the areas
5616// that need to be cleared, and then only clear these areas once.
5617void do_clear_cache()
5618{
5619 int i,j;
5620 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5621 {
5622 u_int bitmap=needs_clear_cache[i];
5623 if(bitmap) {
5624 u_int start,end;
5625 for(j=0;j<32;j++)
5626 {
5627 if(bitmap&(1<<j)) {
5628 start=BASE_ADDR+i*131072+j*4096;
5629 end=start+4095;
5630 j++;
5631 while(j<32) {
5632 if(bitmap&(1<<j)) {
5633 end+=4096;
5634 j++;
5635 }else{
5636 __clear_cache((void *)start,(void *)end);
5637 break;
5638 }
5639 }
5640 }
5641 }
5642 needs_clear_cache[i]=0;
5643 }
5644 }
5645}
5646
57871462 5647// CPU-architecture-specific initialization
5648void arch_init() {
3d624f89 5649#ifndef DISABLE_COP1
57871462 5650 rounding_modes[0]=0x0<<22; // round
5651 rounding_modes[1]=0x3<<22; // trunc
5652 rounding_modes[2]=0x1<<22; // ceil
5653 rounding_modes[3]=0x2<<22; // floor
3d624f89 5654#endif
57871462 5655}
b9b61529 5656
5657// vim:shiftwidth=2:expandtab