pcnt: measure gte too
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
23#include "../gte_arm.h"
24#include "../gte_neon.h"
25#include "pcnt.h"
26#endif
27
57871462 28extern int cycle_count;
29extern int last_count;
30extern int pcaddr;
31extern int pending_exception;
32extern int branch_target;
33extern uint64_t readmem_dword;
3d624f89 34#ifdef MUPEN64
57871462 35extern precomp_instr fake_pc;
3d624f89 36#endif
57871462 37extern void *dynarec_local;
38extern u_int memory_map[1048576];
39extern u_int mini_ht[32][2];
40extern u_int rounding_modes[4];
41
42void indirect_jump_indexed();
43void indirect_jump();
44void do_interrupt();
45void jump_vaddr_r0();
46void jump_vaddr_r1();
47void jump_vaddr_r2();
48void jump_vaddr_r3();
49void jump_vaddr_r4();
50void jump_vaddr_r5();
51void jump_vaddr_r6();
52void jump_vaddr_r7();
53void jump_vaddr_r8();
54void jump_vaddr_r9();
55void jump_vaddr_r10();
56void jump_vaddr_r12();
57
58const u_int jump_vaddr_reg[16] = {
59 (int)jump_vaddr_r0,
60 (int)jump_vaddr_r1,
61 (int)jump_vaddr_r2,
62 (int)jump_vaddr_r3,
63 (int)jump_vaddr_r4,
64 (int)jump_vaddr_r5,
65 (int)jump_vaddr_r6,
66 (int)jump_vaddr_r7,
67 (int)jump_vaddr_r8,
68 (int)jump_vaddr_r9,
69 (int)jump_vaddr_r10,
70 0,
71 (int)jump_vaddr_r12,
72 0,
73 0,
74 0};
75
0bbd1454 76void invalidate_addr_r0();
77void invalidate_addr_r1();
78void invalidate_addr_r2();
79void invalidate_addr_r3();
80void invalidate_addr_r4();
81void invalidate_addr_r5();
82void invalidate_addr_r6();
83void invalidate_addr_r7();
84void invalidate_addr_r8();
85void invalidate_addr_r9();
86void invalidate_addr_r10();
87void invalidate_addr_r12();
88
89const u_int invalidate_addr_reg[16] = {
90 (int)invalidate_addr_r0,
91 (int)invalidate_addr_r1,
92 (int)invalidate_addr_r2,
93 (int)invalidate_addr_r3,
94 (int)invalidate_addr_r4,
95 (int)invalidate_addr_r5,
96 (int)invalidate_addr_r6,
97 (int)invalidate_addr_r7,
98 (int)invalidate_addr_r8,
99 (int)invalidate_addr_r9,
100 (int)invalidate_addr_r10,
101 0,
102 (int)invalidate_addr_r12,
103 0,
104 0,
105 0};
106
57871462 107#include "fpu.h"
108
dd3a91a1 109unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
110
57871462 111/* Linker */
112
113void set_jump_target(int addr,u_int target)
114{
115 u_char *ptr=(u_char *)addr;
116 u_int *ptr2=(u_int *)ptr;
117 if(ptr[3]==0xe2) {
118 assert((target-(u_int)ptr2-8)<1024);
119 assert((addr&3)==0);
120 assert((target&3)==0);
121 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
122 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
123 }
124 else if(ptr[3]==0x72) {
125 // generated by emit_jno_unlikely
126 if((target-(u_int)ptr2-8)<1024) {
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 }
131 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
132 assert((addr&3)==0);
133 assert((target&3)==0);
134 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
135 }
136 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
137 }
138 else {
139 assert((ptr[3]&0x0e)==0xa);
140 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
141 }
142}
143
144// This optionally copies the instruction from the target of the branch into
145// the space before the branch. Works, but the difference in speed is
146// usually insignificant.
147void set_jump_target_fillslot(int addr,u_int target,int copy)
148{
149 u_char *ptr=(u_char *)addr;
150 u_int *ptr2=(u_int *)ptr;
151 assert(!copy||ptr2[-1]==0xe28dd000);
152 if(ptr[3]==0xe2) {
153 assert(!copy);
154 assert((target-(u_int)ptr2-8)<4096);
155 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 u_int target_insn=*(u_int *)target;
160 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
161 copy=0;
162 }
163 if((target_insn&0x0c100000)==0x04100000) { // Load
164 copy=0;
165 }
166 if(target_insn&0x08000000) {
167 copy=0;
168 }
169 if(copy) {
170 ptr2[-1]=target_insn;
171 target+=4;
172 }
173 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
174 }
175}
176
177/* Literal pool */
178add_literal(int addr,int val)
179{
15776b68 180 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 181 literals[literalcount][0]=addr;
182 literals[literalcount][1]=val;
183 literalcount++;
184}
185
f76eeef9 186void *kill_pointer(void *stub)
57871462 187{
188 int *ptr=(int *)(stub+4);
189 assert((*ptr&0x0ff00000)==0x05900000);
190 u_int offset=*ptr&0xfff;
191 int **l_ptr=(void *)ptr+offset+8;
192 int *i_ptr=*l_ptr;
193 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 194 return i_ptr;
57871462 195}
196
f968d35d 197// find where external branch is liked to using addr of it's stub:
198// get address that insn one after stub loads (dyna_linker arg1),
199// treat it as a pointer to branch insn,
200// return addr where that branch jumps to
57871462 201int get_pointer(void *stub)
202{
203 //printf("get_pointer(%x)\n",(int)stub);
204 int *ptr=(int *)(stub+4);
f968d35d 205 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 206 u_int offset=*ptr&0xfff;
207 int **l_ptr=(void *)ptr+offset+8;
208 int *i_ptr=*l_ptr;
209 assert((*i_ptr&0x0f000000)==0x0a000000);
210 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
211}
212
213// Find the "clean" entry point from a "dirty" entry point
214// by skipping past the call to verify_code
215u_int get_clean_addr(int addr)
216{
217 int *ptr=(int *)addr;
218 #ifdef ARMv5_ONLY
219 ptr+=4;
220 #else
221 ptr+=6;
222 #endif
223 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
224 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
225 ptr++;
226 if((*ptr&0xFF000000)==0xea000000) {
227 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
228 }
229 return (u_int)ptr;
230}
231
232int verify_dirty(int addr)
233{
234 u_int *ptr=(u_int *)addr;
235 #ifdef ARMv5_ONLY
236 // get from literal pool
15776b68 237 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 238 u_int offset=*ptr&0xfff;
239 u_int *l_ptr=(void *)ptr+offset+8;
240 u_int source=l_ptr[0];
241 u_int copy=l_ptr[1];
242 u_int len=l_ptr[2];
243 ptr+=4;
244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 254#ifndef DISABLE_TLB
cfcba99a 255 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 256 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
257 unsigned int page=source>>12;
258 unsigned int map_value=memory_map[page];
259 if(map_value>=0x80000000) return 0;
260 while(page<((source+len-1)>>12)) {
261 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
262 }
263 source = source+(map_value<<2);
264 }
63cb0298 265#endif
57871462 266 //printf("verify_dirty: %x %x %x\n",source,copy,len);
267 return !memcmp((void *)source,(void *)copy,len);
268}
269
270// This doesn't necessarily find all clean entry points, just
271// guarantees that it's not dirty
272int isclean(int addr)
273{
274 #ifdef ARMv5_ONLY
275 int *ptr=((u_int *)addr)+4;
276 #else
277 int *ptr=((u_int *)addr)+6;
278 #endif
279 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
280 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
281 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
282 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
284 return 1;
285}
286
287void get_bounds(int addr,u_int *start,u_int *end)
288{
289 u_int *ptr=(u_int *)addr;
290 #ifdef ARMv5_ONLY
291 // get from literal pool
15776b68 292 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 293 u_int offset=*ptr&0xfff;
294 u_int *l_ptr=(void *)ptr+offset+8;
295 u_int source=l_ptr[0];
296 //u_int copy=l_ptr[1];
297 u_int len=l_ptr[2];
298 ptr+=4;
299 #else
300 // ARMv7 movw/movt
301 assert((*ptr&0xFFF00000)==0xe3000000);
302 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
303 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
304 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
305 ptr+=6;
306 #endif
307 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
308 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 309#ifndef DISABLE_TLB
cfcba99a 310 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 311 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
312 if(memory_map[source>>12]>=0x80000000) source = 0;
313 else source = source+(memory_map[source>>12]<<2);
314 }
63cb0298 315#endif
57871462 316 *start=source;
317 *end=source+len;
318}
319
320/* Register allocation */
321
322// Note: registers are allocated clean (unmodified state)
323// if you intend to modify the register, you must call dirty_reg().
324void alloc_reg(struct regstat *cur,int i,signed char reg)
325{
326 int r,hr;
327 int preferred_reg = (reg&7);
328 if(reg==CCREG) preferred_reg=HOST_CCREG;
329 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
330
331 // Don't allocate unused registers
332 if((cur->u>>reg)&1) return;
333
334 // see if it's already allocated
335 for(hr=0;hr<HOST_REGS;hr++)
336 {
337 if(cur->regmap[hr]==reg) return;
338 }
339
340 // Keep the same mapping if the register was already allocated in a loop
341 preferred_reg = loop_reg(i,reg,preferred_reg);
342
343 // Try to allocate the preferred register
344 if(cur->regmap[preferred_reg]==-1) {
345 cur->regmap[preferred_reg]=reg;
346 cur->dirty&=~(1<<preferred_reg);
347 cur->isconst&=~(1<<preferred_reg);
348 return;
349 }
350 r=cur->regmap[preferred_reg];
351 if(r<64&&((cur->u>>r)&1)) {
352 cur->regmap[preferred_reg]=reg;
353 cur->dirty&=~(1<<preferred_reg);
354 cur->isconst&=~(1<<preferred_reg);
355 return;
356 }
357 if(r>=64&&((cur->uu>>(r&63))&1)) {
358 cur->regmap[preferred_reg]=reg;
359 cur->dirty&=~(1<<preferred_reg);
360 cur->isconst&=~(1<<preferred_reg);
361 return;
362 }
363
364 // Clear any unneeded registers
365 // We try to keep the mapping consistent, if possible, because it
366 // makes branches easier (especially loops). So we try to allocate
367 // first (see above) before removing old mappings. If this is not
368 // possible then go ahead and clear out the registers that are no
369 // longer needed.
370 for(hr=0;hr<HOST_REGS;hr++)
371 {
372 r=cur->regmap[hr];
373 if(r>=0) {
374 if(r<64) {
375 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
376 }
377 else
378 {
379 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
380 }
381 }
382 }
383 // Try to allocate any available register, but prefer
384 // registers that have not been used recently.
385 if(i>0) {
386 for(hr=0;hr<HOST_REGS;hr++) {
387 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
388 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
389 cur->regmap[hr]=reg;
390 cur->dirty&=~(1<<hr);
391 cur->isconst&=~(1<<hr);
392 return;
393 }
394 }
395 }
396 }
397 // Try to allocate any available register
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 cur->regmap[hr]=reg;
401 cur->dirty&=~(1<<hr);
402 cur->isconst&=~(1<<hr);
403 return;
404 }
405 }
406
407 // Ok, now we have to evict someone
408 // Pick a register we hopefully won't need soon
409 u_char hsn[MAXREG+1];
410 memset(hsn,10,sizeof(hsn));
411 int j;
412 lsn(hsn,i,&preferred_reg);
413 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
414 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
415 if(i>0) {
416 // Don't evict the cycle count at entry points, otherwise the entry
417 // stub will have to write it.
418 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
419 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
420 for(j=10;j>=3;j--)
421 {
422 // Alloc preferred register if available
423 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
424 for(hr=0;hr<HOST_REGS;hr++) {
425 // Evict both parts of a 64-bit register
426 if((cur->regmap[hr]&63)==r) {
427 cur->regmap[hr]=-1;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 }
431 }
432 cur->regmap[preferred_reg]=reg;
433 return;
434 }
435 for(r=1;r<=MAXREG;r++)
436 {
437 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r+64) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 for(hr=0;hr<HOST_REGS;hr++) {
449 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
450 if(cur->regmap[hr]==r) {
451 cur->regmap[hr]=reg;
452 cur->dirty&=~(1<<hr);
453 cur->isconst&=~(1<<hr);
454 return;
455 }
456 }
457 }
458 }
459 }
460 }
461 }
462 for(j=10;j>=0;j--)
463 {
464 for(r=1;r<=MAXREG;r++)
465 {
466 if(hsn[r]==j) {
467 for(hr=0;hr<HOST_REGS;hr++) {
468 if(cur->regmap[hr]==r+64) {
469 cur->regmap[hr]=reg;
470 cur->dirty&=~(1<<hr);
471 cur->isconst&=~(1<<hr);
472 return;
473 }
474 }
475 for(hr=0;hr<HOST_REGS;hr++) {
476 if(cur->regmap[hr]==r) {
477 cur->regmap[hr]=reg;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 }
484 }
485 }
486 printf("This shouldn't happen (alloc_reg)");exit(1);
487}
488
489void alloc_reg64(struct regstat *cur,int i,signed char reg)
490{
491 int preferred_reg = 8+(reg&1);
492 int r,hr;
493
494 // allocate the lower 32 bits
495 alloc_reg(cur,i,reg);
496
497 // Don't allocate unused registers
498 if((cur->uu>>reg)&1) return;
499
500 // see if the upper half is already allocated
501 for(hr=0;hr<HOST_REGS;hr++)
502 {
503 if(cur->regmap[hr]==reg+64) return;
504 }
505
506 // Keep the same mapping if the register was already allocated in a loop
507 preferred_reg = loop_reg(i,reg,preferred_reg);
508
509 // Try to allocate the preferred register
510 if(cur->regmap[preferred_reg]==-1) {
511 cur->regmap[preferred_reg]=reg|64;
512 cur->dirty&=~(1<<preferred_reg);
513 cur->isconst&=~(1<<preferred_reg);
514 return;
515 }
516 r=cur->regmap[preferred_reg];
517 if(r<64&&((cur->u>>r)&1)) {
518 cur->regmap[preferred_reg]=reg|64;
519 cur->dirty&=~(1<<preferred_reg);
520 cur->isconst&=~(1<<preferred_reg);
521 return;
522 }
523 if(r>=64&&((cur->uu>>(r&63))&1)) {
524 cur->regmap[preferred_reg]=reg|64;
525 cur->dirty&=~(1<<preferred_reg);
526 cur->isconst&=~(1<<preferred_reg);
527 return;
528 }
529
530 // Clear any unneeded registers
531 // We try to keep the mapping consistent, if possible, because it
532 // makes branches easier (especially loops). So we try to allocate
533 // first (see above) before removing old mappings. If this is not
534 // possible then go ahead and clear out the registers that are no
535 // longer needed.
536 for(hr=HOST_REGS-1;hr>=0;hr--)
537 {
538 r=cur->regmap[hr];
539 if(r>=0) {
540 if(r<64) {
541 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
542 }
543 else
544 {
545 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
546 }
547 }
548 }
549 // Try to allocate any available register, but prefer
550 // registers that have not been used recently.
551 if(i>0) {
552 for(hr=0;hr<HOST_REGS;hr++) {
553 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
554 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
555 cur->regmap[hr]=reg|64;
556 cur->dirty&=~(1<<hr);
557 cur->isconst&=~(1<<hr);
558 return;
559 }
560 }
561 }
562 }
563 // Try to allocate any available register
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 cur->regmap[hr]=reg|64;
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
569 return;
570 }
571 }
572
573 // Ok, now we have to evict someone
574 // Pick a register we hopefully won't need soon
575 u_char hsn[MAXREG+1];
576 memset(hsn,10,sizeof(hsn));
577 int j;
578 lsn(hsn,i,&preferred_reg);
579 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
580 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
581 if(i>0) {
582 // Don't evict the cycle count at entry points, otherwise the entry
583 // stub will have to write it.
584 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
585 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
586 for(j=10;j>=3;j--)
587 {
588 // Alloc preferred register if available
589 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
590 for(hr=0;hr<HOST_REGS;hr++) {
591 // Evict both parts of a 64-bit register
592 if((cur->regmap[hr]&63)==r) {
593 cur->regmap[hr]=-1;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 }
597 }
598 cur->regmap[preferred_reg]=reg|64;
599 return;
600 }
601 for(r=1;r<=MAXREG;r++)
602 {
603 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r+64) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 for(hr=0;hr<HOST_REGS;hr++) {
615 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
616 if(cur->regmap[hr]==r) {
617 cur->regmap[hr]=reg|64;
618 cur->dirty&=~(1<<hr);
619 cur->isconst&=~(1<<hr);
620 return;
621 }
622 }
623 }
624 }
625 }
626 }
627 }
628 for(j=10;j>=0;j--)
629 {
630 for(r=1;r<=MAXREG;r++)
631 {
632 if(hsn[r]==j) {
633 for(hr=0;hr<HOST_REGS;hr++) {
634 if(cur->regmap[hr]==r+64) {
635 cur->regmap[hr]=reg|64;
636 cur->dirty&=~(1<<hr);
637 cur->isconst&=~(1<<hr);
638 return;
639 }
640 }
641 for(hr=0;hr<HOST_REGS;hr++) {
642 if(cur->regmap[hr]==r) {
643 cur->regmap[hr]=reg|64;
644 cur->dirty&=~(1<<hr);
645 cur->isconst&=~(1<<hr);
646 return;
647 }
648 }
649 }
650 }
651 }
652 printf("This shouldn't happen");exit(1);
653}
654
655// Allocate a temporary register. This is done without regard to
656// dirty status or whether the register we request is on the unneeded list
657// Note: This will only allocate one register, even if called multiple times
658void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
659{
660 int r,hr;
661 int preferred_reg = -1;
662
663 // see if it's already allocated
664 for(hr=0;hr<HOST_REGS;hr++)
665 {
666 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
667 }
668
669 // Try to allocate any available register
670 for(hr=HOST_REGS-1;hr>=0;hr--) {
671 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
672 cur->regmap[hr]=reg;
673 cur->dirty&=~(1<<hr);
674 cur->isconst&=~(1<<hr);
675 return;
676 }
677 }
678
679 // Find an unneeded register
680 for(hr=HOST_REGS-1;hr>=0;hr--)
681 {
682 r=cur->regmap[hr];
683 if(r>=0) {
684 if(r<64) {
685 if((cur->u>>r)&1) {
686 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
687 cur->regmap[hr]=reg;
688 cur->dirty&=~(1<<hr);
689 cur->isconst&=~(1<<hr);
690 return;
691 }
692 }
693 }
694 else
695 {
696 if((cur->uu>>(r&63))&1) {
697 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
698 cur->regmap[hr]=reg;
699 cur->dirty&=~(1<<hr);
700 cur->isconst&=~(1<<hr);
701 return;
702 }
703 }
704 }
705 }
706 }
707
708 // Ok, now we have to evict someone
709 // Pick a register we hopefully won't need soon
710 // TODO: we might want to follow unconditional jumps here
711 // TODO: get rid of dupe code and make this into a function
712 u_char hsn[MAXREG+1];
713 memset(hsn,10,sizeof(hsn));
714 int j;
715 lsn(hsn,i,&preferred_reg);
716 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
717 if(i>0) {
718 // Don't evict the cycle count at entry points, otherwise the entry
719 // stub will have to write it.
720 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
721 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
722 for(j=10;j>=3;j--)
723 {
724 for(r=1;r<=MAXREG;r++)
725 {
726 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r+64) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 for(hr=0;hr<HOST_REGS;hr++) {
738 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
739 if(cur->regmap[hr]==r) {
740 cur->regmap[hr]=reg;
741 cur->dirty&=~(1<<hr);
742 cur->isconst&=~(1<<hr);
743 return;
744 }
745 }
746 }
747 }
748 }
749 }
750 }
751 for(j=10;j>=0;j--)
752 {
753 for(r=1;r<=MAXREG;r++)
754 {
755 if(hsn[r]==j) {
756 for(hr=0;hr<HOST_REGS;hr++) {
757 if(cur->regmap[hr]==r+64) {
758 cur->regmap[hr]=reg;
759 cur->dirty&=~(1<<hr);
760 cur->isconst&=~(1<<hr);
761 return;
762 }
763 }
764 for(hr=0;hr<HOST_REGS;hr++) {
765 if(cur->regmap[hr]==r) {
766 cur->regmap[hr]=reg;
767 cur->dirty&=~(1<<hr);
768 cur->isconst&=~(1<<hr);
769 return;
770 }
771 }
772 }
773 }
774 }
775 printf("This shouldn't happen");exit(1);
776}
777// Allocate a specific ARM register.
778void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
779{
780 int n;
f776eb14 781 int dirty=0;
57871462 782
783 // see if it's already allocated (and dealloc it)
784 for(n=0;n<HOST_REGS;n++)
785 {
f776eb14 786 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
787 dirty=(cur->dirty>>n)&1;
788 cur->regmap[n]=-1;
789 }
57871462 790 }
791
792 cur->regmap[hr]=reg;
793 cur->dirty&=~(1<<hr);
f776eb14 794 cur->dirty|=dirty<<hr;
57871462 795 cur->isconst&=~(1<<hr);
796}
797
798// Alloc cycle count into dedicated register
799alloc_cc(struct regstat *cur,int i)
800{
801 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
802}
803
804/* Special alloc */
805
806
807/* Assembler */
808
// Printable names of the 16 ARM registers, indexed by register number;
// used by the assem_debug traces.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
826
827void output_byte(u_char byte)
828{
829 *(out++)=byte;
830}
831void output_modrm(u_char mod,u_char rm,u_char ext)
832{
833 assert(mod<4);
834 assert(rm<8);
835 assert(ext<8);
836 u_char byte=(mod<<6)|(ext<<3)|rm;
837 *(out++)=byte;
838}
839void output_sib(u_char scale,u_char index,u_char base)
840{
841 assert(scale<4);
842 assert(index<8);
843 assert(base<8);
844 u_char byte=(scale<<6)|(index<<3)|base;
845 *(out++)=byte;
846}
847void output_w32(u_int word)
848{
849 *((u_int *)out)=word;
850 out+=4;
851}
// Build the register fields of an instruction word:
// rn at bit 16, rd at bit 12, rm at bit 0. Each must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the fields of an immediate-operand instruction word:
// rn at bit 16, rd at bit 12, an 8-bit immediate in the low byte and a
// rotate field derived from shift as ((32-shift)&30)<<7.
// shift must be even and imm must be below 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value plus an even rotation.
// On success stores the 12-bit encoding (rotate field << 7 | 8-bit value)
// in *encoded and returns 1. Returns 0, with *encoded left at 0, when no
// rotation brings the value under 256. Zero encodes as 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 882void genimm_checked(u_int imm,u_int *encoded)
883{
884 u_int ret=genimm(imm,encoded);
885 assert(ret);
886}
57871462 887u_int genjmp(u_int addr)
888{
889 int offset=addr-(int)out-8;
e80343e2 890 if(offset<-33554432||offset>=33554432) {
891 if (addr>2) {
892 printf("genjmp: out of range: %08x\n", offset);
893 exit(1);
894 }
895 return 0;
896 }
57871462 897 return ((u_int)offset>>2)&0xffffff;
898}
899
900void emit_mov(int rs,int rt)
901{
902 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
903 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
904}
905
906void emit_movs(int rs,int rt)
907{
908 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
909 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
910}
911
912void emit_add(int rs1,int rs2,int rt)
913{
914 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
915 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
916}
917
918void emit_adds(int rs1,int rs2,int rt)
919{
920 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
921 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
922}
923
924void emit_adcs(int rs1,int rs2,int rt)
925{
926 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
927 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
928}
929
930void emit_sbc(int rs1,int rs2,int rt)
931{
932 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
933 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
934}
935
936void emit_sbcs(int rs1,int rs2,int rt)
937{
938 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
939 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
940}
941
942void emit_neg(int rs, int rt)
943{
944 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
945 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
946}
947
948void emit_negs(int rs, int rt)
949{
950 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
951 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
952}
953
954void emit_sub(int rs1,int rs2,int rt)
955{
956 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
957 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
958}
959
960void emit_subs(int rs1,int rs2,int rt)
961{
962 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
963 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
964}
965
966void emit_zeroreg(int rt)
967{
968 assem_debug("mov %s,#0\n",regname[rt]);
969 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
970}
971
790ee18e 972void emit_loadlp(u_int imm,u_int rt)
973{
974 add_literal((int)out,imm);
975 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
976 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
977}
978void emit_movw(u_int imm,u_int rt)
979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
984void emit_movt(u_int imm,u_int rt)
985{
986 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
987 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
988}
989void emit_movimm(u_int imm,u_int rt)
990{
991 u_int armval;
992 if(genimm(imm,&armval)) {
993 assem_debug("mov %s,#%d\n",regname[rt],imm);
994 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
995 }else if(genimm(~imm,&armval)) {
996 assem_debug("mvn %s,#%d\n",regname[rt],imm);
997 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
998 }else if(imm<65536) {
999 #ifdef ARMv5_ONLY
1000 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1001 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1002 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1003 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1004 #else
1005 emit_movw(imm,rt);
1006 #endif
1007 }else{
1008 #ifdef ARMv5_ONLY
1009 emit_loadlp(imm,rt);
1010 #else
1011 emit_movw(imm&0x0000FFFF,rt);
1012 emit_movt(imm&0xFFFF0000,rt);
1013 #endif
1014 }
1015}
1016void emit_pcreladdr(u_int rt)
1017{
1018 assem_debug("add %s,pc,#?\n",regname[rt]);
1019 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1020}
1021
57871462 1022void emit_loadreg(int r, int hr)
1023{
3d624f89 1024#ifdef FORCE32
1025 if(r&64) {
1026 printf("64bit load in 32bit mode!\n");
7f2607ea 1027 assert(0);
1028 return;
3d624f89 1029 }
1030#endif
57871462 1031 if((r&63)==0)
1032 emit_zeroreg(hr);
1033 else {
3d624f89 1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==CSREG) addr=(int)&Status;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 if(r==INVCP) addr=(int)&invc_ptr;
1041 u_int offset = addr-(u_int)&dynarec_local;
1042 assert(offset<4096);
1043 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1044 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1045 }
1046}
1047void emit_storereg(int r, int hr)
1048{
3d624f89 1049#ifdef FORCE32
1050 if(r&64) {
1051 printf("64bit store in 32bit mode!\n");
7f2607ea 1052 assert(0);
1053 return;
3d624f89 1054 }
1055#endif
1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
1067void emit_test(int rs, int rt)
1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
1073void emit_testimm(int rs,int imm)
1074{
1075 u_int armval;
5a05d80c 1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1077 genimm_checked(imm,&armval);
57871462 1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
b9b61529 1081void emit_testeqimm(int rs,int imm)
1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
b9b61529 1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
57871462 1089void emit_not(int rs,int rt)
1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
b9b61529 1095void emit_mvnmi(int rs,int rt)
1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
57871462 1101void emit_and(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
1107void emit_or(u_int rs1,u_int rs2,u_int rt)
1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
1112void emit_or_and_set_flags(int rs1,int rs2,int rt)
1113{
1114 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
f70d384d 1118void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 assert(imm<32);
1123 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1124 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1125}
1126
576bbd8f 1127void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1128{
1129 assert(rs<16);
1130 assert(rt<16);
1131 assert(imm<32);
1132 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1133 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1134}
1135
57871462 1136void emit_xor(u_int rs1,u_int rs2,u_int rt)
1137{
1138 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1139 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1140}
1141
57871462 1142void emit_addimm(u_int rs,int imm,u_int rt)
1143{
1144 assert(rs<16);
1145 assert(rt<16);
1146 if(imm!=0) {
57871462 1147 u_int armval;
1148 if(genimm(imm,&armval)) {
1149 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1151 }else if(genimm(-imm,&armval)) {
1152 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1153 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1154 }else if(imm<0) {
ffb0b9e0 1155 assert(imm>-65536);
57871462 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1157 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1158 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1159 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1160 }else{
ffb0b9e0 1161 assert(imm<65536);
57871462 1162 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1163 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1164 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1165 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1166 }
1167 }
1168 else if(rs!=rt) emit_mov(rs,rt);
1169}
1170
1171void emit_addimm_and_set_flags(int imm,int rt)
1172{
1173 assert(imm>-65536&&imm<65536);
1174 u_int armval;
1175 if(genimm(imm,&armval)) {
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1177 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1178 }else if(genimm(-imm,&armval)) {
1179 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1180 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1181 }else if(imm<0) {
1182 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1183 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1184 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1185 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1186 }else{
1187 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1188 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1189 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1190 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1191 }
1192}
1193void emit_addimm_no_flags(u_int imm,u_int rt)
1194{
1195 emit_addimm(rt,imm,rt);
1196}
1197
1198void emit_addnop(u_int r)
1199{
1200 assert(r<16);
1201 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1202 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1203}
1204
1205void emit_adcimm(u_int rs,int imm,u_int rt)
1206{
1207 u_int armval;
cfbd3c6e 1208 genimm_checked(imm,&armval);
57871462 1209 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1210 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1211}
1212/*void emit_sbcimm(int imm,u_int rt)
1213{
1214 u_int armval;
cfbd3c6e 1215 genimm_checked(imm,&armval);
57871462 1216 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1217 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1218}*/
1219void emit_sbbimm(int imm,u_int rt)
1220{
1221 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1222 assert(rt<8);
1223 if(imm<128&&imm>=-128) {
1224 output_byte(0x83);
1225 output_modrm(3,rt,3);
1226 output_byte(imm);
1227 }
1228 else
1229 {
1230 output_byte(0x81);
1231 output_modrm(3,rt,3);
1232 output_w32(imm);
1233 }
1234}
1235void emit_rscimm(int rs,int imm,u_int rt)
1236{
1237 assert(0);
1238 u_int armval;
cfbd3c6e 1239 genimm_checked(imm,&armval);
57871462 1240 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1241 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1242}
1243
1244void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1245{
1246 // TODO: if(genimm(imm,&armval)) ...
1247 // else
1248 emit_movimm(imm,HOST_TEMPREG);
1249 emit_adds(HOST_TEMPREG,rsl,rtl);
1250 emit_adcimm(rsh,0,rth);
1251}
1252
1253void emit_sbb(int rs1,int rs2)
1254{
1255 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1256 output_byte(0x19);
1257 output_modrm(3,rs1,rs2);
1258}
1259
1260void emit_andimm(int rs,int imm,int rt)
1261{
1262 u_int armval;
790ee18e 1263 if(imm==0) {
1264 emit_zeroreg(rt);
1265 }else if(genimm(imm,&armval)) {
57871462 1266 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1267 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1268 }else if(genimm(~imm,&armval)) {
1269 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1270 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1271 }else if(imm==65535) {
1272 #ifdef ARMv5_ONLY
1273 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1275 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1276 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1277 #else
1278 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1279 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1280 #endif
1281 }else{
1282 assert(imm>0&&imm<65535);
1283 #ifdef ARMv5_ONLY
1284 assem_debug("mov r14,#%d\n",imm&0xFF00);
1285 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1286 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1287 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1288 #else
1289 emit_movw(imm,HOST_TEMPREG);
1290 #endif
1291 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1292 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1293 }
1294}
1295
1296void emit_orimm(int rs,int imm,int rt)
1297{
1298 u_int armval;
790ee18e 1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
57871462 1302 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
1305 assert(imm>0&&imm<65536);
1306 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
1313void emit_xorimm(int rs,int imm,int rt)
1314{
57871462 1315 u_int armval;
790ee18e 1316 if(imm==0) {
1317 if(rs!=rt) emit_mov(rs,rt);
1318 }else if(genimm(imm,&armval)) {
57871462 1319 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1321 }else{
514ed0d9 1322 assert(imm>0&&imm<65536);
57871462 1323 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1324 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1325 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1326 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1327 }
1328}
1329
1330void emit_shlimm(int rs,u_int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 //if(imm==1) ...
1335 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1337}
1338
c6c3b1b3 1339void emit_lsls_imm(int rs,int imm,int rt)
1340{
1341 assert(imm>0);
1342 assert(imm<32);
1343 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1345}
1346
57871462 1347void emit_shrimm(int rs,u_int imm,int rt)
1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1353}
1354
1355void emit_sarimm(int rs,u_int imm,int rt)
1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1361}
1362
1363void emit_rorimm(int rs,u_int imm,int rt)
1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1369}
1370
1371void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1372{
1373 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1374 assert(imm>0);
1375 assert(imm<32);
1376 //if(imm==1) ...
1377 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1378 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1379 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1380 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1381}
1382
1383void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1384{
1385 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1386 assert(imm>0);
1387 assert(imm<32);
1388 //if(imm==1) ...
1389 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1390 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1391 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1392 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1393}
1394
b9b61529 1395void emit_signextend16(int rs,int rt)
1396{
1397 #ifdef ARMv5_ONLY
1398 emit_shlimm(rs,16,rt);
1399 emit_sarimm(rt,16,rt);
1400 #else
1401 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1402 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1403 #endif
1404}
1405
c6c3b1b3 1406void emit_signextend8(int rs,int rt)
1407{
1408 #ifdef ARMv5_ONLY
1409 emit_shlimm(rs,24,rt);
1410 emit_sarimm(rt,24,rt);
1411 #else
1412 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1413 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1414 #endif
1415}
1416
57871462 1417void emit_shl(u_int rs,u_int shift,u_int rt)
1418{
1419 assert(rs<16);
1420 assert(rt<16);
1421 assert(shift<16);
1422 //if(imm==1) ...
1423 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1424 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1425}
1426void emit_shr(u_int rs,u_int shift,u_int rt)
1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1433}
1434void emit_sar(u_int rs,u_int shift,u_int rt)
1435{
1436 assert(rs<16);
1437 assert(rt<16);
1438 assert(shift<16);
1439 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1440 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1441}
1442void emit_shlcl(int r)
1443{
1444 assem_debug("shl %%%s,%%cl\n",regname[r]);
1445 assert(0);
1446}
1447void emit_shrcl(int r)
1448{
1449 assem_debug("shr %%%s,%%cl\n",regname[r]);
1450 assert(0);
1451}
1452void emit_sarcl(int r)
1453{
1454 assem_debug("sar %%%s,%%cl\n",regname[r]);
1455 assert(0);
1456}
1457
1458void emit_shldcl(int r1,int r2)
1459{
1460 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1461 assert(0);
1462}
1463void emit_shrdcl(int r1,int r2)
1464{
1465 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1466 assert(0);
1467}
1468void emit_orrshl(u_int rs,u_int shift,u_int rt)
1469{
1470 assert(rs<16);
1471 assert(rt<16);
1472 assert(shift<16);
1473 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1474 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1475}
1476void emit_orrshr(u_int rs,u_int shift,u_int rt)
1477{
1478 assert(rs<16);
1479 assert(rt<16);
1480 assert(shift<16);
1481 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1482 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1483}
1484
1485void emit_cmpimm(int rs,int imm)
1486{
1487 u_int armval;
1488 if(genimm(imm,&armval)) {
5a05d80c 1489 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1490 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1491 }else if(genimm(-imm,&armval)) {
5a05d80c 1492 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1493 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1494 }else if(imm>0) {
1495 assert(imm<65536);
1496 #ifdef ARMv5_ONLY
1497 emit_movimm(imm,HOST_TEMPREG);
1498 #else
1499 emit_movw(imm,HOST_TEMPREG);
1500 #endif
1501 assem_debug("cmp %s,r14\n",regname[rs]);
1502 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1503 }else{
1504 assert(imm>-65536);
1505 #ifdef ARMv5_ONLY
1506 emit_movimm(-imm,HOST_TEMPREG);
1507 #else
1508 emit_movw(-imm,HOST_TEMPREG);
1509 #endif
1510 assem_debug("cmn %s,r14\n",regname[rs]);
1511 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1512 }
1513}
1514
1515void emit_cmovne(u_int *addr,int rt)
1516{
1517 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1518 assert(0);
1519}
1520void emit_cmovl(u_int *addr,int rt)
1521{
1522 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1523 assert(0);
1524}
1525void emit_cmovs(u_int *addr,int rt)
1526{
1527 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1528 assert(0);
1529}
1530void emit_cmovne_imm(int imm,int rt)
1531{
1532 assem_debug("movne %s,#%d\n",regname[rt],imm);
1533 u_int armval;
cfbd3c6e 1534 genimm_checked(imm,&armval);
57871462 1535 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1536}
1537void emit_cmovl_imm(int imm,int rt)
1538{
1539 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1540 u_int armval;
cfbd3c6e 1541 genimm_checked(imm,&armval);
57871462 1542 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1543}
1544void emit_cmovb_imm(int imm,int rt)
1545{
1546 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1547 u_int armval;
cfbd3c6e 1548 genimm_checked(imm,&armval);
57871462 1549 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1550}
1551void emit_cmovs_imm(int imm,int rt)
1552{
1553 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1554 u_int armval;
cfbd3c6e 1555 genimm_checked(imm,&armval);
57871462 1556 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1557}
1558void emit_cmove_reg(int rs,int rt)
1559{
1560 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1561 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1562}
1563void emit_cmovne_reg(int rs,int rt)
1564{
1565 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1566 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1567}
1568void emit_cmovl_reg(int rs,int rt)
1569{
1570 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1571 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1572}
1573void emit_cmovs_reg(int rs,int rt)
1574{
1575 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1576 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1577}
1578
/*
 * Set rt to 1 if rs < imm (using the `movlt` conditional move), else 0.
 * When rt aliases rs the zeroing of rt is postponed until after the compare
 * so that the source value is not destroyed first.
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int dest_is_source = (rs == rt);
  if(!dest_is_source) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(dest_is_source) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * Set rt to 1 if rs is below imm (using the `movcc` conditional move), else 0.
 * When rt aliases rs the zeroing of rt is postponed until after the compare
 * so that the source value is not destroyed first.
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int dest_is_source = (rs == rt);
  if(!dest_is_source) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(dest_is_source) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/*
 * "Set if less than immediate" for the register pair rsh:rsl, result in rt.
 * The low word is compared first with emit_slti32(); the result is then
 * overridden from the high word: tested against zero for imm>=0, compared
 * against -1 for imm<0.  rt must not be rsh.
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if(imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
/*
 * Unsigned "set if less than immediate" for the register pair rsh:rsl,
 * result in rt.  The low word is compared with emit_sltiu32(); afterwards the
 * high word forces the result to 0 when it is non-zero (imm>=0), or to 1 when
 * it differs from -1 (imm<0).  rt must not be rsh.
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if(imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1625
1626void emit_cmp(int rs,int rt)
1627{
1628 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1629 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1630}
/*
 * rt = (rs > 0): compare rs with 1, load 1 into rt, then replace it with 0
 * through the `movlt` conditional move.
 */
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/*
 * rt = (rs != 0).  Flags come from `tst rs,rs` when rt aliases rs, otherwise
 * from emit_movs() copying rs to rt; rt then becomes 1 via `movne`.
 */
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * "Greater than zero" for the register pair rsh:rsl, result in rt.
 * Starts from emit_set_gz32() on the low word, then tests the high word and
 * overrides rt with 1 via `movne` and with 0 via `movmi`.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/*
 * "Non-zero" for the register pair rsh:rsl: rt receives rsh|rsl via `orrs`
 * and is then replaced by 1 through `movne`.
 */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * rt = 1 if rs1 < rs2 (via `movlt`), else 0.  If rt is one of the sources it
 * is zeroed only after the compare has been emitted.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int dest_is_source = (rs1 == rt) || (rs2 == rt);
  if(!dest_is_source) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(dest_is_source) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * rt = 1 if rs1 is below rs2 (via `movcc`), else 0.  If rt is one of the
 * sources it is zeroed only after the compare has been emitted.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int dest_is_source = (rs1 == rt) || (rs2 == rt);
  if(!dest_is_source) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(dest_is_source) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1675void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1676{
1677 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1678 assert(u1!=rt);
1679 assert(u2!=rt);
1680 emit_cmp(l1,l2);
1681 emit_movimm(0,rt);
1682 emit_sbcs(u1,u2,HOST_TEMPREG);
1683 emit_cmovl_imm(1,rt);
1684}
1685void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1686{
1687 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1688 assert(u1!=rt);
1689 assert(u2!=rt);
1690 emit_cmp(l1,l2);
1691 emit_movimm(0,rt);
1692 emit_sbcs(u1,u2,HOST_TEMPREG);
1693 emit_cmovb_imm(1,rt);
1694}
1695
1696void emit_call(int a)
1697{
1698 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1699 u_int offset=genjmp(a);
1700 output_w32(0xeb000000|offset);
1701}
1702void emit_jmp(int a)
1703{
1704 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1705 u_int offset=genjmp(a);
1706 output_w32(0xea000000|offset);
1707}
1708void emit_jne(int a)
1709{
1710 assem_debug("bne %x\n",a);
1711 u_int offset=genjmp(a);
1712 output_w32(0x1a000000|offset);
1713}
1714void emit_jeq(int a)
1715{
1716 assem_debug("beq %x\n",a);
1717 u_int offset=genjmp(a);
1718 output_w32(0x0a000000|offset);
1719}
1720void emit_js(int a)
1721{
1722 assem_debug("bmi %x\n",a);
1723 u_int offset=genjmp(a);
1724 output_w32(0x4a000000|offset);
1725}
1726void emit_jns(int a)
1727{
1728 assem_debug("bpl %x\n",a);
1729 u_int offset=genjmp(a);
1730 output_w32(0x5a000000|offset);
1731}
1732void emit_jl(int a)
1733{
1734 assem_debug("blt %x\n",a);
1735 u_int offset=genjmp(a);
1736 output_w32(0xba000000|offset);
1737}
1738void emit_jge(int a)
1739{
1740 assem_debug("bge %x\n",a);
1741 u_int offset=genjmp(a);
1742 output_w32(0xaa000000|offset);
1743}
1744void emit_jno(int a)
1745{
1746 assem_debug("bvc %x\n",a);
1747 u_int offset=genjmp(a);
1748 output_w32(0x7a000000|offset);
1749}
1750void emit_jc(int a)
1751{
1752 assem_debug("bcs %x\n",a);
1753 u_int offset=genjmp(a);
1754 output_w32(0x2a000000|offset);
1755}
1756void emit_jcc(int a)
1757{
1758 assem_debug("bcc %x\n",a);
1759 u_int offset=genjmp(a);
1760 output_w32(0x3a000000|offset);
1761}
1762
/* x86-named stub: traces the mnemonic, then always asserts; emits nothing. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* x86-named stub: traces the mnemonic, then always asserts; emits nothing. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* x86-named stub: traces the mnemonic, then always asserts; emits nothing. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1778void emit_pushreg(u_int r)
1779{
1780 assem_debug("push %%%s\n",regname[r]);
1781 assert(0);
1782}
1783void emit_popreg(u_int r)
1784{
1785 assem_debug("pop %%%s\n",regname[r]);
1786 assert(0);
1787}
1788void emit_callreg(u_int r)
1789{
c6c3b1b3 1790 assert(r<15);
1791 assem_debug("blx %s\n",regname[r]);
1792 output_w32(0xe12fff30|r);
57871462 1793}
1794void emit_jmpreg(u_int r)
1795{
1796 assem_debug("mov pc,%s\n",regname[r]);
1797 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1798}
1799
1800void emit_readword_indexed(int offset, int rs, int rt)
1801{
1802 assert(offset>-4096&&offset<4096);
1803 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1804 if(offset>=0) {
1805 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1806 }else{
1807 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1808 }
1809}
1810void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1811{
1812 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1813 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1814}
c6c3b1b3 1815void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1816{
1817 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1819}
1820void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1824}
1825void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1826{
1827 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1828 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1829}
1830void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1831{
1832 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1833 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1834}
1835void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1836{
1837 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1838 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1839}
/*
 * Word load with an optional map register.  map<0 means "no map": a plain
 * offset load is emitted.  Otherwise addr must be 0 and the load uses
 * rs + (map << 2) as the address.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Doubleword load into rh (optional, skipped when rh<0) and rl.
 * Without a map register (map<0) the words are read at addr and addr+4.
 * With a map register, rh must not be rs; the map register is incremented
 * by one between the two loads (so it is modified by the emitted code).
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if(map < 0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1860void emit_movsbl_indexed(int offset, int rs, int rt)
1861{
1862 assert(offset>-256&&offset<256);
1863 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1864 if(offset>=0) {
1865 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1866 }else{
1867 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1868 }
1869}
1870void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1871{
1872 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1873 else {
1874 if(addr==0) {
1875 emit_shlimm(map,2,map);
1876 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1877 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1878 }else{
1879 assert(addr>-256&&addr<256);
1880 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1881 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1882 emit_movsbl_indexed(addr, rt, rt);
1883 }
1884 }
1885}
1886void emit_movswl_indexed(int offset, int rs, int rt)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_movzbl_indexed(int offset, int rs, int rt)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1907{
1908 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
/*
 * Zero-extending byte load with an optional map register.
 *   map<0     : plain offset load.
 *   addr==0   : load from rs + (map << 2).
 *   otherwise : rt = rs + addr first, then load from rt + (map << 2).
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1923void emit_movzwl_indexed(int offset, int rs, int rt)
1924{
1925 assert(offset>-256&&offset<256);
1926 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1927 if(offset>=0) {
1928 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1929 }else{
1930 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1931 }
1932}
054175e9 1933static void emit_ldrd(int offset, int rs, int rt)
1934{
1935 assert(offset>-256&&offset<256);
1936 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1937 if(offset>=0) {
1938 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1939 }else{
1940 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1941 }
1942}
57871462 1943void emit_readword(int addr, int rt)
1944{
1945 u_int offset = addr-(u_int)&dynarec_local;
1946 assert(offset<4096);
1947 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1948 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1949}
1950void emit_movsbl(int addr, int rt)
1951{
1952 u_int offset = addr-(u_int)&dynarec_local;
1953 assert(offset<256);
1954 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1955 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1956}
1957void emit_movswl(int addr, int rt)
1958{
1959 u_int offset = addr-(u_int)&dynarec_local;
1960 assert(offset<256);
1961 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1962 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1963}
1964void emit_movzbl(int addr, int rt)
1965{
1966 u_int offset = addr-(u_int)&dynarec_local;
1967 assert(offset<4096);
1968 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1969 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1970}
1971void emit_movzwl(int addr, int rt)
1972{
1973 u_int offset = addr-(u_int)&dynarec_local;
1974 assert(offset<256);
1975 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1976 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1977}
1978void emit_movzwl_reg(int rs, int rt)
1979{
1980 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1981 assert(0);
1982}
1983
1984void emit_xchg(int rs, int rt)
1985{
1986 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1987 assert(0);
1988}
1989void emit_writeword_indexed(int rt, int offset, int rs)
1990{
1991 assert(offset>-4096&&offset<4096);
1992 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1993 if(offset>=0) {
1994 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1995 }else{
1996 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1997 }
1998}
1999void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2000{
2001 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2002 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2003}
/*
 * Word store with an optional map register.  map<0 means "no map": a plain
 * offset store is emitted.  Otherwise addr must be 0 and the store targets
 * rs + (map << 2).  The temp parameter is not used here.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Doubleword store of rh (high) and rl (low).
 * Without a map register (map<0) the words go to addr and addr+4, and the
 * high word is skipped when rh<0.  With a map register rh must be valid;
 * the second word is stored either through temp = map+1 (when temp is a
 * register distinct from rs) or, if temp==rs, after advancing rs by 4.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map < 0) {
    if(rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_usable = (temp != rs);
  if(temp_usable) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2028void emit_writehword_indexed(int rt, int offset, int rs)
2029{
2030 assert(offset>-256&&offset<256);
2031 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2032 if(offset>=0) {
2033 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2034 }else{
2035 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2036 }
2037}
2038void emit_writebyte_indexed(int rt, int offset, int rs)
2039{
2040 assert(offset>-4096&&offset<4096);
2041 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2042 if(offset>=0) {
2043 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2044 }else{
2045 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2046 }
2047}
2048void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2049{
2050 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2051 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2052}
/*
 * Byte store with an optional map register.
 *   map<0     : plain offset store.
 *   addr==0   : store to rs + (map << 2).
 *   otherwise : temp = rs + addr first, then store to temp + (map << 2).
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2065void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2066{
2067 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2068 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2069}
2070void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2071{
2072 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2073 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2074}
2075void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2076{
2077 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2079}
57871462 2080void emit_writeword(int rt, int addr)
2081{
2082 u_int offset = addr-(u_int)&dynarec_local;
2083 assert(offset<4096);
2084 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2085 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2086}
2087void emit_writehword(int rt, int addr)
2088{
2089 u_int offset = addr-(u_int)&dynarec_local;
2090 assert(offset<256);
2091 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2092 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2093}
2094void emit_writebyte(int rt, int addr)
2095{
2096 u_int offset = addr-(u_int)&dynarec_local;
2097 assert(offset<4096);
74426039 2098 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2099 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2100}
/* x86-named stub: traces the mnemonic, then always asserts; emits nothing. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* x86-named stub: traces the mnemonic, then always asserts; emits nothing. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2111
2112void emit_mul(int rs)
2113{
2114 assem_debug("mul %%%s\n",regname[rs]);
2115 assert(0);
2116}
2117void emit_imul(int rs)
2118{
2119 assem_debug("imul %%%s\n",regname[rs]);
2120 assert(0);
2121}
2122void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2123{
2124 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2125 assert(rs1<16);
2126 assert(rs2<16);
2127 assert(hi<16);
2128 assert(lo<16);
2129 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2130}
2131void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2132{
2133 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2134 assert(rs1<16);
2135 assert(rs2<16);
2136 assert(hi<16);
2137 assert(lo<16);
2138 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2139}
2140
2141void emit_div(int rs)
2142{
2143 assem_debug("div %%%s\n",regname[rs]);
2144 assert(0);
2145}
2146void emit_idiv(int rs)
2147{
2148 assem_debug("idiv %%%s\n",regname[rs]);
2149 assert(0);
2150}
/* x86-named stub: traces the mnemonic, then always asserts; emits nothing. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2156
2157void emit_clz(int rs,int rt)
2158{
2159 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2160 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2161}
2162
2163void emit_subcs(int rs1,int rs2,int rt)
2164{
2165 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2166 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2167}
2168
2169void emit_shrcc_imm(int rs,u_int imm,int rt)
2170{
2171 assert(imm>0);
2172 assert(imm<32);
2173 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2174 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2175}
2176
b1be1eee 2177void emit_shrne_imm(int rs,u_int imm,int rt)
2178{
2179 assert(imm>0);
2180 assert(imm<32);
2181 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2182 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2183}
2184
57871462 2185void emit_negmi(int rs, int rt)
2186{
2187 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2188 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2189}
2190
2191void emit_negsmi(int rs, int rt)
2192{
2193 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2194 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2195}
2196
2197void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2198{
2199 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2200 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2201}
2202
2203void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2204{
2205 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2206 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2207}
2208
2209void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2210{
2211 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2212 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2213}
2214
2215void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2216{
2217 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2218 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2219}
2220
2221void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2222{
2223 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2224 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2225}
2226
2227void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2228{
2229 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2230 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2231}
2232
2233void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2234{
2235 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2236 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2237}
2238
2239void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2240{
2241 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2242 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2243}
2244
2245void emit_teq(int rs, int rt)
2246{
2247 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2248 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2249}
2250
2251void emit_rsbimm(int rs, int imm, int rt)
2252{
2253 u_int armval;
cfbd3c6e 2254 genimm_checked(imm,&armval);
57871462 2255 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2256 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2257}
2258
2259// Load 2 immediates optimizing for small code size
2260void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2261{
2262 emit_movimm(imm1,rt1);
2263 u_int armval;
2264 if(genimm(imm2-imm1,&armval)) {
2265 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2266 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2267 }else if(genimm(imm1-imm2,&armval)) {
2268 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2269 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2270 }
2271 else emit_movimm(imm2,rt2);
2272}
2273
2274// Conditionally select one of two immediates, optimizing for small code size
2275// This will only be called if HAVE_CMOV_IMM is defined
2276void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2277{
2278 u_int armval;
2279 if(genimm(imm2-imm1,&armval)) {
2280 emit_movimm(imm1,rt);
2281 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2282 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2283 }else if(genimm(imm1-imm2,&armval)) {
2284 emit_movimm(imm1,rt);
2285 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2286 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2287 }
2288 else {
2289 #ifdef ARMv5_ONLY
2290 emit_movimm(imm1,rt);
2291 add_literal((int)out,imm2);
2292 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2293 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2294 #else
2295 emit_movw(imm1&0x0000FFFF,rt);
2296 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2297 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2298 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2299 }
2300 emit_movt(imm1&0xFFFF0000,rt);
2301 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2302 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2303 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2304 }
2305 #endif
2306 }
2307}
2308
// special case for checking invalid_code
// Not implemented in this backend: always trips assert(0).
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2314
2315// special case for checking invalid_code
2316void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2317{
2318 assert(imm<128&&imm>=0);
2319 assert(r>=0&&r<16);
2320 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2321 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2322 emit_cmpimm(HOST_TEMPREG,imm);
2323}
2324
2325// special case for tlb mapping
2326void emit_addsr12(int rs1,int rs2,int rt)
2327{
2328 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2329 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2330}
2331
0bbd1454 2332void emit_callne(int a)
2333{
2334 assem_debug("blne %x\n",a);
2335 u_int offset=genjmp(a);
2336 output_w32(0x1b000000|offset);
2337}
2338
// Used to preload hash table entries
// The previous body wrote the x86 byte sequence 0F 18 /1 followed by a
// 32-bit address, which is not a valid ARM instruction and is not a
// multiple of 4 bytes long.  Absolute-address prefetch is therefore
// treated like the other unsupported x86-style emitters in this file:
// log the request and assert.  emit_prefetchreg() is the register form.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2348void emit_prefetchreg(int r)
2349{
2350 assem_debug("pld %s\n",regname[r]);
2351 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2352}
2353
2354// Special case for mini_ht
2355void emit_ldreq_indexed(int rs, u_int offset, int rt)
2356{
2357 assert(offset<4096);
2358 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2359 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2360}
2361
2362void emit_flds(int r,int sr)
2363{
2364 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2365 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2366}
2367
2368void emit_vldr(int r,int vr)
2369{
2370 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2371 output_w32(0xed900b00|(vr<<12)|(r<<16));
2372}
2373
2374void emit_fsts(int sr,int r)
2375{
2376 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2377 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2378}
2379
2380void emit_vstr(int vr,int r)
2381{
2382 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2383 output_w32(0xed800b00|(vr<<12)|(r<<16));
2384}
2385
2386void emit_ftosizs(int s,int d)
2387{
2388 assem_debug("ftosizs s%d,s%d\n",d,s);
2389 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2390}
2391
2392void emit_ftosizd(int s,int d)
2393{
2394 assem_debug("ftosizd s%d,d%d\n",d,s);
2395 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2396}
2397
2398void emit_fsitos(int s,int d)
2399{
2400 assem_debug("fsitos s%d,s%d\n",d,s);
2401 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2402}
2403
2404void emit_fsitod(int s,int d)
2405{
2406 assem_debug("fsitod d%d,s%d\n",d,s);
2407 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2408}
2409
2410void emit_fcvtds(int s,int d)
2411{
2412 assem_debug("fcvtds d%d,s%d\n",d,s);
2413 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2414}
2415
2416void emit_fcvtsd(int s,int d)
2417{
2418 assem_debug("fcvtsd s%d,d%d\n",d,s);
2419 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2420}
2421
// Emit "fsqrts s<d>,s<s>".  Both operands are encoded with the split
// register-number layout used for the s registers elsewhere in this
// file, so the debug trace names both as s registers.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2427
// Emit "fsqrtd d<d>,d<s>".  Both operands are encoded with the 3-bit
// layout used for the d registers elsewhere in this file, so the debug
// trace names both as d registers.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2433
// Emit "fabss s<d>,s<s>".  Both operands are encoded with the split
// register-number layout used for the s registers elsewhere in this
// file, so the debug trace names both as s registers.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2439
// Emit "fabsd d<d>,d<s>".  Both operands are encoded with the 3-bit
// layout used for the d registers elsewhere in this file, so the debug
// trace names both as d registers.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2445
// Emit "fnegs s<d>,s<s>".  Both operands are encoded with the split
// register-number layout used for the s registers elsewhere in this
// file, so the debug trace names both as s registers.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2451
// Emit "fnegd d<d>,d<s>".  Both operands are encoded with the 3-bit
// layout used for the d registers elsewhere in this file, so the debug
// trace names both as d registers.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2457
2458void emit_fadds(int s1,int s2,int d)
2459{
2460 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2461 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2462}
2463
2464void emit_faddd(int s1,int s2,int d)
2465{
2466 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2467 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2468}
2469
2470void emit_fsubs(int s1,int s2,int d)
2471{
2472 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2473 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2474}
2475
2476void emit_fsubd(int s1,int s2,int d)
2477{
2478 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2479 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2480}
2481
2482void emit_fmuls(int s1,int s2,int d)
2483{
2484 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2485 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2486}
2487
2488void emit_fmuld(int s1,int s2,int d)
2489{
2490 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2491 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2492}
2493
2494void emit_fdivs(int s1,int s2,int d)
2495{
2496 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2497 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2498}
2499
2500void emit_fdivd(int s1,int s2,int d)
2501{
2502 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2503 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2504}
2505
2506void emit_fcmps(int x,int y)
2507{
2508 assem_debug("fcmps s14, s15\n");
2509 output_w32(0xeeb47a67);
2510}
2511
2512void emit_fcmpd(int x,int y)
2513{
2514 assem_debug("fcmpd d6, d7\n");
2515 output_w32(0xeeb46b47);
2516}
2517
2518void emit_fmstat()
2519{
2520 assem_debug("fmstat\n");
2521 output_w32(0xeef1fa10);
2522}
2523
2524void emit_bicne_imm(int rs,int imm,int rt)
2525{
2526 u_int armval;
cfbd3c6e 2527 genimm_checked(imm,&armval);
57871462 2528 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2529 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2530}
2531
2532void emit_biccs_imm(int rs,int imm,int rt)
2533{
2534 u_int armval;
cfbd3c6e 2535 genimm_checked(imm,&armval);
57871462 2536 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2537 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2538}
2539
2540void emit_bicvc_imm(int rs,int imm,int rt)
2541{
2542 u_int armval;
cfbd3c6e 2543 genimm_checked(imm,&armval);
57871462 2544 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2545 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2546}
2547
2548void emit_bichi_imm(int rs,int imm,int rt)
2549{
2550 u_int armval;
cfbd3c6e 2551 genimm_checked(imm,&armval);
57871462 2552 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2553 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2554}
2555
2556void emit_orrvs_imm(int rs,int imm,int rt)
2557{
2558 u_int armval;
cfbd3c6e 2559 genimm_checked(imm,&armval);
57871462 2560 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2561 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2562}
2563
b9b61529 2564void emit_orrne_imm(int rs,int imm,int rt)
2565{
2566 u_int armval;
cfbd3c6e 2567 genimm_checked(imm,&armval);
b9b61529 2568 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2569 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2570}
2571
2572void emit_andne_imm(int rs,int imm,int rt)
2573{
2574 u_int armval;
cfbd3c6e 2575 genimm_checked(imm,&armval);
b9b61529 2576 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2577 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2578}
2579
57871462 2580void emit_jno_unlikely(int a)
2581{
2582 //emit_jno(a);
2583 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2584 output_w32(0x72800000|rd_rn_rm(15,15,0));
2585}
2586
054175e9 2587static void save_regs_all(u_int reglist)
57871462 2588{
054175e9 2589 int i;
57871462 2590 if(!reglist) return;
2591 assem_debug("stmia fp,{");
054175e9 2592 for(i=0;i<16;i++)
2593 if(reglist&(1<<i))
2594 assem_debug("r%d,",i);
57871462 2595 assem_debug("}\n");
2596 output_w32(0xe88b0000|reglist);
2597}
054175e9 2598static void restore_regs_all(u_int reglist)
57871462 2599{
054175e9 2600 int i;
57871462 2601 if(!reglist) return;
2602 assem_debug("ldmia fp,{");
054175e9 2603 for(i=0;i<16;i++)
2604 if(reglist&(1<<i))
2605 assem_debug("r%d,",i);
57871462 2606 assem_debug("}\n");
2607 output_w32(0xe89b0000|reglist);
2608}
054175e9 2609// Save registers before function call
2610static void save_regs(u_int reglist)
2611{
2612 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2613 save_regs_all(reglist);
2614}
2615// Restore registers after function call
2616static void restore_regs(u_int reglist)
2617{
2618 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2619 restore_regs_all(reglist);
2620}
57871462 2621
2622// Write back consts using r14 so we don't disturb the other registers
2623void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2624{
2625 int hr;
2626 for(hr=0;hr<HOST_REGS;hr++) {
2627 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2628 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2629 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2630 int value=constmap[i][hr];
2631 if(value==0) {
2632 emit_zeroreg(HOST_TEMPREG);
2633 }
2634 else {
2635 emit_movimm(value,HOST_TEMPREG);
2636 }
2637 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2638#ifndef FORCE32
57871462 2639 if((i_is32>>i_regmap[hr])&1) {
2640 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2641 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2642 }
24385cae 2643#endif
57871462 2644 }
2645 }
2646 }
2647 }
2648}
2649
2650/* Stubs/epilogue */
2651
2652void literal_pool(int n)
2653{
2654 if(!literalcount) return;
2655 if(n) {
2656 if((int)out-literals[0][0]<4096-n) return;
2657 }
2658 u_int *ptr;
2659 int i;
2660 for(i=0;i<literalcount;i++)
2661 {
77750690 2662 u_int l_addr=(u_int)out;
2663 int j;
2664 for(j=0;j<i;j++) {
2665 if(literals[j][1]==literals[i][1]) {
2666 //printf("dup %08x\n",literals[i][1]);
2667 l_addr=literals[j][0];
2668 break;
2669 }
2670 }
57871462 2671 ptr=(u_int *)literals[i][0];
77750690 2672 u_int offset=l_addr-(u_int)ptr-8;
57871462 2673 assert(offset<4096);
2674 assert(!(offset&3));
2675 *ptr|=offset;
77750690 2676 if(l_addr==(u_int)out) {
2677 literals[i][0]=l_addr; // remember for dupes
2678 output_w32(literals[i][1]);
2679 }
57871462 2680 }
2681 literalcount=0;
2682}
2683
2684void literal_pool_jumpover(int n)
2685{
2686 if(!literalcount) return;
2687 if(n) {
2688 if((int)out-literals[0][0]<4096-n) return;
2689 }
2690 int jaddr=(int)out;
2691 emit_jmp(0);
2692 literal_pool(0);
2693 set_jump_target(jaddr,(int)out);
2694}
2695
2696emit_extjump2(int addr, int target, int linker)
2697{
2698 u_char *ptr=(u_char *)addr;
2699 assert((ptr[3]&0x0e)==0xa);
2700 emit_loadlp(target,0);
2701 emit_loadlp(addr,1);
24385cae 2702 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2703 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2704//DEBUG >
2705#ifdef DEBUG_CYCLE_COUNT
2706 emit_readword((int)&last_count,ECX);
2707 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2708 emit_readword((int)&next_interupt,ECX);
2709 emit_writeword(HOST_CCREG,(int)&Count);
2710 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2711 emit_writeword(ECX,(int)&last_count);
2712#endif
2713//DEBUG <
2714 emit_jmp(linker);
2715}
2716
2717emit_extjump(int addr, int target)
2718{
2719 emit_extjump2(addr, target, (int)dyna_linker);
2720}
2721emit_extjump_ds(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker_ds);
2724}
2725
13e35c04 2726// put rt_val into rt, potentially making use of rs with value rs_val
2727static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2728{
2729 u_int xor=rs_val^rt_val;
2730 u_int xs;
2731 for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2)
2732 ;
2733 if(xs<0x100)
2734 emit_xorimm(rs,xor,rt);
2735 else
2736 emit_movimm(rt_val,rt);
2737}
cbbab9cd 2738
// trashes r2
// Move host register a0 into r0 and a1 into r1, ordering the moves so
// that neither source is overwritten before it has been read.
// A negative register number means "no argument" and is skipped, except
// that a1 is always moved when it is register 0.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 lives in r0: move it out before r0 is overwritten
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2755
b1be1eee 2756static void mov_loadtype_adj(int type,int rs,int rt)
2757{
2758 switch(type) {
2759 case LOADB_STUB: emit_signextend8(rs,rt); break;
2760 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2761 case LOADH_STUB: emit_signextend16(rs,rt); break;
2762 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2763 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2764 default: assert(0);
2765 }
2766}
2767
2768#ifdef PCSX
2769#include "pcsxmem.h"
2770#include "pcsxmem_inline.c"
2771#endif
2772
57871462 2773do_readstub(int n)
2774{
2775 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2776 literal_pool(256);
2777 set_jump_target(stubs[n][1],(int)out);
2778 int type=stubs[n][0];
2779 int i=stubs[n][3];
2780 int rs=stubs[n][4];
2781 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2782 u_int reglist=stubs[n][7];
2783 signed char *i_regmap=i_regs->regmap;
2784 int addr=get_reg(i_regmap,AGEN1+(i&1));
2785 int rth,rt;
2786 int ds;
b9b61529 2787 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2788 rth=get_reg(i_regmap,FTEMP|64);
2789 rt=get_reg(i_regmap,FTEMP);
2790 }else{
2791 rth=get_reg(i_regmap,rt1[i]|64);
2792 rt=get_reg(i_regmap,rt1[i]);
2793 }
2794 assert(rs>=0);
c6c3b1b3 2795#ifdef PCSX
2796 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2797 reglist|=(1<<rs);
2798 for(r=0;r<=12;r++) {
2799 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2800 temp=r; break;
2801 }
2802 }
2803 if(rt>=0)
2804 reglist&=~(1<<rt);
2805 if(temp==-1) {
2806 save_regs(reglist);
2807 regs_saved=1;
2808 temp=(rs==0)?2:0;
2809 }
2810 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2811 temp2=1;
2812 emit_readword((int)&mem_rtab,temp);
2813 emit_shrimm(rs,12,temp2);
2814 emit_readword_dualindexedx4(temp,temp2,temp2);
2815 emit_lsls_imm(temp2,1,temp2);
2816 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2817 switch(type) {
2818 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2819 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2820 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2821 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2822 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2823 }
2824 }
2825 if(regs_saved) {
2826 restore_jump=(int)out;
2827 emit_jcc(0); // jump to reg restore
2828 }
2829 else
2830 emit_jcc(stubs[n][2]); // return address
2831
2832 if(!regs_saved)
2833 save_regs(reglist);
2834 int handler=0;
2835 if(type==LOADB_STUB||type==LOADBU_STUB)
2836 handler=(int)jump_handler_read8;
2837 if(type==LOADH_STUB||type==LOADHU_STUB)
2838 handler=(int)jump_handler_read16;
2839 if(type==LOADW_STUB)
2840 handler=(int)jump_handler_read32;
2841 assert(handler!=0);
b96d3df7 2842 pass_args(rs,temp2);
c6c3b1b3 2843 int cc=get_reg(i_regmap,CCREG);
2844 if(cc<0)
2845 emit_loadreg(CCREG,2);
2573466a 2846 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2847 emit_call(handler);
2848 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2849 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2850 }
2851 if(restore_jump)
2852 set_jump_target(restore_jump,(int)out);
2853 restore_regs(reglist);
2854 emit_jmp(stubs[n][2]); // return address
2855#else // !PCSX
57871462 2856 if(addr<0) addr=rt;
535d208a 2857 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2858 assert(addr>=0);
2859 int ftable=0;
2860 if(type==LOADB_STUB||type==LOADBU_STUB)
2861 ftable=(int)readmemb;
2862 if(type==LOADH_STUB||type==LOADHU_STUB)
2863 ftable=(int)readmemh;
2864 if(type==LOADW_STUB)
2865 ftable=(int)readmem;
24385cae 2866#ifndef FORCE32
57871462 2867 if(type==LOADD_STUB)
2868 ftable=(int)readmemd;
24385cae 2869#endif
2870 assert(ftable!=0);
57871462 2871 emit_writeword(rs,(int)&address);
2872 //emit_pusha();
2873 save_regs(reglist);
97a238a6 2874#ifndef PCSX
57871462 2875 ds=i_regs!=&regs[i];
2876 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2877 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2878 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2879 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2880 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2881#endif
57871462 2882 emit_shrimm(rs,16,1);
2883 int cc=get_reg(i_regmap,CCREG);
2884 if(cc<0) {
2885 emit_loadreg(CCREG,2);
2886 }
2887 emit_movimm(ftable,0);
2888 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2889#ifndef PCSX
57871462 2890 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2891#endif
57871462 2892 //emit_readword((int)&last_count,temp);
2893 //emit_add(cc,temp,cc);
2894 //emit_writeword(cc,(int)&Count);
2895 //emit_mov(15,14);
2896 emit_call((int)&indirect_jump_indexed);
2897 //emit_callreg(rs);
2898 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2899#ifndef PCSX
57871462 2900 // We really shouldn't need to update the count here,
2901 // but not doing so causes random crashes...
2902 emit_readword((int)&Count,HOST_TEMPREG);
2903 emit_readword((int)&next_interupt,2);
2904 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2905 emit_writeword(2,(int)&last_count);
2906 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2907 if(cc<0) {
2908 emit_storereg(CCREG,HOST_TEMPREG);
2909 }
f51dc36c 2910#endif
57871462 2911 //emit_popa();
2912 restore_regs(reglist);
2913 //if((cc=get_reg(regmap,CCREG))>=0) {
2914 // emit_loadreg(CCREG,cc);
2915 //}
f18c0f46 2916 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2917 assert(rt>=0);
2918 if(type==LOADB_STUB)
2919 emit_movsbl((int)&readmem_dword,rt);
2920 if(type==LOADBU_STUB)
2921 emit_movzbl((int)&readmem_dword,rt);
2922 if(type==LOADH_STUB)
2923 emit_movswl((int)&readmem_dword,rt);
2924 if(type==LOADHU_STUB)
2925 emit_movzwl((int)&readmem_dword,rt);
2926 if(type==LOADW_STUB)
2927 emit_readword((int)&readmem_dword,rt);
2928 if(type==LOADD_STUB) {
2929 emit_readword((int)&readmem_dword,rt);
2930 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2931 }
57871462 2932 }
2933 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2934#endif // !PCSX
57871462 2935}
2936
#ifdef PCSX
// return memhandler, or get directly accessible address and return 0
//
// table is indexed by addr>>12.  An entry with bit 31 clear is a
// mapping: (entry<<1)+addr is stored to *addr_host and 0 is returned.
// An entry with bit 31 set is, after the same <<1, a second-level table
// holding 32-bit, 16-bit and 8-bit sections in that order; the entry
// picked by type and by the low 12 address bits is decoded the same
// way.  In the handler case the function returns entry<<1 and leaves
// *addr_host untouched.
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  // unsigned constant: 1<<31 on a 32-bit int is undefined behavior
  const u_int handler_flag=1u<<31;
  u_int l1,l2=0;
  l1=((u_int *)table)[addr>>12];
  if((l1&handler_flag)==0) {
    u_int v=l1<<1;
    *addr_host=v+addr;
    return 0;
  }
  else {
    l1<<=1;
    if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
      l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
    else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
      l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
    else
      l2=((u_int *)l1)[(addr&0xfff)/4];
    if((l2&handler_flag)==0) {
      u_int v=l2<<1;
      *addr_host=v+(addr&0xfff);
      return 0;
    }
    return l2<<1;
  }
}
#endif
2965
57871462 2966inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2967{
2968 int rs=get_reg(regmap,target);
2969 int rth=get_reg(regmap,target|64);
2970 int rt=get_reg(regmap,target);
535d208a 2971 if(rs<0) rs=get_reg(regmap,-1);
57871462 2972 assert(rs>=0);
c6c3b1b3 2973#ifdef PCSX
b1be1eee 2974 u_int handler,host_addr=0,is_dynamic,far_call=0;
2975 int cc=get_reg(regmap,CCREG);
2976 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2977 return;
c6c3b1b3 2978 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2979 if (handler==0) {
2980 if(rt<0)
2981 return;
13e35c04 2982 if(addr!=host_addr)
2983 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2984 switch(type) {
2985 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2986 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2987 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2988 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2989 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2990 default: assert(0);
2991 }
2992 return;
2993 }
b1be1eee 2994 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2995 if(is_dynamic) {
2996 if(type==LOADB_STUB||type==LOADBU_STUB)
2997 handler=(int)jump_handler_read8;
2998 if(type==LOADH_STUB||type==LOADHU_STUB)
2999 handler=(int)jump_handler_read16;
3000 if(type==LOADW_STUB)
3001 handler=(int)jump_handler_read32;
3002 }
c6c3b1b3 3003
3004 // call a memhandler
3005 if(rt>=0)
3006 reglist&=~(1<<rt);
3007 save_regs(reglist);
3008 if(target==0)
3009 emit_movimm(addr,0);
3010 else if(rs!=0)
3011 emit_mov(rs,0);
c6c3b1b3 3012 int offset=(int)handler-(int)out-8;
3013 if(offset<-33554432||offset>=33554432) {
3014 // unreachable memhandler, a plugin func perhaps
b1be1eee 3015 emit_movimm(handler,12);
3016 far_call=1;
3017 }
3018 if(cc<0)
3019 emit_loadreg(CCREG,2);
3020 if(is_dynamic) {
3021 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3022 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3023 }
b1be1eee 3024 else {
3025 emit_readword((int)&last_count,3);
3026 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3027 emit_add(2,3,2);
3028 emit_writeword(2,(int)&Count);
3029 }
3030
3031 if(far_call)
3032 emit_callreg(12);
c6c3b1b3 3033 else
3034 emit_call(handler);
b1be1eee 3035
c6c3b1b3 3036 if(rt>=0) {
3037 switch(type) {
3038 case LOADB_STUB: emit_signextend8(0,rt); break;
3039 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3040 case LOADH_STUB: emit_signextend16(0,rt); break;
3041 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3042 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3043 default: assert(0);
3044 }
3045 }
3046 restore_regs(reglist);
3047#else // if !PCSX
57871462 3048 int ftable=0;
3049 if(type==LOADB_STUB||type==LOADBU_STUB)
3050 ftable=(int)readmemb;
3051 if(type==LOADH_STUB||type==LOADHU_STUB)
3052 ftable=(int)readmemh;
3053 if(type==LOADW_STUB)
3054 ftable=(int)readmem;
24385cae 3055#ifndef FORCE32
57871462 3056 if(type==LOADD_STUB)
3057 ftable=(int)readmemd;
24385cae 3058#endif
3059 assert(ftable!=0);
fd99c415 3060 if(target==0)
3061 emit_movimm(addr,rs);
57871462 3062 emit_writeword(rs,(int)&address);
3063 //emit_pusha();
3064 save_regs(reglist);
0c1fe38b 3065#ifndef PCSX
3066 if((signed int)addr>=(signed int)0xC0000000) {
3067 // Theoretically we can have a pagefault here, if the TLB has never
3068 // been enabled and the address is outside the range 80000000..BFFFFFFF
3069 // Write out the registers so the pagefault can be handled. This is
3070 // a very rare case and likely represents a bug.
3071 int ds=regmap!=regs[i].regmap;
3072 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3073 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3074 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3075 }
3076#endif
57871462 3077 //emit_shrimm(rs,16,1);
3078 int cc=get_reg(regmap,CCREG);
3079 if(cc<0) {
3080 emit_loadreg(CCREG,2);
3081 }
3082 //emit_movimm(ftable,0);
3083 emit_movimm(((u_int *)ftable)[addr>>16],0);
3084 //emit_readword((int)&last_count,12);
2573466a 3085 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3086#ifndef PCSX
57871462 3087 if((signed int)addr>=(signed int)0xC0000000) {
3088 // Pagefault address
3089 int ds=regmap!=regs[i].regmap;
3090 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3091 }
f51dc36c 3092#endif
57871462 3093 //emit_add(12,2,2);
3094 //emit_writeword(2,(int)&Count);
3095 //emit_call(((u_int *)ftable)[addr>>16]);
3096 emit_call((int)&indirect_jump);
f51dc36c 3097#ifndef PCSX
57871462 3098 // We really shouldn't need to update the count here,
3099 // but not doing so causes random crashes...
3100 emit_readword((int)&Count,HOST_TEMPREG);
3101 emit_readword((int)&next_interupt,2);
2573466a 3102 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3103 emit_writeword(2,(int)&last_count);
3104 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3105 if(cc<0) {
3106 emit_storereg(CCREG,HOST_TEMPREG);
3107 }
f51dc36c 3108#endif
57871462 3109 //emit_popa();
3110 restore_regs(reglist);
fd99c415 3111 if(rt>=0) {
3112 if(type==LOADB_STUB)
3113 emit_movsbl((int)&readmem_dword,rt);
3114 if(type==LOADBU_STUB)
3115 emit_movzbl((int)&readmem_dword,rt);
3116 if(type==LOADH_STUB)
3117 emit_movswl((int)&readmem_dword,rt);
3118 if(type==LOADHU_STUB)
3119 emit_movzwl((int)&readmem_dword,rt);
3120 if(type==LOADW_STUB)
3121 emit_readword((int)&readmem_dword,rt);
3122 if(type==LOADD_STUB) {
3123 emit_readword((int)&readmem_dword,rt);
3124 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3125 }
57871462 3126 }
c6c3b1b3 3127#endif // !PCSX
57871462 3128}
3129
/*
 * do_writestub: emit the out-of-line slow path for the store recorded in
 * stubs[n].
 *
 * Fields of stubs[n] as read here:
 *   [0] stub type (STOREB_STUB/STOREH_STUB/STOREW_STUB/STORED_STUB)
 *   [1] location of the branch that gets pointed at this stub
 *   [2] address the stub jumps back to when done
 *   [3] index of the instruction inside the block
 *   [4] host register holding the store address (rs)
 *   [5] struct regstat pointer for the instruction
 *   [6] cycle adjustment
 *   [7] register list handed to save_regs()/restore_regs()
 *
 * The PCSX build and the non-PCSX build emit entirely different sequences;
 * see the two halves of the #ifdef below.
 */
3130do_writestub(int n)
3131{
3132 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3133 literal_pool(256);
3134 set_jump_target(stubs[n][1],(int)out);
3135 int type=stubs[n][0];
3136 int i=stubs[n][3];
3137 int rs=stubs[n][4];
3138 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3139 u_int reglist=stubs[n][7];
3140 signed char *i_regmap=i_regs->regmap;
3141 int addr=get_reg(i_regmap,AGEN1+(i&1));
3142 int rth,rt,r;
3143 int ds;
// Coprocessor load/store instructions keep the data in FTEMP; everything
// else stores guest register rs2[i].
b9b61529 3144 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3145 rth=get_reg(i_regmap,FTEMP|64);
3146 rt=get_reg(i_regmap,r=FTEMP);
3147 }else{
3148 rth=get_reg(i_regmap,rs2[i]|64);
3149 rt=get_reg(i_regmap,r=rs2[i]);
3150 }
3151 assert(rs>=0);
3152 assert(rt>=0);
b96d3df7 3153#ifdef PCSX
3154 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3155 int reglist2=reglist|(1<<rs)|(1<<rt);
// Look for a scratch host register among r0-r9 and r12 (mask 0x13ff) that
// is neither in reglist nor rs/rt.
3156 for(rtmp=0;rtmp<=12;rtmp++) {
3157 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3158 temp=rtmp; break;
3159 }
3160 }
// None free: save the registers right away and take the first of r0-r3
// that is not rs or rt.
3161 if(temp==-1) {
3162 save_regs(reglist);
3163 regs_saved=1;
3164 for(rtmp=0;rtmp<=3;rtmp++)
3165 if(rtmp!=rs&&rtmp!=rt)
3166 {temp=rtmp;break;}
3167 }
// Use r3 instead of HOST_TEMPREG for temp2 when r3 is free (or already
// saved) and is not temp, rs or rt.
3168 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3169 temp2=3;
// Load the mem_wtab value, index it by rs>>12 and shift the loaded entry
// left by one (lsls). The strcc*/jcc emitters that follow are presumably
// conditional on the carry produced by that shift (carry clear = plain
// store, then return) -- confirm against the emitter definitions.
3170 emit_readword((int)&mem_wtab,temp);
3171 emit_shrimm(rs,12,temp2);
3172 emit_readword_dualindexedx4(temp,temp2,temp2);
3173 emit_lsls_imm(temp2,1,temp2);
3174 switch(type) {
3175 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3176 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3177 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3178 default: assert(0);
3179 }
// If registers were saved above, the conditional jump must go through the
// restore sequence emitted at the end; its target is patched later via
// restore_jump. Otherwise it goes straight back to stubs[n][2].
3180 if(regs_saved) {
3181 restore_jump=(int)out;
3182 emit_jcc(0); // jump to reg restore
3183 }
3184 else
3185 emit_jcc(stubs[n][2]); // return address (invcode check)
3186
3187 if(!regs_saved)
3188 save_regs(reglist);
// Handler path: pick the write handler matching the access width.
3189 int handler=0;
3190 switch(type) {
3191 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3192 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3193 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3194 }
3195 assert(handler!=0);
3196 pass_args(rs,rt);
// Move the shifted table entry into r3 unless it is already there.
3197 if(temp2!=3)
3198 emit_mov(temp2,3);
// r2 = cycle count + CLOCK_ADJUST(stubs[n][6]+1); after the call the same
// adjustment is subtracted from r0 again.
3199 int cc=get_reg(i_regmap,CCREG);
3200 if(cc<0)
3201 emit_loadreg(CCREG,2);
2573466a 3202 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3203 // returns new cycle_count
3204 emit_call(handler);
2573466a 3205 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3206 if(cc<0)
3207 emit_storereg(CCREG,2);
3208 if(restore_jump)
3209 set_jump_target(restore_jump,(int)out);
3210 restore_regs(reglist);
3211 ra=stubs[n][2];
3212 if(!restore_jump) ra+=4*3; // skip invcode check
3213 emit_jmp(ra);
3214#else // if !PCSX
// Non-PCSX path: the address and data are written to the address/byte/
// hword/word/dword globals and the write goes through the writemem* table
// via indirect_jump_indexed.
// NOTE(review): the "#ifndef PCSX" blocks further down sit inside this
// !PCSX branch, so they are always compiled whenever this branch is.
57871462 3215 if(addr<0) addr=get_reg(i_regmap,-1);
3216 assert(addr>=0);
3217 int ftable=0;
3218 if(type==STOREB_STUB)
3219 ftable=(int)writememb;
3220 if(type==STOREH_STUB)
3221 ftable=(int)writememh;
3222 if(type==STOREW_STUB)
3223 ftable=(int)writemem;
24385cae 3224#ifndef FORCE32
57871462 3225 if(type==STORED_STUB)
3226 ftable=(int)writememd;
24385cae 3227#endif
3228 assert(ftable!=0);
57871462 3229 emit_writeword(rs,(int)&address);
3230 //emit_shrimm(rs,16,rs);
3231 //emit_movmem_indexedx4(ftable,rs,rs);
3232 if(type==STOREB_STUB)
3233 emit_writebyte(rt,(int)&byte);
3234 if(type==STOREH_STUB)
3235 emit_writehword(rt,(int)&hword);
3236 if(type==STOREW_STUB)
3237 emit_writeword(rt,(int)&word);
3238 if(type==STORED_STUB) {
3d624f89 3239#ifndef FORCE32
57871462 3240 emit_writeword(rt,(int)&dword);
3241 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3242#else
3243 printf("STORED_STUB\n");
3244#endif
57871462 3245 }
3246 //emit_pusha();
3247 save_regs(reglist);
97a238a6 3248#ifndef PCSX
// ds is nonzero when i_regs is not &regs[i]; in that case constants are
// not reloaded or written back here.
57871462 3249 ds=i_regs!=&regs[i];
3250 int real_rs=get_reg(i_regmap,rs1[i]);
3251 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3252 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3253 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3254 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3255#endif
// r1 = rs>>16, r0 = table address, r2 = cycle count + 2*stubs[n][6]+2.
57871462 3256 emit_shrimm(rs,16,1);
3257 int cc=get_reg(i_regmap,CCREG);
3258 if(cc<0) {
3259 emit_loadreg(CCREG,2);
3260 }
3261 emit_movimm(ftable,0);
3262 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3263#ifndef PCSX
57871462 3264 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3265#endif
57871462 3266 //emit_readword((int)&last_count,temp);
3267 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3268 //emit_add(cc,temp,cc);
3269 //emit_writeword(cc,(int)&Count);
3270 emit_call((int)&indirect_jump_indexed);
3271 //emit_callreg(rs);
// After the call: reload Count and next_interupt, remove the cycle
// adjustment, store next_interupt into last_count and keep
// (Count - adjustment) - next_interupt as the cycle count.
3272 emit_readword((int)&Count,HOST_TEMPREG);
3273 emit_readword((int)&next_interupt,2);
3274 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3275 emit_writeword(2,(int)&last_count);
3276 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3277 if(cc<0) {
3278 emit_storereg(CCREG,HOST_TEMPREG);
3279 }
3280 //emit_popa();
3281 restore_regs(reglist);
3282 //if((cc=get_reg(regmap,CCREG))>=0) {
3283 // emit_loadreg(CCREG,cc);
3284 //}
3285 emit_jmp(stubs[n][2]); // return address
b96d3df7 3286#endif // !PCSX
57871462 3287}
3288
/*
 * inline_writestub: emit, at the current output position, a store of guest
 * register `target` to the address `addr`, which is already known while
 * recompiling.
 *
 *   type    - STOREB_STUB/STOREH_STUB/STOREW_STUB (STORED_STUB non-PCSX only)
 *   i       - index of the instruction inside the block
 *   addr    - the store address
 *   regmap  - host register map used to look up rs (slot -1), rt and CCREG
 *   target  - guest register holding the value to store
 *   adj     - cycle adjustment (used as CLOCK_ADJUST(adj+1))
 *   reglist - register list handed to save_regs()/restore_regs()
 */
3289inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3290{
3291 int rs=get_reg(regmap,-1);
3292 int rth=get_reg(regmap,target|64);
3293 int rt=get_reg(regmap,target);
3294 assert(rs>=0);
3295 assert(rt>=0);
cbbab9cd 3296#ifdef PCSX
b96d3df7 3297 u_int handler,host_addr=0;
b96d3df7 3298 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
// handler==0: no handler call is needed, store directly with rs as base.
// emit_movimm_from presumably rewrites rs from addr to host_addr when the
// two differ -- confirm against its definition.
3299 if (handler==0) {
13e35c04 3300 if(addr!=host_addr)
3301 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3302 switch(type) {
3303 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3304 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3305 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3306 default: assert(0);
3307 }
3308 return;
3309 }
3310
3311 // call a memhandler
// r2 = cycle count + CLOCK_ADJUST(adj+1), r3 = handler; after the call the
// adjustment is subtracted from r0 again.
3312 save_regs(reglist);
13e35c04 3313 pass_args(rs,rt);
b96d3df7 3314 int cc=get_reg(regmap,CCREG);
3315 if(cc<0)
3316 emit_loadreg(CCREG,2);
2573466a 3317 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3318 emit_movimm(handler,3);
3319 // returns new cycle_count
3320 emit_call((int)jump_handler_write_h);
2573466a 3321 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3322 if(cc<0)
3323 emit_storereg(CCREG,2);
3324 restore_regs(reglist);
3325#else // if !pcsx
// Non-PCSX path: address and data go to the address/byte/hword/word/dword
// globals; the handler is looked up in the writemem* table right here
// (ftable[addr>>16]) and called through indirect_jump.
57871462 3326 int ftable=0;
3327 if(type==STOREB_STUB)
3328 ftable=(int)writememb;
3329 if(type==STOREH_STUB)
3330 ftable=(int)writememh;
3331 if(type==STOREW_STUB)
3332 ftable=(int)writemem;
24385cae 3333#ifndef FORCE32
57871462 3334 if(type==STORED_STUB)
3335 ftable=(int)writememd;
24385cae 3336#endif
3337 assert(ftable!=0);
57871462 3338 emit_writeword(rs,(int)&address);
3339 //emit_shrimm(rs,16,rs);
3340 //emit_movmem_indexedx4(ftable,rs,rs);
3341 if(type==STOREB_STUB)
3342 emit_writebyte(rt,(int)&byte);
3343 if(type==STOREH_STUB)
3344 emit_writehword(rt,(int)&hword);
3345 if(type==STOREW_STUB)
3346 emit_writeword(rt,(int)&word);
3347 if(type==STORED_STUB) {
3d624f89 3348#ifndef FORCE32
57871462 3349 emit_writeword(rt,(int)&dword);
3350 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3351#else
3352 printf("STORED_STUB\n");
3353#endif
57871462 3354 }
3355 //emit_pusha();
3356 save_regs(reglist);
0c1fe38b 3357#ifndef PCSX
3358 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3359 if((signed int)addr>=(signed int)0xC0000000) {
3360 // Theoretically we can have a pagefault here, if the TLB has never
3361 // been enabled and the address is outside the range 80000000..BFFFFFFF
3362 // Write out the registers so the pagefault can be handled. This is
3363 // a very rare case and likely represents a bug.
3364 int ds=regmap!=regs[i].regmap;
3365 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3366 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3367 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3368 }
3369#endif
57871462 3370 //emit_shrimm(rs,16,1);
3371 int cc=get_reg(regmap,CCREG);
3372 if(cc<0) {
3373 emit_loadreg(CCREG,2);
3374 }
3375 //emit_movimm(ftable,0);
3376 emit_movimm(((u_int *)ftable)[addr>>16],0);
3377 //emit_readword((int)&last_count,12);
2573466a 3378 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3379#ifndef PCSX
57871462 3380 if((signed int)addr>=(signed int)0xC0000000) {
3381 // Pagefault address
3382 int ds=regmap!=regs[i].regmap;
3383 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3384 }
f51dc36c 3385#endif
57871462 3386 //emit_add(12,2,2);
3387 //emit_writeword(2,(int)&Count);
3388 //emit_call(((u_int *)ftable)[addr>>16]);
3389 emit_call((int)&indirect_jump);
// After the call: reload Count and next_interupt, remove the cycle
// adjustment, store next_interupt into last_count and keep the difference
// as the cycle count.
3390 emit_readword((int)&Count,HOST_TEMPREG);
3391 emit_readword((int)&next_interupt,2);
2573466a 3392 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3393 emit_writeword(2,(int)&last_count);
3394 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3395 if(cc<0) {
3396 emit_storereg(CCREG,HOST_TEMPREG);
3397 }
3398 //emit_popa();
3399 restore_regs(reglist);
b96d3df7 3400#endif
57871462 3401}
3402
/*
 * do_unalignedwritestub: emit the out-of-line path for an SWL/SWR store
 * (opcode 0x2a / 0x2e, asserted below) recorded in stubs[n].
 *
 * Fields of stubs[n] as read here (the layout differs from do_writestub):
 *   [1] location of the branch that gets pointed at this stub
 *   [2] address the stub jumps back to
 *   [3] index of the instruction inside the block
 *   [4] struct regstat pointer
 *   [5] host register holding the address
 *   [6] cycle adjustment
 *   [7] register list for save_regs()/restore_regs()
 *
 * Only the "#if 1" branch is compiled: it calls jump_handle_swl or
 * jump_handle_swr. The "#else" branch (read word, merge, write word) is
 * compiled out.
 */
3403do_unalignedwritestub(int n)
3404{
b7918751 3405 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3406 literal_pool(256);
57871462 3407 set_jump_target(stubs[n][1],(int)out);
b7918751 3408
3409 int i=stubs[n][3];
3410 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3411 int addr=stubs[n][5];
3412 u_int reglist=stubs[n][7];
3413 signed char *i_regmap=i_regs->regmap;
3414 int temp2=get_reg(i_regmap,FTEMP);
3415 int rt;
3416 int ds, real_rs;
3417 rt=get_reg(i_regmap,rs2[i]);
3418 assert(rt>=0);
3419 assert(addr>=0);
3420 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register is added to the save list, the FTEMP register is
// removed from it.
// NOTE(review): temp2 is used as a shift count without a >=0 check --
// confirm FTEMP is always allocated when this stub is generated.
3421 reglist|=(1<<addr);
3422 reglist&=~(1<<temp2);
3423
b96d3df7 3424#if 1
3425 // don't bother with it and call write handler
// r2 = cycle count + CLOCK_ADJUST(stubs[n][6]+1); after the call the same
// adjustment is subtracted from r0 again.
3426 save_regs(reglist);
3427 pass_args(addr,rt);
3428 int cc=get_reg(i_regmap,CCREG);
3429 if(cc<0)
3430 emit_loadreg(CCREG,2);
2573466a 3431 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3432 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3433 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3434 if(cc<0)
3435 emit_storereg(CCREG,2);
3436 restore_regs(reglist);
3437 emit_jmp(stubs[n][2]); // return address
3438#else
// Compiled out: reads the aligned word through the readmem table, merges
// rt into it according to the byte offset, then writes it back through
// the writemem table.
b7918751 3439 emit_andimm(addr,0xfffffffc,temp2);
3440 emit_writeword(temp2,(int)&address);
3441
3442 save_regs(reglist);
97a238a6 3443#ifndef PCSX
b7918751 3444 ds=i_regs!=&regs[i];
3445 real_rs=get_reg(i_regmap,rs1[i]);
3446 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3447 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3448 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3449 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3450#endif
b7918751 3451 emit_shrimm(addr,16,1);
3452 int cc=get_reg(i_regmap,CCREG);
3453 if(cc<0) {
3454 emit_loadreg(CCREG,2);
3455 }
3456 emit_movimm((u_int)readmem,0);
3457 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3458#ifndef PCSX
3459 // pagefault address
3460 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3461#endif
b7918751 3462 emit_call((int)&indirect_jump_indexed);
3463 restore_regs(reglist);
3464
3465 emit_readword((int)&readmem_dword,temp2);
3466 int temp=addr; //hmh
3467 emit_shlimm(addr,3,temp);
3468 emit_andimm(temp,24,temp);
3469#ifdef BIG_ENDIAN_MIPS
3470 if (opcode[i]==0x2e) // SWR
3471#else
3472 if (opcode[i]==0x2a) // SWL
3473#endif
3474 emit_xorimm(temp,24,temp);
3475 emit_movimm(-1,HOST_TEMPREG);
55439448 3476 if (opcode[i]==0x2a) { // SWL
b7918751 3477 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3478 emit_orrshr(rt,temp,temp2);
3479 }else{
3480 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3481 emit_orrshl(rt,temp,temp2);
3482 }
3483 emit_readword((int)&address,addr);
3484 emit_writeword(temp2,(int)&word);
3485 //save_regs(reglist); // don't need to, no state changes
3486 emit_shrimm(addr,16,1);
3487 emit_movimm((u_int)writemem,0);
3488 //emit_call((int)&indirect_jump_indexed);
3489 emit_mov(15,14);
3490 emit_readword_dualindexedx4(0,1,15);
3491 emit_readword((int)&Count,HOST_TEMPREG);
3492 emit_readword((int)&next_interupt,2);
3493 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3494 emit_writeword(2,(int)&last_count);
3495 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3496 if(cc<0) {
3497 emit_storereg(CCREG,HOST_TEMPREG);
3498 }
3499 restore_regs(reglist);
57871462 3500 emit_jmp(stubs[n][2]); // return address
b96d3df7 3501#endif
57871462 3502}
3503
/* Debug helper: prints seven of its arguments (esp is accepted but not
 * printed) followed by the int stored immediately before edi in memory. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3508
3509do_invstub(int n)
3510{
3511 literal_pool(20);
3512 u_int reglist=stubs[n][3];
3513 set_jump_target(stubs[n][1],(int)out);
3514 save_regs(reglist);
3515 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3516 emit_call((int)&invalidate_addr);
3517 restore_regs(reglist);
3518 emit_jmp(stubs[n][2]); // return address
3519}
3520
3521int do_dirty_stub(int i)
3522{
3523 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3524 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3525 #ifdef PCSX
3526 addr=(u_int)source;
3527 #endif
57871462 3528 // Careful about the code output here, verify_dirty needs to parse it.
3529 #ifdef ARMv5_ONLY
ac545b3a 3530 emit_loadlp(addr,1);
57871462 3531 emit_loadlp((int)copy,2);
3532 emit_loadlp(slen*4,3);
3533 #else
ac545b3a 3534 emit_movw(addr&0x0000FFFF,1);
57871462 3535 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3536 emit_movt(addr&0xFFFF0000,1);
57871462 3537 emit_movt(((u_int)copy)&0xFFFF0000,2);
3538 emit_movw(slen*4,3);
3539 #endif
3540 emit_movimm(start+i*4,0);
3541 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3542 int entry=(int)out;
3543 load_regs_entry(i);
3544 if(entry==(int)out) entry=instr_addr[i];
3545 emit_jmp(instr_addr[i]);
3546 return entry;
3547}
3548
3549void do_dirty_stub_ds()
3550{
3551 // Careful about the code output here, verify_dirty needs to parse it.
3552 #ifdef ARMv5_ONLY
3553 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3554 emit_loadlp((int)copy,2);
3555 emit_loadlp(slen*4,3);
3556 #else
3557 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3558 emit_movw(((u_int)copy)&0x0000FFFF,2);
3559 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3560 emit_movt(((u_int)copy)&0xFFFF0000,2);
3561 emit_movw(slen*4,3);
3562 #endif
3563 emit_movimm(start+1,0);
3564 emit_call((int)&verify_code_ds);
3565}
3566
3567do_cop1stub(int n)
3568{
3569 literal_pool(256);
3570 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3571 set_jump_target(stubs[n][1],(int)out);
3572 int i=stubs[n][3];
3d624f89 3573// int rs=stubs[n][4];
57871462 3574 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3575 int ds=stubs[n][6];
3576 if(!ds) {
3577 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3578 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3579 }
3580 //else {printf("fp exception in delay slot\n");}
3581 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3582 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3583 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3584 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3585 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3586}
3587
63cb0298 3588#ifndef DISABLE_TLB
3589
57871462 3590/* TLB */
3591
3592int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3593{
3594 if(c) {
3595 if((signed int)addr>=(signed int)0xC0000000) {
3596 // address_generation already loaded the const
3597 emit_readword_dualindexedx4(FP,map,map);
3598 }
3599 else
3600 return -1; // No mapping
3601 }
3602 else {
3603 assert(s!=map);
3604 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3605 emit_addsr12(map,s,map);
3606 // Schedule this while we wait on the load
3607 //if(x) emit_xorimm(s,x,ar);
3608 if(shift>=0) emit_shlimm(s,3,shift);
3609 if(~a) emit_andimm(s,a,ar);
3610 emit_readword_dualindexedx4(FP,map,map);
3611 }
3612 return map;
3613}
3614int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3615{
3616 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3617 emit_test(map,map);
3618 *jaddr=(int)out;
3619 emit_js(0);
3620 }
3621 return map;
3622}
3623
3624int gen_tlb_addr_r(int ar, int map) {
3625 if(map>=0) {
3626 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3627 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3628 }
3629}
3630
3631int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3632{
3633 if(c) {
3634 if(addr<0x80800000||addr>=0xC0000000) {
3635 // address_generation already loaded the const
3636 emit_readword_dualindexedx4(FP,map,map);
3637 }
3638 else
3639 return -1; // No mapping
3640 }
3641 else {
3642 assert(s!=map);
3643 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3644 emit_addsr12(map,s,map);
3645 // Schedule this while we wait on the load
3646 //if(x) emit_xorimm(s,x,ar);
3647 emit_readword_dualindexedx4(FP,map,map);
3648 }
3649 return map;
3650}
3651int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3652{
3653 if(!c||addr<0x80800000||addr>=0xC0000000) {
3654 emit_testimm(map,0x40000000);
3655 *jaddr=(int)out;
3656 emit_jne(0);
3657 }
3658}
3659
3660int gen_tlb_addr_w(int ar, int map) {
3661 if(map>=0) {
3662 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3663 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3664 }
3665}
3666
3667// Generate the address of the memory_map entry, relative to dynarec_local
3668generate_map_const(u_int addr,int reg) {
3669 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3670 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3671}
3672
63cb0298 3673#else
3674
// TLB support compiled out: each TLB helper is a no-op that returns 0.
// The parameter lists are left empty (unspecified), exactly as before.
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3681
3682#endif // DISABLE_TLB
3683
57871462 3684/* Special assem */
3685
/*
 * shift_assemble_arm: assemble a variable shift for instruction i.
 *
 * opcode2[i]<=0x07 handles SLLV/SRLV/SRAV on a single host register; any
 * other opcode2 is handled as DSLLV/DSRLV/DSRAV (0x14/0x16/0x17) on a
 * low/high register pair. rs1[i] is the value being shifted, rs2[i] the
 * shift amount, rt1[i] the destination. Nothing is emitted when rt1[i] is
 * 0 or the destination has no host register.
 */
3686void shift_assemble_arm(int i,struct regstat *i_regs)
3687{
3688 if(rt1[i]) {
3689 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3690 {
3691 signed char s,t,shift;
3692 t=get_reg(i_regs->regmap,rt1[i]);
3693 s=get_reg(i_regs->regmap,rs1[i]);
3694 shift=get_reg(i_regs->regmap,rs2[i]);
3695 if(t>=0){
// Source is register 0: the result is zero.
3696 if(rs1[i]==0)
3697 {
3698 emit_zeroreg(t);
3699 }
// Shift amount is register 0: plain move.
3700 else if(rs2[i]==0)
3701 {
3702 assert(s>=0);
3703 if(s!=t) emit_mov(s,t);
3704 }
3705 else
3706 {
// Only the low five bits of the shift amount are used.
3707 emit_andimm(shift,31,HOST_TEMPREG);
3708 if(opcode2[i]==4) // SLLV
3709 {
3710 emit_shl(s,HOST_TEMPREG,t);
3711 }
3712 if(opcode2[i]==6) // SRLV
3713 {
3714 emit_shr(s,HOST_TEMPREG,t);
3715 }
3716 if(opcode2[i]==7) // SRAV
3717 {
3718 emit_sar(s,HOST_TEMPREG,t);
3719 }
3720 }
3721 }
3722 } else { // DSLLV/DSRLV/DSRAV
3723 signed char sh,sl,th,tl,shift;
3724 th=get_reg(i_regs->regmap,rt1[i]|64);
3725 tl=get_reg(i_regs->regmap,rt1[i]);
3726 sh=get_reg(i_regs->regmap,rs1[i]|64);
3727 sl=get_reg(i_regs->regmap,rs1[i]);
3728 shift=get_reg(i_regs->regmap,rs2[i]);
3729 if(tl>=0){
3730 if(rs1[i]==0)
3731 {
3732 emit_zeroreg(tl);
3733 if(th>=0) emit_zeroreg(th);
3734 }
3735 else if(rs2[i]==0)
3736 {
3737 assert(sl>=0);
3738 if(sl!=tl) emit_mov(sl,tl);
3739 if(th>=0&&sh!=th) emit_mov(sh,th);
3740 }
3741 else
3742 {
3743 // FIXME: What if shift==tl ?
3744 assert(shift!=tl);
3745 int temp=get_reg(i_regs->regmap,-1);
// real_th remembers whether the high destination half really has a host
// register; th may be replaced by the temporary below.
3746 int real_th=th;
3747 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3748 assert(sl>=0);
3749 assert(sh>=0);
3750 emit_andimm(shift,31,HOST_TEMPREG);
3751 if(opcode2[i]==0x14) // DSLLV
3752 {
// NOTE(review): th can still be negative here (it is only replaced by temp
// for the other opcodes), yet emit_orrshr below is passed th without the
// th>=0 guard used on the neighbouring lines -- confirm.
3753 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3754 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3755 emit_orrshr(sl,HOST_TEMPREG,th);
3756 emit_andimm(shift,31,HOST_TEMPREG);
// Bit 5 of the shift amount set (shift by 32 or more): the low-half result
// moves into the high half and the low half becomes zero.
3757 emit_testimm(shift,32);
3758 emit_shl(sl,HOST_TEMPREG,tl);
3759 if(th>=0) emit_cmovne_reg(tl,th);
3760 emit_cmovne_imm(0,tl);
3761 }
3762 if(opcode2[i]==0x16) // DSRLV
3763 {
3764 assert(th>=0);
3765 emit_shr(sl,HOST_TEMPREG,tl);
3766 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3767 emit_orrshl(sh,HOST_TEMPREG,tl);
3768 emit_andimm(shift,31,HOST_TEMPREG);
3769 emit_testimm(shift,32);
3770 emit_shr(sh,HOST_TEMPREG,th);
3771 emit_cmovne_reg(th,tl);
3772 if(real_th>=0) emit_cmovne_imm(0,th);
3773 }
3774 if(opcode2[i]==0x17) // DSRAV
3775 {
3776 assert(th>=0);
3777 emit_shr(sl,HOST_TEMPREG,tl);
3778 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3779 if(real_th>=0) {
3780 assert(temp>=0);
3781 emit_sarimm(th,31,temp);
3782 }
3783 emit_orrshl(sh,HOST_TEMPREG,tl);
3784 emit_andimm(shift,31,HOST_TEMPREG);
3785 emit_testimm(shift,32);
3786 emit_sar(sh,HOST_TEMPREG,th);
3787 emit_cmovne_reg(th,tl);
3788 if(real_th>=0) emit_cmovne_reg(temp,th);
3789 }
3790 }
3791 }
3792 }
3793 }
3794}
ffb0b9e0 3795
3796#ifdef PCSX
3797static void speculate_mov(int rs,int rt)
3798{
3799 if(rt!=0) {
3800 smrv_strong_next|=1<<rt;
3801 smrv[rt]=smrv[rs];
3802 }
3803}
3804
3805static void speculate_mov_weak(int rs,int rt)
3806{
3807 if(rt!=0) {
3808 smrv_weak_next|=1<<rt;
3809 smrv[rt]=smrv[rs];
3810 }
3811}
3812
3813static void speculate_register_values(int i)
3814{
3815 if(i==0) {
3816 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3817 // gp,sp are likely to stay the same throughout the block
3818 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3819 smrv_weak_next=~smrv_strong_next;
3820 //printf(" llr %08x\n", smrv[4]);
3821 }
3822 smrv_strong=smrv_strong_next;
3823 smrv_weak=smrv_weak_next;
3824 switch(itype[i]) {
3825 case ALU:
3826 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3827 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3828 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3829 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3830 else {
3831 smrv_strong_next&=~(1<<rt1[i]);
3832 smrv_weak_next&=~(1<<rt1[i]);
3833 }
3834 break;
3835 case SHIFTIMM:
3836 smrv_strong_next&=~(1<<rt1[i]);
3837 smrv_weak_next&=~(1<<rt1[i]);
3838 // fallthrough
3839 case IMM16:
3840 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3841 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3842 if(hr>=0) {
3843 if(get_final_value(hr,i,&value))
3844 smrv[rt1[i]]=value;
3845 else smrv[rt1[i]]=constmap[i][hr];
3846 smrv_strong_next|=1<<rt1[i];
3847 }
3848 }
3849 else {
3850 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3851 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3852 }
3853 break;
3854 case LOAD:
3855 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3856 // special case for BIOS
3857 smrv[rt1[i]]=0xa0000000;
3858 smrv_strong_next|=1<<rt1[i];
3859 break;
3860 }
3861 // fallthrough
3862 case SHIFT:
3863 case LOADLR:
3864 case MOV:
3865 smrv_strong_next&=~(1<<rt1[i]);
3866 smrv_weak_next&=~(1<<rt1[i]);
3867 break;
3868 case COP0:
3869 case COP2:
3870 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3871 smrv_strong_next&=~(1<<rt1[i]);
3872 smrv_weak_next&=~(1<<rt1[i]);
3873 }
3874 break;
3875 case C2LS:
3876 if (opcode[i]==0x32) { // LWC2
3877 smrv_strong_next&=~(1<<rt1[i]);
3878 smrv_weak_next&=~(1<<rt1[i]);
3879 }
3880 break;
3881 }
3882#if 0
3883 int r=4;
3884 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3885 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3886#endif
3887}
3888
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class; also everything not matched below
  MTYPE_8020 = 1, // 0x80200000..0x807fffff (RAM 80200000+ mirror)
  MTYPE_0000 = 2, // below 0x00200000 (RAM 0 mirror)
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff (RAM A mirror)
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff (scratchpad)
};
3896
3897static int get_ptr_mem_type(u_int a)
3898{
3899 if(a < 0x00200000) {
3900 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3901 // return wrong, must use memhandler for BIOS self-test to pass
3902 // 007 does similar stuff from a00 mirror, weird stuff
3903 return MTYPE_8000;
3904 return MTYPE_0000;
3905 }
3906 if(0x1f800000 <= a && a < 0x1f801000)
3907 return MTYPE_1F80;
3908 if(0x80200000 <= a && a < 0x80800000)
3909 return MTYPE_8020;
3910 if(0xa0000000 <= a && a < 0xa0200000)
3911 return MTYPE_A000;
3912 return MTYPE_8000;
3913}
3914#endif
3915
3916static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3917{
3918 int jaddr,type=0;
3919
3920#ifdef PCSX
3921 int mr=rs1[i];
3922 if(((smrv_strong|smrv_weak)>>mr)&1) {
3923 type=get_ptr_mem_type(smrv[mr]);
3924 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3925 }
3926 else {
3927 // use the mirror we are running on
3928 type=get_ptr_mem_type(start);
3929 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3930 }
3931
3932 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3933 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3934 addr=*addr_reg_override=HOST_TEMPREG;
3935 type=0;
3936 }
3937 else if(type==MTYPE_0000) { // RAM 0 mirror
3938 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3939 addr=*addr_reg_override=HOST_TEMPREG;
3940 type=0;
3941 }
3942 else if(type==MTYPE_A000) { // RAM A mirror
3943 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3944 addr=*addr_reg_override=HOST_TEMPREG;
3945 type=0;
3946 }
3947 else if(type==MTYPE_1F80) { // scratchpad
3948 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3949 emit_cmpimm(HOST_TEMPREG,0x1000);
3950 jaddr=(int)out;
3951 emit_jc(0);
3952 }
3953#endif
3954
3955 if(type==0)
3956 {
3957 emit_cmpimm(addr,RAM_SIZE);
3958 jaddr=(int)out;
3959 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3960 // Hint to branch predictor that the branch is unlikely to be taken
3961 if(rs1[i]>=28)
3962 emit_jno_unlikely(0);
3963 else
3964 #endif
3965 emit_jno(0);
3966 }
3967
3968 return jaddr;
3969}
3970
57871462 3971#define shift_assemble shift_assemble_arm
3972
/*
 * loadlr_assemble_arm: assemble the unaligned loads LWL/LWR (opcode
 * 0x22/0x26) and LDL/LDR (opcode 0x1A/0x1B) for instruction i.
 *
 * The aligned word (or doubleword) is loaded into FTEMP (temp2/temp2h),
 * `temp` receives the address shifted left by 3 (the bit offset), and the
 * loaded data is then shifted and merged into the destination tl/th.
 */
3973void loadlr_assemble_arm(int i,struct regstat *i_regs)
3974{
3975 int s,th,tl,temp,temp2,addr,map=-1;
3976 int offset;
3977 int jaddr=0;
af4ee1fe 3978 int memtarget=0,c=0;
ffb0b9e0 3979 int fastload_reg_override=0;
57871462 3980 u_int hr,reglist=0;
3981 th=get_reg(i_regs->regmap,rt1[i]|64);
3982 tl=get_reg(i_regs->regmap,rt1[i]);
3983 s=get_reg(i_regs->regmap,rs1[i]);
3984 temp=get_reg(i_regs->regmap,-1);
3985 temp2=get_reg(i_regs->regmap,FTEMP);
3986 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3987 assert(addr<0);
3988 offset=imm[i];
// reglist: every host register currently mapped, plus temp.
3989 for(hr=0;hr<HOST_REGS;hr++) {
3990 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3991 }
3992 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only computed just
// below), so this test is effectively "offset||s<0" -- confirm intent.
3993 if(offset||s<0||c) addr=temp2;
3994 else addr=s;
// c: base register was constant; memtarget: the constant address compares
// below 0x80000000+RAM_SIZE (signed), or is >=0xC0000000 with the TLB on.
3995 if(s>=0) {
3996 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3997 if(c) {
3998 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3999 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4000 }
57871462 4001 }
535d208a 4002 if(!using_tlb) {
4003 if(!c) {
4004 #ifdef RAM_OFFSET
4005 map=get_reg(i_regs->regmap,ROREG);
4006 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4007 #endif
// temp = addr<<3 (bit offset), temp2 = aligned address; then the fast-path
// range check.
4008 emit_shlimm(addr,3,temp);
4009 if (opcode[i]==0x22||opcode[i]==0x26) {
4010 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4011 }else{
535d208a 4012 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4013 }
ffb0b9e0 4014 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4015 }
4016 else {
// Constant address: the bit offset is computed while recompiling.
4017 if (opcode[i]==0x22||opcode[i]==0x26) {
4018 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4019 }else{
4020 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4021 }
57871462 4022 }
535d208a 4023 }else{ // using tlb
4024 int a;
4025 if(c) {
4026 a=-1;
4027 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4028 a=0xFFFFFFFC; // LWL/LWR
4029 }else{
4030 a=0xFFFFFFF8; // LDL/LDR
4031 }
4032 map=get_reg(i_regs->regmap,TLREG);
4033 assert(map>=0);
ea3d2e6e 4034 reglist&=~(1<<map);
535d208a 4035 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4036 if(c) {
4037 if (opcode[i]==0x22||opcode[i]==0x26) {
4038 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4039 }else{
4040 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4041 }
535d208a 4042 }
4043 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4044 }
4045 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4046 if(!c||memtarget) {
// Direct load; use the overridden address register when the fast-path
// check rewrote the address.
ffb0b9e0 4047 int a=temp2;
4048 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4049 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4050 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4051 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4052 }
4053 else
4054 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4055 if(rt1[i]) {
4056 assert(tl>=0);
// Merge: shift the loaded word by the bit offset, clear the matching bits
// of tl using an all-ones mask shifted the same way, then OR them together.
57871462 4057 emit_andimm(temp,24,temp);
2002a1db 4058#ifdef BIG_ENDIAN_MIPS
4059 if (opcode[i]==0x26) // LWR
4060#else
4061 if (opcode[i]==0x22) // LWL
4062#endif
4063 emit_xorimm(temp,24,temp);
57871462 4064 emit_movimm(-1,HOST_TEMPREG);
4065 if (opcode[i]==0x26) {
4066 emit_shr(temp2,temp,temp2);
4067 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4068 }else{
4069 emit_shl(temp2,temp,temp2);
4070 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4071 }
4072 emit_or(temp2,tl,tl);
57871462 4073 }
535d208a 4074 //emit_storereg(rt1[i],tl); // DEBUG
4075 }
4076 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4077 // FIXME: little endian, fastload_reg_override
535d208a 4078 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4079 if(!c||memtarget) {
4080 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4081 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4082 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4083 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4084 }
4085 else
4086 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4087 if(rt1[i]) {
4088 assert(th>=0);
4089 assert(tl>=0);
// Bit 5 of the bit offset is tested first and selects between the eq/ne
// conditional emitters below; the low bits (temp&24) give the shift.
57871462 4090 emit_testimm(temp,32);
4091 emit_andimm(temp,24,temp);
4092 if (opcode[i]==0x1A) { // LDL
4093 emit_rsbimm(temp,32,HOST_TEMPREG);
4094 emit_shl(temp2h,temp,temp2h);
4095 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4096 emit_movimm(-1,HOST_TEMPREG);
4097 emit_shl(temp2,temp,temp2);
4098 emit_cmove_reg(temp2h,th);
4099 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4100 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4101 emit_orreq(temp2,tl,tl);
4102 emit_orrne(temp2,th,th);
4103 }
4104 if (opcode[i]==0x1B) { // LDR
4105 emit_xorimm(temp,24,temp);
4106 emit_rsbimm(temp,32,HOST_TEMPREG);
4107 emit_shr(temp2,temp,temp2);
4108 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4109 emit_movimm(-1,HOST_TEMPREG);
4110 emit_shr(temp2h,temp,temp2h);
4111 emit_cmovne_reg(temp2,tl);
4112 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4113 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4114 emit_orrne(temp2h,th,th);
4115 emit_orreq(temp2h,tl,tl);
4116 }
4117 }
4118 }
4119}
4120#define loadlr_assemble loadlr_assemble_arm
4121
/*
 * Emit host code for the coprocessor 0 instruction at index i.
 *
 * Dispatches on opcode2[i]:
 *   0    - MFC0: load the selected COP0 register into the host register
 *          mapped to rt1[i] (nothing is emitted when that register is
 *          unmapped or rt1[i] is 0).
 *   4    - MTC0: hand the value of rs1[i] to a C helper (pcsx_mtc0 or
 *          pcsx_mtc0_ds under PCSX, MTC0 otherwise).
 *   else - asserted to be 0x10; the low 6 bits of source[i] select
 *          TLBR/TLBWI/TLBWR/TLBP (unless DISABLE_TLB is defined),
 *          RFE (PCSX builds) or ERET (non-PCSX builds).
 *
 * The COP0 register number is taken from bits 11..15 of source[i].
 * i_regs supplies regmap and the dirty/is32 state passed to wb_register.
 */
4122void cop0_assemble(int i,struct regstat *i_regs)
4123{
4124 if(opcode2[i]==0) // MFC0
4125 {
4126 signed char t=get_reg(i_regs->regmap,rt1[i]);
4127 char copr=(source[i]>>11)&0x1f;
4128 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4129 if(t>=0&&rt1[i]!=0) {
7139f3c8 4130#ifdef MUPEN64
// MUPEN64: store the address of fake_pc to PC and the COP0 register
// number to fake_pc.f.r.nrd (host regs 0 and 1 are used as scratch),
// then call MFC0 and fetch the result from readmem_dword.
57871462 4131 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4132 emit_movimm((source[i]>>11)&0x1f,1);
4133 emit_writeword(0,(int)&PC);
4134 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4135 if(copr==9) {
// Register 9: rebuild Count from last_count, the cycle counter
// register and CLOCK_ADJUST(ccadj[i]) before the helper reads it.
4136 emit_readword((int)&last_count,ECX);
4137 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4138 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4139 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4140 emit_writeword(HOST_CCREG,(int)&Count);
4141 }
4142 emit_call((int)MFC0);
4143 emit_readword((int)&readmem_dword,t);
7139f3c8 4144#else
// Otherwise: plain word load from reg_cop0, 4 bytes per register.
4145 emit_readword((int)&reg_cop0+copr*4,t);
4146#endif
57871462 4147 }
4148 }
4149 else if(opcode2[i]==4) // MTC0
4150 {
4151 signed char s=get_reg(i_regs->regmap,rs1[i]);
4152 char copr=(source[i]>>11)&0x1f;
4153 assert(s>=0);
63cb0298 4154#ifdef MUPEN64
// MUPEN64: the value travels through readmem_dword and the register
// number through fake_pc.f.r.nrd.
57871462 4155 emit_writeword(s,(int)&readmem_dword);
4156 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4157 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4158 emit_movimm((source[i]>>11)&0x1f,1);
4159 emit_writeword(0,(int)&PC);
4160 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4161#else
4162 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4163#endif
4164 if(copr==9||copr==11||copr==12||copr==13) {
// For registers 9, 11, 12 and 13 store the current cycle total to
// Count before the helper is called.
63cb0298 4165 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4166 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4167 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4168 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4169 emit_writeword(HOST_CCREG,(int)&Count);
4170 }
4171 // What a mess. The status register (12) can enable interrupts,
4172 // so needs a special case to handle a pending interrupt.
4173 // The interrupt must be taken immediately, because a subsequent
4174 // instruction might disable interrupts again.
7139f3c8 4175 if(copr==12||copr==13) {
fca1aef2 4176#ifdef PCSX
4177 if (is_delayslot) {
4178 // burn cycles to cause cc_interrupt, which will
4179 // reschedule next_interupt. Relies on CCREG from above.
4180 assem_debug("MTC0 DS %d\n", copr);
4181 emit_writeword(HOST_CCREG,(int)&last_count);
4182 emit_movimm(0,HOST_CCREG);
4183 emit_storereg(CCREG,HOST_CCREG);
// Helper arguments: host reg 0 = COP0 register number, host reg 1 =
// value (emit_mov(s,1) presumably copies host reg s into reg 1 - confirm).
63cb0298 4184 if(s!=1)
4185 emit_mov(s,1);
fca1aef2 4186 emit_movimm(copr,0);
4187 emit_call((int)pcsx_mtc0_ds);
4188 return;
4189 }
4190#endif
// Record the address of the following instruction (start+i*4+4) in
// pcaddr and clear pending_exception before the helper runs.
63cb0298 4191 emit_movimm(start+i*4+4,HOST_TEMPREG);
4192 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4193 emit_movimm(0,HOST_TEMPREG);
4194 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4195 }
4196 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4197 //else
fca1aef2 4198#ifdef PCSX
63cb0298 4199 if(s!=1)
4200 emit_mov(s,1);
fca1aef2 4201 emit_movimm(copr,0);
4202 emit_call((int)pcsx_mtc0);
4203#else
57871462 4204 emit_call((int)MTC0);
fca1aef2 4205#endif
7139f3c8 4206 if(copr==9||copr==11||copr==12||copr==13) {
// After the call: re-derive the cycle counter from Count and
// next_interupt, and store next_interupt as the new last_count.
57871462 4207 emit_readword((int)&Count,HOST_CCREG);
4208 emit_readword((int)&next_interupt,ECX);
2573466a 4209 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4210 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4211 emit_writeword(ECX,(int)&last_count);
4212 emit_storereg(CCREG,HOST_CCREG);
4213 }
7139f3c8 4214 if(copr==12||copr==13) {
// The PCSX delay-slot case returned earlier, hence the assert.
// pending_exception is read into host reg 14 and tested below.
57871462 4215 assert(!is_delayslot);
4216 emit_readword((int)&pending_exception,14);
4217 }
// Reload rs1 (and its upper half, when one is mapped) after the call;
// presumably the helper call does not preserve them - confirm.
4218 emit_loadreg(rs1[i],s);
4219 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4220 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4221 if(copr==12||copr==13) {
// Branch to do_interrupt when pending_exception came back non-zero.
57871462 4222 emit_test(14,14);
4223 emit_jne((int)&do_interrupt);
4224 }
// Clearing cop1_usable makes the next COP1 assembler emit its
// usability check again.
4225 cop1_usable=0;
4226 }
4227 else
4228 {
4229 assert(opcode2[i]==0x10);
3d624f89 4230#ifndef DISABLE_TLB
57871462 4231 if((source[i]&0x3f)==0x01) // TLBR
4232 emit_call((int)TLBR);
4233 if((source[i]&0x3f)==0x02) // TLBWI
4234 emit_call((int)TLBWI_new);
4235 if((source[i]&0x3f)==0x06) { // TLBWR
4236 // The TLB entry written by TLBWR is dependent on the count,
4237 // so update the cycle count
4238 emit_readword((int)&last_count,ECX);
4239 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4240 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4241 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4242 emit_writeword(HOST_CCREG,(int)&Count);
4243 emit_call((int)TLBWR_new);
4244 }
4245 if((source[i]&0x3f)==0x08) // TLBP
4246 emit_call((int)TLBP);
3d624f89 4247#endif
576bbd8f 4248#ifdef PCSX
4249 if((source[i]&0x3f)==0x10) // RFE
4250 {
// Going by the emitter names this computes
// Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
// using host regs 0 and 1 as scratch - confirm against emit_orrshr_imm.
4251 emit_readword((int)&Status,0);
4252 emit_andimm(0,0x3c,1);
4253 emit_andimm(0,~0xf,0);
4254 emit_orrshr_imm(1,2,0);
4255 emit_writeword(0,(int)&Status);
4256 }
4257#else
57871462 4258 if((source[i]&0x3f)==0x18) // ERET
4259 {
// Add the pending cycle adjustment to the cycle counter register and
// jump to jump_eret.
4260 int count=ccadj[i];
4261 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4262 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4263 emit_jmp((int)jump_eret);
4264 }
576bbd8f 4265#endif
57871462 4266 }
4267}
4268
b9b61529 4269static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4270{
4271 switch (copr) {
4272 case 1:
4273 case 3:
4274 case 5:
4275 case 8:
4276 case 9:
4277 case 10:
4278 case 11:
4279 emit_readword((int)&reg_cop2d[copr],tl);
4280 emit_signextend16(tl,tl);
4281 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4282 break;
4283 case 7:
4284 case 16:
4285 case 17:
4286 case 18:
4287 case 19:
4288 emit_readword((int)&reg_cop2d[copr],tl);
4289 emit_andimm(tl,0xffff,tl);
4290 emit_writeword(tl,(int)&reg_cop2d[copr]);
4291 break;
4292 case 15:
4293 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4294 emit_writeword(tl,(int)&reg_cop2d[copr]);
4295 break;
4296 case 28:
b9b61529 4297 case 29:
4298 emit_readword((int)&reg_cop2d[9],temp);
4299 emit_testimm(temp,0x8000); // do we need this?
4300 emit_andimm(temp,0xf80,temp);
4301 emit_andne_imm(temp,0,temp);
f70d384d 4302 emit_shrimm(temp,7,tl);
b9b61529 4303 emit_readword((int)&reg_cop2d[10],temp);
4304 emit_testimm(temp,0x8000);
4305 emit_andimm(temp,0xf80,temp);
4306 emit_andne_imm(temp,0,temp);
f70d384d 4307 emit_orrshr_imm(temp,2,tl);
b9b61529 4308 emit_readword((int)&reg_cop2d[11],temp);
4309 emit_testimm(temp,0x8000);
4310 emit_andimm(temp,0xf80,temp);
4311 emit_andne_imm(temp,0,temp);
f70d384d 4312 emit_orrshl_imm(temp,3,tl);
b9b61529 4313 emit_writeword(tl,(int)&reg_cop2d[copr]);
4314 break;
4315 default:
4316 emit_readword((int)&reg_cop2d[copr],tl);
4317 break;
4318 }
4319}
4320
4321static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4322{
4323 switch (copr) {
4324 case 15:
4325 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4326 emit_writeword(sl,(int)&reg_cop2d[copr]);
4327 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4328 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4329 emit_writeword(sl,(int)&reg_cop2d[14]);
4330 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4331 break;
4332 case 28:
4333 emit_andimm(sl,0x001f,temp);
f70d384d 4334 emit_shlimm(temp,7,temp);
b9b61529 4335 emit_writeword(temp,(int)&reg_cop2d[9]);
4336 emit_andimm(sl,0x03e0,temp);
f70d384d 4337 emit_shlimm(temp,2,temp);
b9b61529 4338 emit_writeword(temp,(int)&reg_cop2d[10]);
4339 emit_andimm(sl,0x7c00,temp);
f70d384d 4340 emit_shrimm(temp,3,temp);
b9b61529 4341 emit_writeword(temp,(int)&reg_cop2d[11]);
4342 emit_writeword(sl,(int)&reg_cop2d[28]);
4343 break;
4344 case 30:
4345 emit_movs(sl,temp);
4346 emit_mvnmi(temp,temp);
4347 emit_clz(temp,temp);
4348 emit_writeword(sl,(int)&reg_cop2d[30]);
4349 emit_writeword(temp,(int)&reg_cop2d[31]);
4350 break;
b9b61529 4351 case 31:
4352 break;
4353 default:
4354 emit_writeword(sl,(int)&reg_cop2d[copr]);
4355 break;
4356 }
4357}
4358
4359void cop2_assemble(int i,struct regstat *i_regs)
4360{
4361 u_int copr=(source[i]>>11)&0x1f;
4362 signed char temp=get_reg(i_regs->regmap,-1);
4363 if (opcode2[i]==0) { // MFC2
4364 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4365 if(tl>=0&&rt1[i]!=0)
b9b61529 4366 cop2_get_dreg(copr,tl,temp);
4367 }
4368 else if (opcode2[i]==4) { // MTC2
4369 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4370 cop2_put_dreg(copr,sl,temp);
4371 }
4372 else if (opcode2[i]==2) // CFC2
4373 {
4374 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4375 if(tl>=0&&rt1[i]!=0)
b9b61529 4376 emit_readword((int)&reg_cop2c[copr],tl);
4377 }
4378 else if (opcode2[i]==6) // CTC2
4379 {
4380 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4381 switch(copr) {
4382 case 4:
4383 case 12:
4384 case 20:
4385 case 26:
4386 case 27:
4387 case 29:
4388 case 30:
4389 emit_signextend16(sl,temp);
4390 break;
4391 case 31:
4392 //value = value & 0x7ffff000;
4393 //if (value & 0x7f87e000) value |= 0x80000000;
4394 emit_shrimm(sl,12,temp);
4395 emit_shlimm(temp,12,temp);
4396 emit_testimm(temp,0x7f000000);
4397 emit_testeqimm(temp,0x00870000);
4398 emit_testeqimm(temp,0x0000e000);
4399 emit_orrne_imm(temp,0x80000000,temp);
4400 break;
4401 default:
4402 temp=sl;
4403 break;
4404 }
4405 emit_writeword(temp,(int)&reg_cop2c[copr]);
4406 assert(sl>=0);
4407 }
4408}
4409
054175e9 4410static void c2op_prologue(u_int op,u_int reglist)
4411{
4412 save_regs_all(reglist);
82ed88eb 4413#ifdef PCNT
4414 emit_movimm(op,0);
4415 emit_call((int)pcnt_gte_start);
4416#endif
054175e9 4417 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4418}
4419
4420static void c2op_epilogue(u_int op,u_int reglist)
4421{
82ed88eb 4422#ifdef PCNT
4423 emit_movimm(op,0);
4424 emit_call((int)pcnt_gte_end);
4425#endif
054175e9 4426 restore_regs_all(reglist);
4427}
4428
4429static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4430{
4431 signed char temp=get_reg(i_regs->regmap,-1);
4432 u_int c2op=source[i]&0x3f;
4433 u_int hr,reglist=0;
054175e9 4434 int need_flags,need_ir;
b9b61529 4435 for(hr=0;hr<HOST_REGS;hr++) {
4436 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4437 }
b9b61529 4438
4439 if (gte_handlers[c2op]!=NULL) {
bedfea38 4440 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4441 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
4442 assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n",
4443 gte_unneeded[i+1],need_flags,need_ir);
bedfea38 4444#ifdef ARMv5_ONLY
4445 // let's take more risk here
4446 need_flags=need_flags&&gte_reads_flags;
4447#endif
054175e9 4448 switch(c2op) {
4449 case GTE_MVMVA: {
4450 int shift = (source[i] >> 19) & 1;
4451 int v = (source[i] >> 15) & 3;
4452 int cv = (source[i] >> 13) & 3;
4453 int mx = (source[i] >> 17) & 3;
4454 int lm = (source[i] >> 10) & 1;
4455 reglist&=0x10ff; // +{r4-r7}
4456 c2op_prologue(c2op,reglist);
4457 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4458 if(v<3)
4459 emit_ldrd(v*8,0,4);
4460 else {
4461 emit_movzwl_indexed(9*4,0,4); // gteIR
4462 emit_movzwl_indexed(10*4,0,6);
4463 emit_movzwl_indexed(11*4,0,5);
4464 emit_orrshl_imm(6,16,4);
4465 }
4466 if(mx<3)
4467 emit_addimm(0,32*4+mx*8*4,6);
4468 else
4469 emit_readword((int)&zeromem_ptr,6);
4470 if(cv<3)
4471 emit_addimm(0,32*4+(cv*8+5)*4,7);
4472 else
4473 emit_readword((int)&zeromem_ptr,7);
4474#ifdef __ARM_NEON__
4475 emit_movimm(source[i],1); // opcode
4476 emit_call((int)gteMVMVA_part_neon);
4477 if(need_flags) {
4478 emit_movimm(lm,1);
4479 emit_call((int)gteMACtoIR_flags_neon);
4480 }
4481#else
4482 if(cv==3&&shift)
4483 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4484 else {
4485 emit_movimm(shift,1);
4486 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4487 }
4488 if(need_flags||need_ir) {
4489 if(need_flags)
4490 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4491 else
4492 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked
4493 }
4494#endif
4495 break;
4496 }
b9b61529 4497
054175e9 4498 default:
4499 reglist&=0x100f;
4500 c2op_prologue(c2op,reglist);
4501 emit_movimm(source[i],1); // opcode
4502 emit_writeword(1,(int)&psxRegs.code);
4503 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4504 break;
4505 }
4506 c2op_epilogue(c2op,reglist);
4507 }
b9b61529 4508}
4509
4510void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4511{
4512 // XXX: should just just do the exception instead
4513 if(!cop1_usable) {
4514 int jaddr=(int)out;
4515 emit_jmp(0);
4516 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4517 cop1_usable=1;
4518 }
4519}
4520
57871462 4521void cop1_assemble(int i,struct regstat *i_regs)
4522{
3d624f89 4523#ifndef DISABLE_COP1
57871462 4524 // Check cop1 unusable
4525 if(!cop1_usable) {
4526 signed char rs=get_reg(i_regs->regmap,CSREG);
4527 assert(rs>=0);
4528 emit_testimm(rs,0x20000000);
4529 int jaddr=(int)out;
4530 emit_jeq(0);
4531 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4532 cop1_usable=1;
4533 }
4534 if (opcode2[i]==0) { // MFC1
4535 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4536 if(tl>=0) {
4537 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4538 emit_readword_indexed(0,tl,tl);
4539 }
4540 }
4541 else if (opcode2[i]==1) { // DMFC1
4542 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4543 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4544 if(tl>=0) {
4545 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4546 if(th>=0) emit_readword_indexed(4,tl,th);
4547 emit_readword_indexed(0,tl,tl);
4548 }
4549 }
4550 else if (opcode2[i]==4) { // MTC1
4551 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4552 signed char temp=get_reg(i_regs->regmap,-1);
4553 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4554 emit_writeword_indexed(sl,0,temp);
4555 }
4556 else if (opcode2[i]==5) { // DMTC1
4557 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4558 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4559 signed char temp=get_reg(i_regs->regmap,-1);
4560 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4561 emit_writeword_indexed(sh,4,temp);
4562 emit_writeword_indexed(sl,0,temp);
4563 }
4564 else if (opcode2[i]==2) // CFC1
4565 {
4566 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4567 if(tl>=0) {
4568 u_int copr=(source[i]>>11)&0x1f;
4569 if(copr==0) emit_readword((int)&FCR0,tl);
4570 if(copr==31) emit_readword((int)&FCR31,tl);
4571 }
4572 }
4573 else if (opcode2[i]==6) // CTC1
4574 {
4575 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4576 u_int copr=(source[i]>>11)&0x1f;
4577 assert(sl>=0);
4578 if(copr==31)
4579 {
4580 emit_writeword(sl,(int)&FCR31);
4581 // Set the rounding mode
4582 //FIXME
4583 //char temp=get_reg(i_regs->regmap,-1);
4584 //emit_andimm(sl,3,temp);
4585 //emit_fldcw_indexed((int)&rounding_modes,temp);
4586 }
4587 }
3d624f89 4588#else
4589 cop1_unusable(i, i_regs);
4590#endif
57871462 4591}
4592
4593void fconv_assemble_arm(int i,struct regstat *i_regs)
4594{
3d624f89 4595#ifndef DISABLE_COP1
57871462 4596 signed char temp=get_reg(i_regs->regmap,-1);
4597 assert(temp>=0);
4598 // Check cop1 unusable
4599 if(!cop1_usable) {
4600 signed char rs=get_reg(i_regs->regmap,CSREG);
4601 assert(rs>=0);
4602 emit_testimm(rs,0x20000000);
4603 int jaddr=(int)out;
4604 emit_jeq(0);
4605 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4606 cop1_usable=1;
4607 }
4608
4609 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4610 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4611 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4612 emit_flds(temp,15);
4613 emit_ftosizs(15,15); // float->int, truncate
4614 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4615 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4616 emit_fsts(15,temp);
4617 return;
4618 }
4619 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4620 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4621 emit_vldr(temp,7);
4622 emit_ftosizd(7,13); // double->int, truncate
4623 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4624 emit_fsts(13,temp);
4625 return;
4626 }
4627
4628 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4629 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4630 emit_flds(temp,13);
4631 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4632 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4633 emit_fsitos(13,15);
4634 emit_fsts(15,temp);
4635 return;
4636 }
4637 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4638 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4639 emit_flds(temp,13);
4640 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4641 emit_fsitod(13,7);
4642 emit_vstr(7,temp);
4643 return;
4644 }
4645
4646 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4647 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4648 emit_flds(temp,13);
4649 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4650 emit_fcvtds(13,7);
4651 emit_vstr(7,temp);
4652 return;
4653 }
4654 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4655 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4656 emit_vldr(temp,7);
4657 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4658 emit_fcvtsd(7,13);
4659 emit_fsts(13,temp);
4660 return;
4661 }
4662 #endif
4663
4664 // C emulation code
4665
4666 u_int hr,reglist=0;
4667 for(hr=0;hr<HOST_REGS;hr++) {
4668 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4669 }
4670 save_regs(reglist);
4671
4672 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4673 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4674 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4675 emit_call((int)cvt_s_w);
4676 }
4677 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4678 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4679 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4680 emit_call((int)cvt_d_w);
4681 }
4682 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4683 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4684 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4685 emit_call((int)cvt_s_l);
4686 }
4687 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4688 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4689 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4690 emit_call((int)cvt_d_l);
4691 }
4692
4693 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4694 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4695 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4696 emit_call((int)cvt_d_s);
4697 }
4698 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4699 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4700 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4701 emit_call((int)cvt_w_s);
4702 }
4703 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4704 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4705 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4706 emit_call((int)cvt_l_s);
4707 }
4708
4709 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4710 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4711 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4712 emit_call((int)cvt_s_d);
4713 }
4714 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4715 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4716 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4717 emit_call((int)cvt_w_d);
4718 }
4719 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4720 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4721 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4722 emit_call((int)cvt_l_d);
4723 }
4724
4725 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4726 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4727 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4728 emit_call((int)round_l_s);
4729 }
4730 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4731 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4732 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4733 emit_call((int)trunc_l_s);
4734 }
4735 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4736 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4737 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4738 emit_call((int)ceil_l_s);
4739 }
4740 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4741 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4742 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4743 emit_call((int)floor_l_s);
4744 }
4745 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4746 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4747 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4748 emit_call((int)round_w_s);
4749 }
4750 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4751 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4752 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4753 emit_call((int)trunc_w_s);
4754 }
4755 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4756 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4757 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4758 emit_call((int)ceil_w_s);
4759 }
4760 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4761 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4762 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4763 emit_call((int)floor_w_s);
4764 }
4765
4766 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4767 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4768 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4769 emit_call((int)round_l_d);
4770 }
4771 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4772 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4773 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4774 emit_call((int)trunc_l_d);
4775 }
4776 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4777 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4778 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4779 emit_call((int)ceil_l_d);
4780 }
4781 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4782 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4783 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4784 emit_call((int)floor_l_d);
4785 }
4786 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4787 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4788 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4789 emit_call((int)round_w_d);
4790 }
4791 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4792 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4793 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4794 emit_call((int)trunc_w_d);
4795 }
4796 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4797 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4798 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4799 emit_call((int)ceil_w_d);
4800 }
4801 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4802 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4803 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4804 emit_call((int)floor_w_d);
4805 }
4806
4807 restore_regs(reglist);
3d624f89 4808#else
4809 cop1_unusable(i, i_regs);
4810#endif
57871462 4811}
4812#define fconv_assemble fconv_assemble_arm
4813
4814void fcomp_assemble(int i,struct regstat *i_regs)
4815{
3d624f89 4816#ifndef DISABLE_COP1
57871462 4817 signed char fs=get_reg(i_regs->regmap,FSREG);
4818 signed char temp=get_reg(i_regs->regmap,-1);
4819 assert(temp>=0);
4820 // Check cop1 unusable
4821 if(!cop1_usable) {
4822 signed char cs=get_reg(i_regs->regmap,CSREG);
4823 assert(cs>=0);
4824 emit_testimm(cs,0x20000000);
4825 int jaddr=(int)out;
4826 emit_jeq(0);
4827 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4828 cop1_usable=1;
4829 }
4830
4831 if((source[i]&0x3f)==0x30) {
4832 emit_andimm(fs,~0x800000,fs);
4833 return;
4834 }
4835
4836 if((source[i]&0x3e)==0x38) {
4837 // sf/ngle - these should throw exceptions for NaNs
4838 emit_andimm(fs,~0x800000,fs);
4839 return;
4840 }
4841
4842 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4843 if(opcode2[i]==0x10) {
4844 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4845 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4846 emit_orimm(fs,0x800000,fs);
4847 emit_flds(temp,14);
4848 emit_flds(HOST_TEMPREG,15);
4849 emit_fcmps(14,15);
4850 emit_fmstat();
4851 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4852 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4853 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4854 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4855 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4856 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4857 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4858 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4859 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4860 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4861 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4862 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4863 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4864 return;
4865 }
4866 if(opcode2[i]==0x11) {
4867 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4868 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4869 emit_orimm(fs,0x800000,fs);
4870 emit_vldr(temp,6);
4871 emit_vldr(HOST_TEMPREG,7);
4872 emit_fcmpd(6,7);
4873 emit_fmstat();
4874 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4875 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4876 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4877 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4878 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4879 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4880 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4881 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4882 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4883 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4884 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4885 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4886 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4887 return;
4888 }
4889 #endif
4890
4891 // C only
4892
4893 u_int hr,reglist=0;
4894 for(hr=0;hr<HOST_REGS;hr++) {
4895 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4896 }
4897 reglist&=~(1<<fs);
4898 save_regs(reglist);
4899 if(opcode2[i]==0x10) {
4900 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4901 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4902 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4903 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4904 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4905 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4906 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4907 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4908 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4909 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4910 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4911 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4912 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4913 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4914 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4915 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4916 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4917 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4918 }
4919 if(opcode2[i]==0x11) {
4920 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4921 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4922 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4923 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4924 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4925 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4926 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4927 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4928 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4929 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4930 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4931 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4932 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4933 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4934 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4935 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4936 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4937 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4938 }
4939 restore_regs(reglist);
4940 emit_loadreg(FSREG,fs);
3d624f89 4941#else
4942 cop1_unusable(i, i_regs);
4943#endif
57871462 4944}
4945
4946void float_assemble(int i,struct regstat *i_regs)
4947{
3d624f89 4948#ifndef DISABLE_COP1
57871462 4949 signed char temp=get_reg(i_regs->regmap,-1);
4950 assert(temp>=0);
4951 // Check cop1 unusable
4952 if(!cop1_usable) {
4953 signed char cs=get_reg(i_regs->regmap,CSREG);
4954 assert(cs>=0);
4955 emit_testimm(cs,0x20000000);
4956 int jaddr=(int)out;
4957 emit_jeq(0);
4958 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4959 cop1_usable=1;
4960 }
4961
4962 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4963 if((source[i]&0x3f)==6) // mov
4964 {
4965 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4966 if(opcode2[i]==0x10) {
4967 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4968 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4969 emit_readword_indexed(0,temp,temp);
4970 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4971 }
4972 if(opcode2[i]==0x11) {
4973 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4974 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4975 emit_vldr(temp,7);
4976 emit_vstr(7,HOST_TEMPREG);
4977 }
4978 }
4979 return;
4980 }
4981
4982 if((source[i]&0x3f)>3)
4983 {
4984 if(opcode2[i]==0x10) {
4985 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4986 emit_flds(temp,15);
4987 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4988 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4989 }
4990 if((source[i]&0x3f)==4) // sqrt
4991 emit_fsqrts(15,15);
4992 if((source[i]&0x3f)==5) // abs
4993 emit_fabss(15,15);
4994 if((source[i]&0x3f)==7) // neg
4995 emit_fnegs(15,15);
4996 emit_fsts(15,temp);
4997 }
4998 if(opcode2[i]==0x11) {
4999 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5000 emit_vldr(temp,7);
5001 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5002 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5003 }
5004 if((source[i]&0x3f)==4) // sqrt
5005 emit_fsqrtd(7,7);
5006 if((source[i]&0x3f)==5) // abs
5007 emit_fabsd(7,7);
5008 if((source[i]&0x3f)==7) // neg
5009 emit_fnegd(7,7);
5010 emit_vstr(7,temp);
5011 }
5012 return;
5013 }
5014 if((source[i]&0x3f)<4)
5015 {
5016 if(opcode2[i]==0x10) {
5017 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5018 }
5019 if(opcode2[i]==0x11) {
5020 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5021 }
5022 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5023 if(opcode2[i]==0x10) {
5024 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5025 emit_flds(temp,15);
5026 emit_flds(HOST_TEMPREG,13);
5027 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5028 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5029 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5030 }
5031 }
5032 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5033 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5034 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5035 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5036 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5037 emit_fsts(15,HOST_TEMPREG);
5038 }else{
5039 emit_fsts(15,temp);
5040 }
5041 }
5042 else if(opcode2[i]==0x11) {
5043 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5044 emit_vldr(temp,7);
5045 emit_vldr(HOST_TEMPREG,6);
5046 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5047 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5048 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5049 }
5050 }
5051 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5052 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5053 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5054 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5055 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5056 emit_vstr(7,HOST_TEMPREG);
5057 }else{
5058 emit_vstr(7,temp);
5059 }
5060 }
5061 }
5062 else {
5063 if(opcode2[i]==0x10) {
5064 emit_flds(temp,15);
5065 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5066 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5067 }
5068 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5069 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5070 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5071 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5072 emit_fsts(15,temp);
5073 }
5074 else if(opcode2[i]==0x11) {
5075 emit_vldr(temp,7);
5076 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5077 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5078 }
5079 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5080 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5081 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5082 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5083 emit_vstr(7,temp);
5084 }
5085 }
5086 return;
5087 }
5088 #endif
5089
5090 u_int hr,reglist=0;
5091 for(hr=0;hr<HOST_REGS;hr++) {
5092 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5093 }
5094 if(opcode2[i]==0x10) { // Single precision
5095 save_regs(reglist);
5096 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5097 if((source[i]&0x3f)<4) {
5098 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5099 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5100 }else{
5101 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5102 }
5103 switch(source[i]&0x3f)
5104 {
5105 case 0x00: emit_call((int)add_s);break;
5106 case 0x01: emit_call((int)sub_s);break;
5107 case 0x02: emit_call((int)mul_s);break;
5108 case 0x03: emit_call((int)div_s);break;
5109 case 0x04: emit_call((int)sqrt_s);break;
5110 case 0x05: emit_call((int)abs_s);break;
5111 case 0x06: emit_call((int)mov_s);break;
5112 case 0x07: emit_call((int)neg_s);break;
5113 }
5114 restore_regs(reglist);
5115 }
5116 if(opcode2[i]==0x11) { // Double precision
5117 save_regs(reglist);
5118 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5119 if((source[i]&0x3f)<4) {
5120 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5121 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5122 }else{
5123 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5124 }
5125 switch(source[i]&0x3f)
5126 {
5127 case 0x00: emit_call((int)add_d);break;
5128 case 0x01: emit_call((int)sub_d);break;
5129 case 0x02: emit_call((int)mul_d);break;
5130 case 0x03: emit_call((int)div_d);break;
5131 case 0x04: emit_call((int)sqrt_d);break;
5132 case 0x05: emit_call((int)abs_d);break;
5133 case 0x06: emit_call((int)mov_d);break;
5134 case 0x07: emit_call((int)neg_d);break;
5135 }
5136 restore_regs(reglist);
5137 }
3d624f89 5138#else
5139 cop1_unusable(i, i_regs);
5140#endif
57871462 5141}
5142
// Emit ARM code for the MIPS multiply/divide instruction at index i.
//
// The operation is selected by opcode2[i] (see the case list below). Host
// registers holding the source operands and the HI/LO results are looked up
// in i_regs->regmap with get_reg(); the 32-bit paths assert that all of them
// are mapped.
//
// If either source register number (rs1[i] or rs2[i]) is 0, no arithmetic is
// emitted at all: HI and LO are simply zeroed, when they are mapped.
//
// The 64-bit operations (opcode2[i]&4 set) are only compiled when FORCE32 is
// not defined; with FORCE32 reaching that branch is an assert(0).
void multdiv_assemble_arm(int i,struct regstat *i_regs)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      if(opcode2[i]==0x18) // MULT
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        // Signed 32x32 multiply; the result goes to the hi/lo host registers.
        emit_smull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x19) // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,rs1[i]);
        signed char m2=get_reg(i_regs->regmap,rs2[i]);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);
        // Unsigned counterpart of the MULT case above.
        emit_umull(m1,m2,hi,lo);
      }
      if(opcode2[i]==0x1A) // DIV
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]);
        signed char d2=get_reg(i_regs->regmap,rs2[i]);
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        // The sequence below is position dependent: the two branches use
        // fixed byte offsets relative to the output pointer 'out'.
        emit_movs(d1,remainder);
        emit_movimm(0xffffffff,quotient);
        emit_negmi(quotient,quotient); // .. quotient and ..
        emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
        emit_movs(d2,HOST_TEMPREG);
        // NOTE(review): out+52 is 13 instruction slots past this branch, i.e.
        // the emit_test(d1,d1) near the end, provided every emit_* call from
        // here on produces exactly one 4-byte instruction - confirm before
        // inserting or removing anything in between.
        emit_jeq((int)out+52); // Division by zero
        emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
        emit_clz(HOST_TEMPREG,quotient);
        emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
        emit_orimm(quotient,1<<31,quotient);
        emit_shr(quotient,quotient,quotient);
        // Loop body: compare / conditional subtract / shift the result bit in.
        emit_cmp(remainder,HOST_TEMPREG);
        emit_subcs(remainder,HOST_TEMPREG,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
        // Branches 16 bytes (four instructions) back, to the emit_cmp above.
        emit_jcc((int)out-16); // -4
        // Sign fix-up of quotient and remainder after the loop.
        emit_teq(d1,d2);
        emit_negmi(quotient,quotient);
        emit_test(d1,d1);
        emit_negmi(remainder,remainder);
      }
      if(opcode2[i]==0x1B) // DIVU
      {
        signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
        signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
        assert(d1>=0);
        assert(d2>=0);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(quotient>=0);
        assert(remainder>=0);
        emit_mov(d1,remainder);
        emit_movimm(0xffffffff,quotient); // div0 case
        emit_test(d2,d2);
        // NOTE(review): out+40 is ten instruction slots ahead, i.e. just past
        // the final emit_jcc, assuming one 4-byte instruction per emit_* call
        // (including the emit_movimm(1<<31,...) below) - confirm.
        emit_jeq((int)out+40); // Division by zero
        emit_clz(d2,HOST_TEMPREG);
        emit_movimm(1<<31,quotient);
        // NOTE(review): d2 is passed as the last argument here and to
        // emit_shrcc_imm below, so the divisor's host register appears to be
        // modified by the generated code - verify against the emit_* helpers.
        emit_shl(d2,HOST_TEMPREG,d2);
        emit_shr(quotient,HOST_TEMPREG,quotient);
        emit_cmp(remainder,d2);
        emit_subcs(remainder,d2,remainder);
        emit_adcs(quotient,quotient,quotient);
        emit_shrcc_imm(d2,1,d2);
        // Branches 16 bytes (four instructions) back, to the emit_cmp above.
        emit_jcc((int)out-16); // -4
      }
    }
    else // 64-bit
#ifndef FORCE32
    {
      if(opcode2[i]==0x1C) // DMULT
      {
        // This assert can never hold inside this branch, so DMULT always
        // aborts in builds with assertions enabled.
        assert(opcode2[i]!=0x1C);
        signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
        signed char m1l=get_reg(i_regs->regmap,rs1[i]);
        signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
        signed char m2l=get_reg(i_regs->regmap,rs2[i]);
        assert(m1h>=0);
        assert(m2h>=0);
        assert(m1l>=0);
        assert(m2l>=0);
        emit_pushreg(m2h);
        emit_pushreg(m2l);
        emit_pushreg(m1h);
        emit_pushreg(m1l);
        emit_call((int)&mult64);
        emit_popreg(m1l);
        emit_popreg(m1h);
        emit_popreg(m2l);
        emit_popreg(m2h);
        // Reload whichever halves of HI/LO are currently mapped.
        signed char hih=get_reg(i_regs->regmap,HIREG|64);
        signed char hil=get_reg(i_regs->regmap,HIREG);
        if(hih>=0) emit_loadreg(HIREG|64,hih);
        if(hil>=0) emit_loadreg(HIREG,hil);
        signed char loh=get_reg(i_regs->regmap,LOREG|64);
        signed char lol=get_reg(i_regs->regmap,LOREG);
        if(loh>=0) emit_loadreg(LOREG|64,loh);
        if(lol>=0) emit_loadreg(LOREG,lol);
      }
      if(opcode2[i]==0x1D) // DMULTU
      {
        signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
        signed char m1l=get_reg(i_regs->regmap,rs1[i]);
        signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
        signed char m2l=get_reg(i_regs->regmap,rs2[i]);
        assert(m1h>=0);
        assert(m2h>=0);
        assert(m1l>=0);
        assert(m2l>=0);
        // NOTE(review): mask 0x100f has bits 0-3 and 12 set, presumably
        // r0-r3 and r12 - confirm against save_regs().
        save_regs(0x100f);
        // Shuffle the four operand halves into host registers 0..3 for the
        // call. A half whose host register has a lower number than its
        // destination is read back from dynarec_local instead of being moved.
        // NOTE(review): presumably that is where save_regs() stored the
        // original value - confirm.
        if(m1l!=0) emit_mov(m1l,0);
        if(m1h==0) emit_readword((int)&dynarec_local,1);
        else if(m1h>1) emit_mov(m1h,1);
        if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
        else if(m2l>2) emit_mov(m2l,2);
        if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
        else if(m2h>3) emit_mov(m2h,3);
        emit_call((int)&multu64);
        restore_regs(0x100f);
        // NOTE(review): unlike DDIV/DDIVU below, these four lookups are not
        // followed by emit_loadreg calls, so they are unused in the live code.
        signed char hih=get_reg(i_regs->regmap,HIREG|64);
        signed char hil=get_reg(i_regs->regmap,HIREG);
        signed char loh=get_reg(i_regs->regmap,LOREG|64);
        signed char lol=get_reg(i_regs->regmap,LOREG);
        /*signed char temp=get_reg(i_regs->regmap,-1);
        signed char rh=get_reg(i_regs->regmap,HIREG|64);
        signed char rl=get_reg(i_regs->regmap,HIREG);
        assert(m1h>=0);
        assert(m2h>=0);
        assert(m1l>=0);
        assert(m2l>=0);
        assert(temp>=0);
        //emit_mov(m1l,EAX);
        //emit_mul(m2l);
        emit_umull(rl,rh,m1l,m2l);
        emit_storereg(LOREG,rl);
        emit_mov(rh,temp);
        //emit_mov(m1h,EAX);
        //emit_mul(m2l);
        emit_umull(rl,rh,m1h,m2l);
        emit_adds(rl,temp,temp);
        emit_adcimm(rh,0,rh);
        emit_storereg(HIREG,rh);
        //emit_mov(m2h,EAX);
        //emit_mul(m1l);
        emit_umull(rl,rh,m1l,m2h);
        emit_adds(rl,temp,temp);
        emit_adcimm(rh,0,rh);
        emit_storereg(LOREG|64,temp);
        emit_mov(rh,temp);
        //emit_mov(m2h,EAX);
        //emit_mul(m1h);
        emit_umull(rl,rh,m1h,m2h);
        emit_adds(rl,temp,rl);
        emit_loadreg(HIREG,temp);
        emit_adcimm(rh,0,rh);
        emit_adds(rl,temp,rl);
        emit_adcimm(rh,0,rh);
        // DEBUG
        /*
        emit_pushreg(m2h);
        emit_pushreg(m2l);
        emit_pushreg(m1h);
        emit_pushreg(m1l);
        emit_call((int)&multu64);
        emit_popreg(m1l);
        emit_popreg(m1h);
        emit_popreg(m2l);
        emit_popreg(m2h);
        signed char hih=get_reg(i_regs->regmap,HIREG|64);
        signed char hil=get_reg(i_regs->regmap,HIREG);
        if(hih>=0) emit_loadreg(HIREG|64,hih);  // DEBUG
        if(hil>=0) emit_loadreg(HIREG,hil);  // DEBUG
        */
        // Shouldn't be necessary
        //char loh=get_reg(i_regs->regmap,LOREG|64);
        //char lol=get_reg(i_regs->regmap,LOREG);
        //if(loh>=0) emit_loadreg(LOREG|64,loh);
        //if(lol>=0) emit_loadreg(LOREG,lol);
      }
      if(opcode2[i]==0x1E) // DDIV
      {
        signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
        signed char d1l=get_reg(i_regs->regmap,rs1[i]);
        signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
        signed char d2l=get_reg(i_regs->regmap,rs2[i]);
        assert(d1h>=0);
        assert(d2h>=0);
        assert(d1l>=0);
        assert(d2l>=0);
        // Same argument shuffle into host registers 0..3 as in DMULTU above.
        save_regs(0x100f);
        if(d1l!=0) emit_mov(d1l,0);
        if(d1h==0) emit_readword((int)&dynarec_local,1);
        else if(d1h>1) emit_mov(d1h,1);
        if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
        else if(d2l>2) emit_mov(d2l,2);
        if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
        else if(d2h>3) emit_mov(d2h,3);
        emit_call((int)&div64);
        restore_regs(0x100f);
        // Reload whichever halves of HI/LO are currently mapped.
        signed char hih=get_reg(i_regs->regmap,HIREG|64);
        signed char hil=get_reg(i_regs->regmap,HIREG);
        signed char loh=get_reg(i_regs->regmap,LOREG|64);
        signed char lol=get_reg(i_regs->regmap,LOREG);
        if(hih>=0) emit_loadreg(HIREG|64,hih);
        if(hil>=0) emit_loadreg(HIREG,hil);
        if(loh>=0) emit_loadreg(LOREG|64,loh);
        if(lol>=0) emit_loadreg(LOREG,lol);
      }
      if(opcode2[i]==0x1F) // DDIVU
      {
        //u_int hr,reglist=0;
        //for(hr=0;hr<HOST_REGS;hr++) {
        //  if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
        //}
        signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
        signed char d1l=get_reg(i_regs->regmap,rs1[i]);
        signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
        signed char d2l=get_reg(i_regs->regmap,rs2[i]);
        assert(d1h>=0);
        assert(d2h>=0);
        assert(d1l>=0);
        assert(d2l>=0);
        // Same argument shuffle into host registers 0..3 as in DMULTU above.
        save_regs(0x100f);
        if(d1l!=0) emit_mov(d1l,0);
        if(d1h==0) emit_readword((int)&dynarec_local,1);
        else if(d1h>1) emit_mov(d1h,1);
        if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
        else if(d2l>2) emit_mov(d2l,2);
        if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
        else if(d2h>3) emit_mov(d2h,3);
        emit_call((int)&divu64);
        restore_regs(0x100f);
        // Reload whichever halves of HI/LO are currently mapped.
        signed char hih=get_reg(i_regs->regmap,HIREG|64);
        signed char hil=get_reg(i_regs->regmap,HIREG);
        signed char loh=get_reg(i_regs->regmap,LOREG|64);
        signed char lol=get_reg(i_regs->regmap,LOREG);
        if(hih>=0) emit_loadreg(HIREG|64,hih);
        if(hil>=0) emit_loadreg(HIREG,hil);
        if(loh>=0) emit_loadreg(LOREG|64,loh);
        if(lol>=0) emit_loadreg(LOREG,lol);
      }
    }
#else
    assert(0);
#endif
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if(hr>=0) emit_zeroreg(hr);
    if(lr>=0) emit_zeroreg(lr);
  }
}
5427#define multdiv_assemble multdiv_assemble_arm
5428
// Hash-register preload hook; intentionally a no-op on ARM.
// On x86 this step puts the value 0xf8 into the register r. On ARM the
// hash can be done with a single instruction (see do_rhash), so nothing
// needs to be emitted here.
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
5433
5434void do_preload_rhtbl(int ht) {
5435 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5436}
5437
// Emit the mini hash: rh = rs & 0xf8.
// The mask keeps bits 3..7, the same bits do_miniht_insert uses to pick
// one of the 32 mini_ht entries ((addr&0xFF)>>3).
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5441
5442void do_miniht_load(int ht,int rh) {
5443 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5444 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5445}
5446
5447void do_miniht_jump(int rs,int rh,int ht) {
5448 emit_cmp(rh,rs);
5449 emit_ldreq_indexed(ht,4,15);
5450 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5451 emit_mov(rs,7);
5452 emit_jmp(jump_vaddr_reg[7]);
5453 #else
5454 emit_jmp(jump_vaddr_reg[rs]);
5455 #endif
5456}
5457
5458void do_miniht_insert(u_int return_address,int rt,int temp) {
5459 #ifdef ARMv5_ONLY
5460 emit_movimm(return_address,rt); // PC into link register
5461 add_to_linker((int)out,return_address,1);
5462 emit_pcreladdr(temp);
5463 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5464 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5465 #else
5466 emit_movw(return_address&0x0000FFFF,rt);
5467 add_to_linker((int)out,return_address,1);
5468 emit_pcreladdr(temp);
5469 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5470 emit_movt(return_address&0xFFFF0000,rt);
5471 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5472 #endif
5473}
5474
5475// Sign-extend to 64 bits and write out upper half of a register
5476// This is useful where we have a 32-bit value in a register, and want to
5477// keep it in a 32-bit register, but can't guarantee that it won't be read
5478// as a 64-bit value later.
5479void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5480{
24385cae 5481#ifndef FORCE32
57871462 5482 if(is32_pre==is32) return;
5483 int hr,reg;
5484 for(hr=0;hr<HOST_REGS;hr++) {
5485 if(hr!=EXCLUDE_REG) {
5486 //if(pre[hr]==entry[hr]) {
5487 if((reg=pre[hr])>=0) {
5488 if((dirty>>hr)&1) {
5489 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5490 emit_sarimm(hr,31,HOST_TEMPREG);
5491 emit_storereg(reg|64,HOST_TEMPREG);
5492 }
5493 }
5494 }
5495 //}
5496 }
5497 }
24385cae 5498#endif
57871462 5499}
5500
5501void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5502{
5503 //if(dirty_pre==dirty) return;
5504 int hr,reg,new_hr;
5505 for(hr=0;hr<HOST_REGS;hr++) {
5506 if(hr!=EXCLUDE_REG) {
5507 reg=pre[hr];
5508 if(((~u)>>(reg&63))&1) {
f776eb14 5509 if(reg>0) {
57871462 5510 if(((dirty_pre&~dirty)>>hr)&1) {
5511 if(reg>0&&reg<34) {
5512 emit_storereg(reg,hr);
5513 if( ((is32_pre&~uu)>>reg)&1 ) {
5514 emit_sarimm(hr,31,HOST_TEMPREG);
5515 emit_storereg(reg|64,HOST_TEMPREG);
5516 }
5517 }
5518 else if(reg>=64) {
5519 emit_storereg(reg,hr);
5520 }
5521 }
5522 }
57871462 5523 }
5524 }
5525 }
5526}
5527
5528
5529/* using strd could possibly help but you'd have to allocate registers in pairs
5530void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5531{
5532 int hr;
5533 int wrote=-1;
5534 for(hr=HOST_REGS-1;hr>=0;hr--) {
5535 if(hr!=EXCLUDE_REG) {
5536 if(pre[hr]!=entry[hr]) {
5537 if(pre[hr]>=0) {
5538 if((dirty>>hr)&1) {
5539 if(get_reg(entry,pre[hr])<0) {
5540 if(pre[hr]<64) {
5541 if(!((u>>pre[hr])&1)) {
5542 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5543 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5544 emit_sarimm(hr,31,hr+1);
5545 emit_strdreg(pre[hr],hr);
5546 }
5547 else
5548 emit_storereg(pre[hr],hr);
5549 }else{
5550 emit_storereg(pre[hr],hr);
5551 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5552 emit_sarimm(hr,31,hr);
5553 emit_storereg(pre[hr]|64,hr);
5554 }
5555 }
5556 }
5557 }else{
5558 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5559 emit_storereg(pre[hr],hr);
5560 }
5561 }
5562 wrote=hr;
5563 }
5564 }
5565 }
5566 }
5567 }
5568 }
5569 for(hr=0;hr<HOST_REGS;hr++) {
5570 if(hr!=EXCLUDE_REG) {
5571 if(pre[hr]!=entry[hr]) {
5572 if(pre[hr]>=0) {
5573 int nr;
5574 if((nr=get_reg(entry,pre[hr]))>=0) {
5575 emit_mov(hr,nr);
5576 }
5577 }
5578 }
5579 }
5580 }
5581}
5582#define wb_invalidate wb_invalidate_arm
5583*/
5584
dd3a91a1 5585// Clearing the cache is rather slow on ARM Linux, so mark the areas
5586// that need to be cleared, and then only clear these areas once.
5587void do_clear_cache()
5588{
5589 int i,j;
5590 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5591 {
5592 u_int bitmap=needs_clear_cache[i];
5593 if(bitmap) {
5594 u_int start,end;
5595 for(j=0;j<32;j++)
5596 {
5597 if(bitmap&(1<<j)) {
5598 start=BASE_ADDR+i*131072+j*4096;
5599 end=start+4095;
5600 j++;
5601 while(j<32) {
5602 if(bitmap&(1<<j)) {
5603 end+=4096;
5604 j++;
5605 }else{
5606 __clear_cache((void *)start,(void *)end);
5607 break;
5608 }
5609 }
5610 }
5611 }
5612 needs_clear_cache[i]=0;
5613 }
5614 }
5615}
5616
57871462 5617// CPU-architecture-specific initialization
5618void arch_init() {
3d624f89 5619#ifndef DISABLE_COP1
57871462 5620 rounding_modes[0]=0x0<<22; // round
5621 rounding_modes[1]=0x3<<22; // trunc
5622 rounding_modes[2]=0x1<<22; // ceil
5623 rounding_modes[3]=0x2<<22; // floor
3d624f89 5624#endif
57871462 5625}
b9b61529 5626
5627// vim:shiftwidth=2:expandtab