drc: implement memory access speculation
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
// Variables defined outside this file.
// emit_loadreg()/emit_storereg() below address cycle_count through an
// offset computed relative to &dynarec_local.
22extern int cycle_count;
23extern int last_count;
24extern int pcaddr;
25extern int pending_exception;
26extern int branch_target;
27extern uint64_t readmem_dword;
// fake_pc is only declared when building with MUPEN64 defined.
3d624f89 28#ifdef MUPEN64
57871462 29extern precomp_instr fake_pc;
3d624f89 30#endif
// Base from which emit_loadreg()/emit_storereg() compute their fp-relative
// offsets (offset = address - &dynarec_local, asserted to be < 4096).
57871462 31extern void *dynarec_local;
// 1048576 entries; verify_dirty() and get_bounds() index it with address>>12.
32extern u_int memory_map[1048576];
33extern u_int mini_ht[32][2];
34extern u_int rounding_modes[4];
35
// Routines implemented outside this file; only their addresses are used here.
36void indirect_jump_indexed();
37void indirect_jump();
38void do_interrupt();
// One jump_vaddr_rN routine per host register N that has one
// (there is no variant for r11, r13, r14 or r15).
39void jump_vaddr_r0();
40void jump_vaddr_r1();
41void jump_vaddr_r2();
42void jump_vaddr_r3();
43void jump_vaddr_r4();
44void jump_vaddr_r5();
45void jump_vaddr_r6();
46void jump_vaddr_r7();
47void jump_vaddr_r8();
48void jump_vaddr_r9();
49void jump_vaddr_r10();
50void jump_vaddr_r12();
51
// jump_vaddr_reg[n] is the address of jump_vaddr_rN for host register n.
// Entries 11, 13, 14 and 15 (named fp, sp, lr and pc in regname[]) are 0
// because no routine exists for those registers.
52const u_int jump_vaddr_reg[16] = {
53 (int)jump_vaddr_r0,
54 (int)jump_vaddr_r1,
55 (int)jump_vaddr_r2,
56 (int)jump_vaddr_r3,
57 (int)jump_vaddr_r4,
58 (int)jump_vaddr_r5,
59 (int)jump_vaddr_r6,
60 (int)jump_vaddr_r7,
61 (int)jump_vaddr_r8,
62 (int)jump_vaddr_r9,
63 (int)jump_vaddr_r10,
64 0,
65 (int)jump_vaddr_r12,
66 0,
67 0,
68 0};
69
// One invalidate_addr_rN routine per host register N that has one; all are
// implemented outside this file (there is no variant for r11, r13, r14, r15).
0bbd1454 70void invalidate_addr_r0();
71void invalidate_addr_r1();
72void invalidate_addr_r2();
73void invalidate_addr_r3();
74void invalidate_addr_r4();
75void invalidate_addr_r5();
76void invalidate_addr_r6();
77void invalidate_addr_r7();
78void invalidate_addr_r8();
79void invalidate_addr_r9();
80void invalidate_addr_r10();
81void invalidate_addr_r12();
82
// invalidate_addr_reg[n] is the address of invalidate_addr_rN for host
// register n. Entries 11, 13, 14 and 15 (fp, sp, lr and pc in regname[])
// are 0 because no routine exists for those registers.
83const u_int invalidate_addr_reg[16] = {
84 (int)invalidate_addr_r0,
85 (int)invalidate_addr_r1,
86 (int)invalidate_addr_r2,
87 (int)invalidate_addr_r3,
88 (int)invalidate_addr_r4,
89 (int)invalidate_addr_r5,
90 (int)invalidate_addr_r6,
91 (int)invalidate_addr_r7,
92 (int)invalidate_addr_r8,
93 (int)invalidate_addr_r9,
94 (int)invalidate_addr_r10,
95 0,
96 (int)invalidate_addr_r12,
97 0,
98 0,
99 0};
100
57871462 101#include "fpu.h"
102
// Array of 2^(TARGET_SIZE_2-17) words, zero-initialised as a file-scope object.
// NOTE(review): presumably a bitmap of translation-cache regions whose
// instruction cache still has to be flushed - confirm against the users
// of this array, which are not in this part of the file.
dd3a91a1 103unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
104
57871462 105/* Linker */
106
// Patch the instruction at 'addr' so that it refers to 'target'.
// The kind of instruction is recognised from its most significant byte
// (byte 3 of the word):
//   0xe2  - pc-relative add: the low 12 bits get the distance in words
//           (distance must be < 1024 bytes, both addresses word aligned).
//   0x72  - instruction generated by emit_jno_unlikely: patched like the
//           0xe2 case when the distance is < 1024; with a 16-byte granular
//           immediate when it is < 4096 and a multiple of 16; otherwise the
//           whole word is replaced by 0x7A000000 | 24-bit branch offset.
//   other - must be a branch (asserted); its 24-bit offset field is rewritten.
// The distance is always measured from addr+8.
void set_jump_target(int addr,u_int target)
{
  u_int *insn=(u_int *)addr;
  u_char top=((u_char *)addr)[3];
  u_int disp=target-(u_int)insn-8;

  switch(top) {
  case 0xe2:
    assert(disp<1024);
    assert((addr&3)==0);
    assert((target&3)==0);
    *insn=(*insn&0xFFFFF000)|(disp>>2)|0xF00;
    //printf("target=%x addr=%x insn=%x\n",target,addr,*insn);
    break;
  case 0x72:
    // generated by emit_jno_unlikely
    if(disp<1024) {
      assert((addr&3)==0);
      assert((target&3)==0);
      *insn=(*insn&0xFFFFF000)|(disp>>2)|0xF00;
    }
    else if(disp<4096&&(disp&15)==0) {
      assert((addr&3)==0);
      assert((target&3)==0);
      *insn=(*insn&0xFFFFF000)|(disp>>4)|0xE00;
    }
    else {
      *insn=(0x7A000000)|((disp<<6)>>8);
    }
    break;
  default:
    assert((top&0x0e)==0xa);
    *insn=(*insn&0xFF000000)|((disp<<6)>>8);
    break;
  }
}
137
// Variant of set_jump_target() that can additionally copy the first
// instruction of the branch target into the word just before the branch and
// then make the branch skip that instruction. Works, but the difference in
// speed is usually insignificant.
// When 'copy' is nonzero the word before the branch must be 0xe28dd000
// (asserted). The copy is suppressed when the target instruction
//   - has (insn & 0x0e100000) == 0            (ALU, no immediate, no flags),
//   - has (insn & 0x0c100000) == 0x04100000   (load), or
//   - has bit 27 set.
// Instructions whose top byte is 0xe2 are never combined with 'copy'; their
// low 12 bits receive the byte distance (< 4096) measured from addr+8.
void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  u_char top=((u_char *)addr)[3];

  assert(!copy||insn[-1]==0xe28dd000);

  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }

  assert((top&0x0e)==0xa);
  u_int first=*(u_int *)target;
  if((first&0x0e100000)==0            // ALU, no immediate, no flags
     ||(first&0x0c100000)==0x04100000 // Load
     ||(first&0x08000000)) {
    copy=0;
  }
  if(copy) {
    insn[-1]=first;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
170
171/* Literal pool */
172add_literal(int addr,int val)
173{
15776b68 174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
177 literalcount++;
178}
179
f76eeef9 180void *kill_pointer(void *stub)
57871462 181{
182 int *ptr=(int *)(stub+4);
183 assert((*ptr&0x0ff00000)==0x05900000);
184 u_int offset=*ptr&0xfff;
185 int **l_ptr=(void *)ptr+offset+8;
186 int *i_ptr=*l_ptr;
187 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 188 return i_ptr;
57871462 189}
190
// Find where an external branch is linked to, using the address of its stub:
//  - take the address loaded by the instruction one after the stub start
//    (dyna_linker arg1), which must be a pc-relative "ldr rX,[pc,#imm]",
//  - treat that address as a pointer to a branch instruction (asserted),
//  - return the address that branch jumps to.
int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0fff0000)==0x059f0000);
  u_int offset=*ptr&0xfff;
  int **l_ptr=(int **)((char *)ptr+offset+8);
  int *i_ptr=*l_ptr;
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // Sign-extend the 24-bit word offset and scale it to bytes. The left
  // shift is done on an unsigned value (as verify_dirty() does) because
  // shifting a negative int left is undefined behaviour.
  u_int insn=(u_int)*i_ptr;
  return (int)i_ptr+((int)(insn<<8)>>6)+8;
}
206
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// The call (a "bl", asserted) is expected 4 words into the dirty stub when
// ARMv5_ONLY is defined and 6 words in otherwise, optionally one word later.
// If the instruction after the bl is an unconditional branch, its target is
// returned; otherwise the address of that instruction is returned.
u_int get_clean_addr(int addr)
{
  u_int *ptr=(u_int *)addr;
  #ifdef ARMv5_ONLY
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    // follow jump; the offset is shifted as unsigned and only then
    // reinterpreted as signed, because left-shifting a negative int is
    // undefined behaviour
    return (int)ptr+((int)(*ptr<<8)>>6)+8;
  }
  return (u_int)ptr;
}
225
226int verify_dirty(int addr)
227{
228 u_int *ptr=(u_int *)addr;
229 #ifdef ARMv5_ONLY
230 // get from literal pool
15776b68 231 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 232 u_int offset=*ptr&0xfff;
233 u_int *l_ptr=(void *)ptr+offset+8;
234 u_int source=l_ptr[0];
235 u_int copy=l_ptr[1];
236 u_int len=l_ptr[2];
237 ptr+=4;
238 #else
239 // ARMv7 movw/movt
240 assert((*ptr&0xFFF00000)==0xe3000000);
241 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
242 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
243 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
244 ptr+=6;
245 #endif
246 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
247 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 248 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 249 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
250 unsigned int page=source>>12;
251 unsigned int map_value=memory_map[page];
252 if(map_value>=0x80000000) return 0;
253 while(page<((source+len-1)>>12)) {
254 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
255 }
256 source = source+(map_value<<2);
257 }
258 //printf("verify_dirty: %x %x %x\n",source,copy,len);
259 return !memcmp((void *)source,(void *)copy,len);
260}
261
262// This doesn't necessarily find all clean entry points, just
263// guarantees that it's not dirty
264int isclean(int addr)
265{
266 #ifdef ARMv5_ONLY
267 int *ptr=((u_int *)addr)+4;
268 #else
269 int *ptr=((u_int *)addr)+6;
270 #endif
271 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
272 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
273 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
276 return 1;
277}
278
279void get_bounds(int addr,u_int *start,u_int *end)
280{
281 u_int *ptr=(u_int *)addr;
282 #ifdef ARMv5_ONLY
283 // get from literal pool
15776b68 284 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 285 u_int offset=*ptr&0xfff;
286 u_int *l_ptr=(void *)ptr+offset+8;
287 u_int source=l_ptr[0];
288 //u_int copy=l_ptr[1];
289 u_int len=l_ptr[2];
290 ptr+=4;
291 #else
292 // ARMv7 movw/movt
293 assert((*ptr&0xFFF00000)==0xe3000000);
294 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
295 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
296 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
297 ptr+=6;
298 #endif
299 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
300 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 301 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 302 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
303 if(memory_map[source>>12]>=0x80000000) source = 0;
304 else source = source+(memory_map[source>>12]<<2);
305 }
306 *start=source;
307 *end=source+len;
308}
309
310/* Register allocation */
311
// alloc_reg: give guest register 'reg' a slot in cur->regmap[] for the
// instruction with index 'i'.
// Returns without doing anything when reg's bit is set in cur->u or when
// reg is already present in cur->regmap[]. Every path that installs reg
// also clears that slot's bit in cur->dirty and cur->isconst.
// If no slot can be found at all, a message is printed and exit(1) is called.
312// Note: registers are allocated clean (unmodified state)
313// if you intend to modify the register, you must call dirty_reg().
314void alloc_reg(struct regstat *cur,int i,signed char reg)
315{
316 int r,hr;
// Default preference is host register reg&7; CCREG prefers HOST_CCREG and
// the PTEMP/FTEMP temporaries prefer host register 12.
317 int preferred_reg = (reg&7);
318 if(reg==CCREG) preferred_reg=HOST_CCREG;
319 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
320
321 // Don't allocate unused registers
322 if((cur->u>>reg)&1) return;
323
324 // see if it's already allocated
325 for(hr=0;hr<HOST_REGS;hr++)
326 {
327 if(cur->regmap[hr]==reg) return;
328 }
329
330 // Keep the same mapping if the register was already allocated in a loop
// (loop_reg() is defined outside this part of the file.)
331 preferred_reg = loop_reg(i,reg,preferred_reg);
332
333 // Try to allocate the preferred register
// It is taken when it is free (-1), or when its current occupant is flagged
// in cur->u (occupant < 64) or in cur->uu (occupant >= 64, tested with r&63).
334 if(cur->regmap[preferred_reg]==-1) {
335 cur->regmap[preferred_reg]=reg;
336 cur->dirty&=~(1<<preferred_reg);
337 cur->isconst&=~(1<<preferred_reg);
338 return;
339 }
340 r=cur->regmap[preferred_reg];
341 if(r<64&&((cur->u>>r)&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347 if(r>=64&&((cur->uu>>(r&63))&1)) {
348 cur->regmap[preferred_reg]=reg;
349 cur->dirty&=~(1<<preferred_reg);
350 cur->isconst&=~(1<<preferred_reg);
351 return;
352 }
353
354 // Clear any unneeded registers
355 // We try to keep the mapping consistent, if possible, because it
356 // makes branches easier (especially loops). So we try to allocate
357 // first (see above) before removing old mappings. If this is not
358 // possible then go ahead and clear out the registers that are no
359 // longer needed.
// Note: the loop stops at the first slot it frees (break), so at most one
// mapping is removed per call.
360 for(hr=0;hr<HOST_REGS;hr++)
361 {
362 r=cur->regmap[hr];
363 if(r>=0) {
364 if(r<64) {
365 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
366 }
367 else
368 {
369 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
370 }
371 }
372 }
373 // Try to allocate any available register, but prefer
374 // registers that have not been used recently.
// "Recently" here means: the slot held one of the previous instruction's
// rs1/rs2/rt1/rt2 registers in regs[i-1].regmap[].
375 if(i>0) {
376 for(hr=0;hr<HOST_REGS;hr++) {
377 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
378 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
379 cur->regmap[hr]=reg;
380 cur->dirty&=~(1<<hr);
381 cur->isconst&=~(1<<hr);
382 return;
383 }
384 }
385 }
386 }
387 // Try to allocate any available register
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 cur->regmap[hr]=reg;
391 cur->dirty&=~(1<<hr);
392 cur->isconst&=~(1<<hr);
393 return;
394 }
395 }
396
397 // Ok, now we have to evict someone
398 // Pick a register we hopefully won't need soon
// hsn[] has one entry per guest register, preset to 10 and then filled in by
// lsn() (defined elsewhere; presumably smaller values mean "needed sooner" -
// confirm). Candidates are scanned from j=10 downwards, so entries with the
// largest hsn value are evicted first.
399 u_char hsn[MAXREG+1];
400 memset(hsn,10,sizeof(hsn));
401 int j;
402 lsn(hsn,i,&preferred_reg);
403 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
404 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
// First pass (only for i>0, hsn values 10..3): skips guest registers used by
// the previous instruction and protects HOST_CCREG unless j<hsn[CCREG].
405 if(i>0) {
406 // Don't evict the cycle count at entry points, otherwise the entry
407 // stub will have to write it.
408 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
409 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
410 for(j=10;j>=3;j--)
411 {
412 // Alloc preferred register if available
413 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
414 for(hr=0;hr<HOST_REGS;hr++) {
415 // Evict both parts of a 64-bit register
416 if((cur->regmap[hr]&63)==r) {
417 cur->regmap[hr]=-1;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 }
421 }
422 cur->regmap[preferred_reg]=reg;
423 return;
424 }
425 for(r=1;r<=MAXREG;r++)
426 {
427 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are taken before lower halves (r).
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
430 if(cur->regmap[hr]==r+64) {
431 cur->regmap[hr]=reg;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 return;
435 }
436 }
437 }
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 }
449 }
450 }
451 }
// Second pass (hsn values 10..0): no exclusions at all, again upper halves
// before lower halves.
452 for(j=10;j>=0;j--)
453 {
454 for(r=1;r<=MAXREG;r++)
455 {
456 if(hsn[r]==j) {
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(cur->regmap[hr]==r+64) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 for(hr=0;hr<HOST_REGS;hr++) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
476 printf("This shouldn't happen (alloc_reg)");exit(1);
477}
478
// alloc_reg64: allocate both halves of guest register 'reg' for the
// instruction with index 'i'. The lower 32 bits are handled by alloc_reg();
// the upper half is then mapped as reg|64 (compared as reg+64) in
// cur->regmap[].
// The upper half is skipped when reg's bit is set in cur->uu or when it is
// already mapped. Every path that installs it clears the slot's bit in
// cur->dirty and cur->isconst. If no slot can be found a message is printed
// and exit(1) is called.
479void alloc_reg64(struct regstat *cur,int i,signed char reg)
480{
// Default preference for the upper half is host register 8 or 9 (8+(reg&1)).
481 int preferred_reg = 8+(reg&1);
482 int r,hr;
483
484 // allocate the lower 32 bits
485 alloc_reg(cur,i,reg);
486
487 // Don't allocate unused registers
488 if((cur->uu>>reg)&1) return;
489
490 // see if the upper half is already allocated
491 for(hr=0;hr<HOST_REGS;hr++)
492 {
493 if(cur->regmap[hr]==reg+64) return;
494 }
495
496 // Keep the same mapping if the register was already allocated in a loop
// (loop_reg() is defined outside this part of the file.)
497 preferred_reg = loop_reg(i,reg,preferred_reg);
498
499 // Try to allocate the preferred register
// It is taken when it is free (-1), or when its current occupant is flagged
// in cur->u (occupant < 64) or in cur->uu (occupant >= 64, tested with r&63).
500 if(cur->regmap[preferred_reg]==-1) {
501 cur->regmap[preferred_reg]=reg|64;
502 cur->dirty&=~(1<<preferred_reg);
503 cur->isconst&=~(1<<preferred_reg);
504 return;
505 }
506 r=cur->regmap[preferred_reg];
507 if(r<64&&((cur->u>>r)&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513 if(r>=64&&((cur->uu>>(r&63))&1)) {
514 cur->regmap[preferred_reg]=reg|64;
515 cur->dirty&=~(1<<preferred_reg);
516 cur->isconst&=~(1<<preferred_reg);
517 return;
518 }
519
520 // Clear any unneeded registers
521 // We try to keep the mapping consistent, if possible, because it
522 // makes branches easier (especially loops). So we try to allocate
523 // first (see above) before removing old mappings. If this is not
524 // possible then go ahead and clear out the registers that are no
525 // longer needed.
// Note: unlike alloc_reg() this scan runs from the highest host register
// downwards; it also stops at the first slot it frees (break).
526 for(hr=HOST_REGS-1;hr>=0;hr--)
527 {
528 r=cur->regmap[hr];
529 if(r>=0) {
530 if(r<64) {
531 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
532 }
533 else
534 {
535 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
536 }
537 }
538 }
539 // Try to allocate any available register, but prefer
540 // registers that have not been used recently.
// "Recently" here means: the slot held one of the previous instruction's
// rs1/rs2/rt1/rt2 registers in regs[i-1].regmap[].
541 if(i>0) {
542 for(hr=0;hr<HOST_REGS;hr++) {
543 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
544 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
545 cur->regmap[hr]=reg|64;
546 cur->dirty&=~(1<<hr);
547 cur->isconst&=~(1<<hr);
548 return;
549 }
550 }
551 }
552 }
553 // Try to allocate any available register
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
556 cur->regmap[hr]=reg|64;
557 cur->dirty&=~(1<<hr);
558 cur->isconst&=~(1<<hr);
559 return;
560 }
561 }
562
563 // Ok, now we have to evict someone
564 // Pick a register we hopefully won't need soon
// hsn[] has one entry per guest register, preset to 10 and then filled in by
// lsn() (defined elsewhere; presumably smaller values mean "needed sooner" -
// confirm). Candidates are scanned from j=10 downwards, so entries with the
// largest hsn value are evicted first.
565 u_char hsn[MAXREG+1];
566 memset(hsn,10,sizeof(hsn));
567 int j;
568 lsn(hsn,i,&preferred_reg);
569 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
570 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
// First pass (only for i>0, hsn values 10..3): skips guest registers used by
// the previous instruction and protects HOST_CCREG unless j<hsn[CCREG].
571 if(i>0) {
572 // Don't evict the cycle count at entry points, otherwise the entry
573 // stub will have to write it.
574 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
575 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
576 for(j=10;j>=3;j--)
577 {
578 // Alloc preferred register if available
579 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 // Evict both parts of a 64-bit register
582 if((cur->regmap[hr]&63)==r) {
583 cur->regmap[hr]=-1;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 }
587 }
588 cur->regmap[preferred_reg]=reg|64;
589 return;
590 }
591 for(r=1;r<=MAXREG;r++)
592 {
593 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are taken before lower halves (r).
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
596 if(cur->regmap[hr]==r+64) {
597 cur->regmap[hr]=reg|64;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 return;
601 }
602 }
603 }
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 }
615 }
616 }
617 }
// Second pass (hsn values 10..0): no exclusions at all, again upper halves
// before lower halves.
618 for(j=10;j>=0;j--)
619 {
620 for(r=1;r<=MAXREG;r++)
621 {
622 if(hsn[r]==j) {
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==r+64) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 for(hr=0;hr<HOST_REGS;hr++) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
642 printf("This shouldn't happen");exit(1);
643}
644
// alloc_reg_temp: map the temporary 'reg' into cur->regmap[] for the
// instruction with index 'i'. Every path that installs it clears the slot's
// bit in cur->dirty and cur->isconst; if no slot can be found a message is
// printed and exit(1) is called.
645// Allocate a temporary register. This is done without regard to
646// dirty status or whether the register we request is on the unneeded list
647// Note: This will only allocate one register, even if called multiple times
648void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
649{
650 int r,hr;
// No preferred host register; -1 is only handed to lsn() below.
651 int preferred_reg = -1;
652
653 // see if it's already allocated
654 for(hr=0;hr<HOST_REGS;hr++)
655 {
656 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
657 }
658
659 // Try to allocate any available register
// Free slots are searched from the highest host register downwards.
660 for(hr=HOST_REGS-1;hr>=0;hr--) {
661 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
662 cur->regmap[hr]=reg;
663 cur->dirty&=~(1<<hr);
664 cur->isconst&=~(1<<hr);
665 return;
666 }
667 }
668
669 // Find an unneeded register
// A slot qualifies when its occupant is flagged in cur->u (occupant < 64) or
// cur->uu (occupant >= 64) and, for i>0, also in unneeded_reg[i-1] or
// unneeded_reg_upper[i-1] respectively.
670 for(hr=HOST_REGS-1;hr>=0;hr--)
671 {
672 r=cur->regmap[hr];
673 if(r>=0) {
674 if(r<64) {
675 if((cur->u>>r)&1) {
676 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
677 cur->regmap[hr]=reg;
678 cur->dirty&=~(1<<hr);
679 cur->isconst&=~(1<<hr);
680 return;
681 }
682 }
683 }
684 else
685 {
686 if((cur->uu>>(r&63))&1) {
687 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
697
698 // Ok, now we have to evict someone
699 // Pick a register we hopefully won't need soon
700 // TODO: we might want to follow unconditional jumps here
701 // TODO: get rid of dupe code and make this into a function
// hsn[] has one entry per guest register, preset to 10 and then filled in by
// lsn() (defined elsewhere; presumably smaller values mean "needed sooner" -
// confirm). Candidates are scanned from j=10 downwards.
702 u_char hsn[MAXREG+1];
703 memset(hsn,10,sizeof(hsn));
704 int j;
705 lsn(hsn,i,&preferred_reg);
706 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
// First pass (only for i>0, hsn values 10..3): skips guest registers used by
// the previous instruction. HOST_CCREG is only eligible while hsn[CCREG]>2
// (alloc_reg()/alloc_reg64() test j<hsn[CCREG] at this point instead).
707 if(i>0) {
708 // Don't evict the cycle count at entry points, otherwise the entry
709 // stub will have to write it.
710 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
711 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
712 for(j=10;j>=3;j--)
713 {
714 for(r=1;r<=MAXREG;r++)
715 {
716 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are taken before lower halves (r).
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
719 if(cur->regmap[hr]==r+64) {
720 cur->regmap[hr]=reg;
721 cur->dirty&=~(1<<hr);
722 cur->isconst&=~(1<<hr);
723 return;
724 }
725 }
726 }
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 }
738 }
739 }
740 }
// Second pass (hsn values 10..0): no exclusions at all, again upper halves
// before lower halves.
741 for(j=10;j>=0;j--)
742 {
743 for(r=1;r<=MAXREG;r++)
744 {
745 if(hsn[r]==j) {
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(cur->regmap[hr]==r+64) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 for(hr=0;hr<HOST_REGS;hr++) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
765 printf("This shouldn't happen");exit(1);
766}
767// Allocate a specific ARM register.
768void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
769{
770 int n;
f776eb14 771 int dirty=0;
57871462 772
773 // see if it's already allocated (and dealloc it)
774 for(n=0;n<HOST_REGS;n++)
775 {
f776eb14 776 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
777 dirty=(cur->dirty>>n)&1;
778 cur->regmap[n]=-1;
779 }
57871462 780 }
781
782 cur->regmap[hr]=reg;
783 cur->dirty&=~(1<<hr);
f776eb14 784 cur->dirty|=dirty<<hr;
57871462 785 cur->isconst&=~(1<<hr);
786}
787
788// Alloc cycle count into dedicated register
789alloc_cc(struct regstat *cur,int i)
790{
791 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
792}
793
794/* Special alloc */
795
796
797/* Assembler */
798
// Printable names of the 16 ARM host registers, indexed by register number.
// The emitters below pass these to their assem_debug() trace strings.
// Register 11 is shown as "fp", 13 as "sp", 14 as "lr" and 15 as "pc";
// each entry holds at most three characters plus the terminating NUL.
799char regname[16][4] = {
800 "r0",
801 "r1",
802 "r2",
803 "r3",
804 "r4",
805 "r5",
806 "r6",
807 "r7",
808 "r8",
809 "r9",
810 "r10",
811 "fp",
812 "r12",
813 "sp",
814 "lr",
815 "pc"};
816
817void output_byte(u_char byte)
818{
819 *(out++)=byte;
820}
821void output_modrm(u_char mod,u_char rm,u_char ext)
822{
823 assert(mod<4);
824 assert(rm<8);
825 assert(ext<8);
826 u_char byte=(mod<<6)|(ext<<3)|rm;
827 *(out++)=byte;
828}
829void output_sib(u_char scale,u_char index,u_char base)
830{
831 assert(scale<4);
832 assert(index<8);
833 assert(base<8);
834 u_char byte=(scale<<6)|(index<<3)|base;
835 *(out++)=byte;
836}
837void output_w32(u_int word)
838{
839 *((u_int *)out)=word;
840 out+=4;
841}
// Build the register fields of an ARM instruction word:
// rn goes to bits 16-19, rd to bits 12-15 and rm to bits 0-3.
// Each register number must be below 16 (asserted).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted left
// by 'shift' bits. 'shift' must be even; it is stored as the right-rotation
// (32-shift)&30 in the rotate field, i.e. half of that value in bits 8-11.
// rd and rn must be below 16 and imm below 256 (asserted).
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rotate<<7)|imm;
}
857u_int genimm(u_int imm,u_int *encoded)
858{
c2e3bd42 859 *encoded=0;
860 if(imm==0) return 1;
57871462 861 int i=32;
862 while(i>0)
863 {
864 if(imm<256) {
865 *encoded=((i&30)<<7)|imm;
866 return 1;
867 }
868 imm=(imm>>2)|(imm<<30);i-=2;
869 }
870 return 0;
871}
cfbd3c6e 872void genimm_checked(u_int imm,u_int *encoded)
873{
874 u_int ret=genimm(imm,encoded);
875 assert(ret);
876}
57871462 877u_int genjmp(u_int addr)
878{
879 int offset=addr-(int)out-8;
e80343e2 880 if(offset<-33554432||offset>=33554432) {
881 if (addr>2) {
882 printf("genjmp: out of range: %08x\n", offset);
883 exit(1);
884 }
885 return 0;
886 }
57871462 887 return ((u_int)offset>>2)&0xffffff;
888}
889
890void emit_mov(int rs,int rt)
891{
892 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
893 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
894}
895
896void emit_movs(int rs,int rt)
897{
898 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
899 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
900}
901
902void emit_add(int rs1,int rs2,int rt)
903{
904 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_adds(int rs1,int rs2,int rt)
909{
910 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_adcs(int rs1,int rs2,int rt)
915{
916 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_sbc(int rs1,int rs2,int rt)
921{
922 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_sbcs(int rs1,int rs2,int rt)
927{
928 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
929 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
930}
931
932void emit_neg(int rs, int rt)
933{
934 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
935 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
936}
937
938void emit_negs(int rs, int rt)
939{
940 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
941 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
942}
943
944void emit_sub(int rs1,int rs2,int rt)
945{
946 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_subs(int rs1,int rs2,int rt)
951{
952 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
953 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
954}
955
956void emit_zeroreg(int rt)
957{
958 assem_debug("mov %s,#0\n",regname[rt]);
959 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
960}
961
790ee18e 962void emit_loadlp(u_int imm,u_int rt)
963{
964 add_literal((int)out,imm);
965 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
966 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
967}
968void emit_movw(u_int imm,u_int rt)
969{
970 assert(imm<65536);
971 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
972 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
973}
974void emit_movt(u_int imm,u_int rt)
975{
976 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
977 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
978}
979void emit_movimm(u_int imm,u_int rt)
980{
981 u_int armval;
982 if(genimm(imm,&armval)) {
983 assem_debug("mov %s,#%d\n",regname[rt],imm);
984 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
985 }else if(genimm(~imm,&armval)) {
986 assem_debug("mvn %s,#%d\n",regname[rt],imm);
987 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
988 }else if(imm<65536) {
989 #ifdef ARMv5_ONLY
990 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
991 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
992 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
993 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
994 #else
995 emit_movw(imm,rt);
996 #endif
997 }else{
998 #ifdef ARMv5_ONLY
999 emit_loadlp(imm,rt);
1000 #else
1001 emit_movw(imm&0x0000FFFF,rt);
1002 emit_movt(imm&0xFFFF0000,rt);
1003 #endif
1004 }
1005}
1006void emit_pcreladdr(u_int rt)
1007{
1008 assem_debug("add %s,pc,#?\n",regname[rt]);
1009 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1010}
1011
57871462 1012void emit_loadreg(int r, int hr)
1013{
3d624f89 1014#ifdef FORCE32
1015 if(r&64) {
1016 printf("64bit load in 32bit mode!\n");
7f2607ea 1017 assert(0);
1018 return;
3d624f89 1019 }
1020#endif
57871462 1021 if((r&63)==0)
1022 emit_zeroreg(hr);
1023 else {
3d624f89 1024 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1025 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1026 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1027 if(r==CCREG) addr=(int)&cycle_count;
1028 if(r==CSREG) addr=(int)&Status;
1029 if(r==FSREG) addr=(int)&FCR31;
1030 if(r==INVCP) addr=(int)&invc_ptr;
1031 u_int offset = addr-(u_int)&dynarec_local;
1032 assert(offset<4096);
1033 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1034 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1035 }
1036}
1037void emit_storereg(int r, int hr)
1038{
3d624f89 1039#ifdef FORCE32
1040 if(r&64) {
1041 printf("64bit store in 32bit mode!\n");
7f2607ea 1042 assert(0);
1043 return;
3d624f89 1044 }
1045#endif
1046 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1047 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1048 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1049 if(r==CCREG) addr=(int)&cycle_count;
1050 if(r==FSREG) addr=(int)&FCR31;
1051 u_int offset = addr-(u_int)&dynarec_local;
1052 assert(offset<4096);
1053 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1054 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1055}
1056
1057void emit_test(int rs, int rt)
1058{
1059 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1060 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1061}
1062
1063void emit_testimm(int rs,int imm)
1064{
1065 u_int armval;
5a05d80c 1066 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1067 genimm_checked(imm,&armval);
57871462 1068 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1069}
1070
b9b61529 1071void emit_testeqimm(int rs,int imm)
1072{
1073 u_int armval;
1074 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1075 genimm_checked(imm,&armval);
b9b61529 1076 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1077}
1078
57871462 1079void emit_not(int rs,int rt)
1080{
1081 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1082 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1083}
1084
b9b61529 1085void emit_mvnmi(int rs,int rt)
1086{
1087 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1088 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1089}
1090
57871462 1091void emit_and(u_int rs1,u_int rs2,u_int rt)
1092{
1093 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
1097void emit_or(u_int rs1,u_int rs2,u_int rt)
1098{
1099 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1100 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1101}
1102void emit_or_and_set_flags(int rs1,int rs2,int rt)
1103{
1104 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1106}
1107
f70d384d 1108void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1109{
1110 assert(rs<16);
1111 assert(rt<16);
1112 assert(imm<32);
1113 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1114 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1115}
1116
576bbd8f 1117void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1118{
1119 assert(rs<16);
1120 assert(rt<16);
1121 assert(imm<32);
1122 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1123 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1124}
1125
57871462 1126void emit_xor(u_int rs1,u_int rs2,u_int rt)
1127{
1128 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1130}
1131
57871462 1132void emit_addimm(u_int rs,int imm,u_int rt)
1133{
1134 assert(rs<16);
1135 assert(rt<16);
1136 if(imm!=0) {
57871462 1137 u_int armval;
1138 if(genimm(imm,&armval)) {
1139 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1140 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1141 }else if(genimm(-imm,&armval)) {
1142 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1144 }else if(imm<0) {
ffb0b9e0 1145 assert(imm>-65536);
57871462 1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1147 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1148 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1149 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1150 }else{
ffb0b9e0 1151 assert(imm<65536);
57871462 1152 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1153 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1154 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1155 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1156 }
1157 }
1158 else if(rs!=rt) emit_mov(rs,rt);
1159}
1160
1161void emit_addimm_and_set_flags(int imm,int rt)
1162{
1163 assert(imm>-65536&&imm<65536);
1164 u_int armval;
1165 if(genimm(imm,&armval)) {
1166 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1167 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1168 }else if(genimm(-imm,&armval)) {
1169 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1170 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1171 }else if(imm<0) {
1172 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1173 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1174 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1175 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1176 }else{
1177 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1178 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1179 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1180 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1181 }
1182}
1183void emit_addimm_no_flags(u_int imm,u_int rt)
1184{
1185 emit_addimm(rt,imm,rt);
1186}
1187
1188void emit_addnop(u_int r)
1189{
1190 assert(r<16);
1191 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1192 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1193}
1194
1195void emit_adcimm(u_int rs,int imm,u_int rt)
1196{
1197 u_int armval;
cfbd3c6e 1198 genimm_checked(imm,&armval);
57871462 1199 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1200 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1201}
1202/*void emit_sbcimm(int imm,u_int rt)
1203{
1204 u_int armval;
cfbd3c6e 1205 genimm_checked(imm,&armval);
57871462 1206 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1207 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1208}*/
1209void emit_sbbimm(int imm,u_int rt)
1210{
1211 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1212 assert(rt<8);
1213 if(imm<128&&imm>=-128) {
1214 output_byte(0x83);
1215 output_modrm(3,rt,3);
1216 output_byte(imm);
1217 }
1218 else
1219 {
1220 output_byte(0x81);
1221 output_modrm(3,rt,3);
1222 output_w32(imm);
1223 }
1224}
1225void emit_rscimm(int rs,int imm,u_int rt)
1226{
1227 assert(0);
1228 u_int armval;
cfbd3c6e 1229 genimm_checked(imm,&armval);
57871462 1230 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1232}
1233
1234void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1235{
1236 // TODO: if(genimm(imm,&armval)) ...
1237 // else
1238 emit_movimm(imm,HOST_TEMPREG);
1239 emit_adds(HOST_TEMPREG,rsl,rtl);
1240 emit_adcimm(rsh,0,rth);
1241}
1242
1243void emit_sbb(int rs1,int rs2)
1244{
1245 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1246 output_byte(0x19);
1247 output_modrm(3,rs1,rs2);
1248}
1249
1250void emit_andimm(int rs,int imm,int rt)
1251{
1252 u_int armval;
790ee18e 1253 if(imm==0) {
1254 emit_zeroreg(rt);
1255 }else if(genimm(imm,&armval)) {
57871462 1256 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1258 }else if(genimm(~imm,&armval)) {
1259 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1260 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1261 }else if(imm==65535) {
1262 #ifdef ARMv5_ONLY
1263 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1264 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1265 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1266 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1267 #else
1268 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1269 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1270 #endif
1271 }else{
1272 assert(imm>0&&imm<65535);
1273 #ifdef ARMv5_ONLY
1274 assem_debug("mov r14,#%d\n",imm&0xFF00);
1275 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1276 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1277 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1278 #else
1279 emit_movw(imm,HOST_TEMPREG);
1280 #endif
1281 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1282 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1283 }
1284}
1285
1286void emit_orimm(int rs,int imm,int rt)
1287{
1288 u_int armval;
790ee18e 1289 if(imm==0) {
1290 if(rs!=rt) emit_mov(rs,rt);
1291 }else if(genimm(imm,&armval)) {
57871462 1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1293 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1294 }else{
1295 assert(imm>0&&imm<65536);
1296 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1297 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1298 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1299 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1300 }
1301}
1302
1303void emit_xorimm(int rs,int imm,int rt)
1304{
57871462 1305 u_int armval;
790ee18e 1306 if(imm==0) {
1307 if(rs!=rt) emit_mov(rs,rt);
1308 }else if(genimm(imm,&armval)) {
57871462 1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1310 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1311 }else{
514ed0d9 1312 assert(imm>0&&imm<65536);
57871462 1313 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1314 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1315 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1316 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1317 }
1318}
1319
1320void emit_shlimm(int rs,u_int imm,int rt)
1321{
1322 assert(imm>0);
1323 assert(imm<32);
1324 //if(imm==1) ...
1325 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1327}
1328
c6c3b1b3 1329void emit_lsls_imm(int rs,int imm,int rt)
1330{
1331 assert(imm>0);
1332 assert(imm<32);
1333 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1334 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1335}
1336
57871462 1337void emit_shrimm(int rs,u_int imm,int rt)
1338{
1339 assert(imm>0);
1340 assert(imm<32);
1341 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1342 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1343}
1344
1345void emit_sarimm(int rs,u_int imm,int rt)
1346{
1347 assert(imm>0);
1348 assert(imm<32);
1349 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1351}
1352
1353void emit_rorimm(int rs,u_int imm,int rt)
1354{
1355 assert(imm>0);
1356 assert(imm<32);
1357 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1359}
1360
1361void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1362{
1363 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1364 assert(imm>0);
1365 assert(imm<32);
1366 //if(imm==1) ...
1367 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1369 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1370 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1371}
1372
1373void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1374{
1375 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1376 assert(imm>0);
1377 assert(imm<32);
1378 //if(imm==1) ...
1379 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1380 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1381 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1382 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1383}
1384
b9b61529 1385void emit_signextend16(int rs,int rt)
1386{
1387 #ifdef ARMv5_ONLY
1388 emit_shlimm(rs,16,rt);
1389 emit_sarimm(rt,16,rt);
1390 #else
1391 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1392 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1393 #endif
1394}
1395
c6c3b1b3 1396void emit_signextend8(int rs,int rt)
1397{
1398 #ifdef ARMv5_ONLY
1399 emit_shlimm(rs,24,rt);
1400 emit_sarimm(rt,24,rt);
1401 #else
1402 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1403 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1404 #endif
1405}
1406
57871462 1407void emit_shl(u_int rs,u_int shift,u_int rt)
1408{
1409 assert(rs<16);
1410 assert(rt<16);
1411 assert(shift<16);
1412 //if(imm==1) ...
1413 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1414 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1415}
1416void emit_shr(u_int rs,u_int shift,u_int rt)
1417{
1418 assert(rs<16);
1419 assert(rt<16);
1420 assert(shift<16);
1421 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1422 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1423}
1424void emit_sar(u_int rs,u_int shift,u_int rt)
1425{
1426 assert(rs<16);
1427 assert(rt<16);
1428 assert(shift<16);
1429 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1430 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1431}
1432void emit_shlcl(int r)
1433{
1434 assem_debug("shl %%%s,%%cl\n",regname[r]);
1435 assert(0);
1436}
1437void emit_shrcl(int r)
1438{
1439 assem_debug("shr %%%s,%%cl\n",regname[r]);
1440 assert(0);
1441}
1442void emit_sarcl(int r)
1443{
1444 assem_debug("sar %%%s,%%cl\n",regname[r]);
1445 assert(0);
1446}
1447
1448void emit_shldcl(int r1,int r2)
1449{
1450 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1451 assert(0);
1452}
1453void emit_shrdcl(int r1,int r2)
1454{
1455 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1456 assert(0);
1457}
1458void emit_orrshl(u_int rs,u_int shift,u_int rt)
1459{
1460 assert(rs<16);
1461 assert(rt<16);
1462 assert(shift<16);
1463 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1464 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1465}
1466void emit_orrshr(u_int rs,u_int shift,u_int rt)
1467{
1468 assert(rs<16);
1469 assert(rt<16);
1470 assert(shift<16);
1471 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1472 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1473}
1474
1475void emit_cmpimm(int rs,int imm)
1476{
1477 u_int armval;
1478 if(genimm(imm,&armval)) {
5a05d80c 1479 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1480 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1481 }else if(genimm(-imm,&armval)) {
5a05d80c 1482 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1483 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1484 }else if(imm>0) {
1485 assert(imm<65536);
1486 #ifdef ARMv5_ONLY
1487 emit_movimm(imm,HOST_TEMPREG);
1488 #else
1489 emit_movw(imm,HOST_TEMPREG);
1490 #endif
1491 assem_debug("cmp %s,r14\n",regname[rs]);
1492 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1493 }else{
1494 assert(imm>-65536);
1495 #ifdef ARMv5_ONLY
1496 emit_movimm(-imm,HOST_TEMPREG);
1497 #else
1498 emit_movw(-imm,HOST_TEMPREG);
1499 #endif
1500 assem_debug("cmn %s,r14\n",regname[rs]);
1501 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1502 }
1503}
1504
1505void emit_cmovne(u_int *addr,int rt)
1506{
1507 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1508 assert(0);
1509}
1510void emit_cmovl(u_int *addr,int rt)
1511{
1512 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1513 assert(0);
1514}
1515void emit_cmovs(u_int *addr,int rt)
1516{
1517 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1518 assert(0);
1519}
1520void emit_cmovne_imm(int imm,int rt)
1521{
1522 assem_debug("movne %s,#%d\n",regname[rt],imm);
1523 u_int armval;
cfbd3c6e 1524 genimm_checked(imm,&armval);
57871462 1525 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1526}
1527void emit_cmovl_imm(int imm,int rt)
1528{
1529 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1530 u_int armval;
cfbd3c6e 1531 genimm_checked(imm,&armval);
57871462 1532 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1533}
1534void emit_cmovb_imm(int imm,int rt)
1535{
1536 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1537 u_int armval;
cfbd3c6e 1538 genimm_checked(imm,&armval);
57871462 1539 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1540}
1541void emit_cmovs_imm(int imm,int rt)
1542{
1543 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1544 u_int armval;
cfbd3c6e 1545 genimm_checked(imm,&armval);
57871462 1546 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1547}
1548void emit_cmove_reg(int rs,int rt)
1549{
1550 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1551 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1552}
1553void emit_cmovne_reg(int rs,int rt)
1554{
1555 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1556 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1557}
1558void emit_cmovl_reg(int rs,int rt)
1559{
1560 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1561 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1562}
1563void emit_cmovs_reg(int rs,int rt)
1564{
1565 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1566 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1567}
1568
// Set rt to 1 if rs < imm (signed compare via movlt), else 0.
// When rt aliases rs the zeroing is deferred until after the compare so the
// source value is still intact when it is read.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs < imm (unsigned compare via movcc), else 0.
// When rt aliases rs the zeroing is deferred until after the compare so the
// source value is still intact when it is read.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// 64-bit "set if less than immediate" with a 32-bit result in rt.
// The low word is compared first with emit_slti32, then the result is
// corrected from the high word: for imm >= 0 the high word is tested against
// zero, for imm < 0 it is compared with -1. rt must differ from rsh.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// 64-bit unsigned "set if less than immediate" with a 32-bit result in rt.
// The low word is compared first with emit_sltiu32, then the result is
// corrected from the high word: for imm >= 0 a non-zero high word forces 0,
// for imm < 0 a high word different from -1 forces 1. rt must differ from rsh.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1615
1616void emit_cmp(int rs,int rt)
1617{
1618 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1619 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1620}
// Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, load 1 into rt,
// then overwrite it with 0 under the LT condition.
void emit_set_gz32(int rs, int rt)
{
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Set rt to 1 if rs is non-zero. When the registers differ, rs is copied to
// rt with a flag-setting move; when they are the same, rs is only tested.
// A following movne then replaces any non-zero value with 1.
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" test with a 32-bit result in rt: start from the
// low-word result of emit_set_gz32, then test the high word - a non-zero
// high word forces 1 and a negative one forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);   // provisional answer from the low word
  emit_test(rsh,rsh);      // flags from the high word
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// 64-bit "non-zero" test with a 32-bit result in rt: OR both halves into rt
// with flags set, then replace any non-zero value with 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);   // rt = rsh | rsl, flags updated
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (signed, via movlt), else 0.
// If rt aliases one of the sources it is zeroed only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (unsigned, via movcc), else 0.
// If rt aliases one of the sources it is zeroed only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1665void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1666{
1667 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1668 assert(u1!=rt);
1669 assert(u2!=rt);
1670 emit_cmp(l1,l2);
1671 emit_movimm(0,rt);
1672 emit_sbcs(u1,u2,HOST_TEMPREG);
1673 emit_cmovl_imm(1,rt);
1674}
1675void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1676{
1677 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1678 assert(u1!=rt);
1679 assert(u2!=rt);
1680 emit_cmp(l1,l2);
1681 emit_movimm(0,rt);
1682 emit_sbcs(u1,u2,HOST_TEMPREG);
1683 emit_cmovb_imm(1,rt);
1684}
1685
1686void emit_call(int a)
1687{
1688 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1689 u_int offset=genjmp(a);
1690 output_w32(0xeb000000|offset);
1691}
1692void emit_jmp(int a)
1693{
1694 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1695 u_int offset=genjmp(a);
1696 output_w32(0xea000000|offset);
1697}
1698void emit_jne(int a)
1699{
1700 assem_debug("bne %x\n",a);
1701 u_int offset=genjmp(a);
1702 output_w32(0x1a000000|offset);
1703}
1704void emit_jeq(int a)
1705{
1706 assem_debug("beq %x\n",a);
1707 u_int offset=genjmp(a);
1708 output_w32(0x0a000000|offset);
1709}
1710void emit_js(int a)
1711{
1712 assem_debug("bmi %x\n",a);
1713 u_int offset=genjmp(a);
1714 output_w32(0x4a000000|offset);
1715}
1716void emit_jns(int a)
1717{
1718 assem_debug("bpl %x\n",a);
1719 u_int offset=genjmp(a);
1720 output_w32(0x5a000000|offset);
1721}
1722void emit_jl(int a)
1723{
1724 assem_debug("blt %x\n",a);
1725 u_int offset=genjmp(a);
1726 output_w32(0xba000000|offset);
1727}
1728void emit_jge(int a)
1729{
1730 assem_debug("bge %x\n",a);
1731 u_int offset=genjmp(a);
1732 output_w32(0xaa000000|offset);
1733}
1734void emit_jno(int a)
1735{
1736 assem_debug("bvc %x\n",a);
1737 u_int offset=genjmp(a);
1738 output_w32(0x7a000000|offset);
1739}
1740void emit_jc(int a)
1741{
1742 assem_debug("bcs %x\n",a);
1743 u_int offset=genjmp(a);
1744 output_w32(0x2a000000|offset);
1745}
1746void emit_jcc(int a)
1747{
1748 assem_debug("bcc %x\n",a);
1749 u_int offset=genjmp(a);
1750 output_w32(0x3a000000|offset);
1751}
1752
// Placeholder: logs "push $imm" and then fails via assert(0). Emits nothing.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Placeholder: logs "pusha" and then fails via assert(0). Emits nothing.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Placeholder: logs "popa" and then fails via assert(0). Emits nothing.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1768void emit_pushreg(u_int r)
1769{
1770 assem_debug("push %%%s\n",regname[r]);
1771 assert(0);
1772}
1773void emit_popreg(u_int r)
1774{
1775 assem_debug("pop %%%s\n",regname[r]);
1776 assert(0);
1777}
1778void emit_callreg(u_int r)
1779{
c6c3b1b3 1780 assert(r<15);
1781 assem_debug("blx %s\n",regname[r]);
1782 output_w32(0xe12fff30|r);
57871462 1783}
1784void emit_jmpreg(u_int r)
1785{
1786 assem_debug("mov pc,%s\n",regname[r]);
1787 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1788}
1789
1790void emit_readword_indexed(int offset, int rs, int rt)
1791{
1792 assert(offset>-4096&&offset<4096);
1793 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1794 if(offset>=0) {
1795 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1796 }else{
1797 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1798 }
1799}
1800void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1801{
1802 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1803 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1804}
c6c3b1b3 1805void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1806{
1807 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1808 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1809}
1810void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1811{
1812 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1813 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1814}
1815void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1816{
1817 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1819}
1820void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1824}
1825void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1826{
1827 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1828 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1829}
// Word load that optionally goes through a map register.
// map < 0: plain "ldr rt,[rs,#addr]". Otherwise addr must be 0 and the load
// is "ldr rt,[rs,map,lsl #2]".
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (first word, skipped when rh < 0) and rl (second
// word). Without a map register the two words are read at addr and addr+4.
// With a map register, map is incremented by one between the two loads
// (the index is scaled by 4 in the load itself); rh must differ from rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1850void emit_movsbl_indexed(int offset, int rs, int rt)
1851{
1852 assert(offset>-256&&offset<256);
1853 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1854 if(offset>=0) {
1855 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1856 }else{
1857 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1858 }
1859}
1860void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1861{
1862 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1863 else {
1864 if(addr==0) {
1865 emit_shlimm(map,2,map);
1866 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1867 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1868 }else{
1869 assert(addr>-256&&addr<256);
1870 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1871 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1872 emit_movsbl_indexed(addr, rt, rt);
1873 }
1874 }
1875}
1876void emit_movswl_indexed(int offset, int rs, int rt)
1877{
1878 assert(offset>-256&&offset<256);
1879 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1882 }else{
1883 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1884 }
1885}
1886void emit_movzbl_indexed(int offset, int rs, int rt)
1887{
1888 assert(offset>-4096&&offset<4096);
1889 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1892 }else{
1893 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1894 }
1895}
1896void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1897{
1898 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1899 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1900}
// Unsigned byte load that optionally goes through a map register.
//  - map < 0: plain "ldrb rt,[rs,#addr]".
//  - addr == 0: "ldrb rt,[rs,map,lsl #2]".
//  - otherwise: rt = rs + addr first, then "ldrb rt,[rt,map,lsl #2]".
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1913void emit_movzwl_indexed(int offset, int rs, int rt)
1914{
1915 assert(offset>-256&&offset<256);
1916 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1917 if(offset>=0) {
1918 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1919 }else{
1920 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1921 }
1922}
1923void emit_readword(int addr, int rt)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_movsbl(int addr, int rt)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_movswl(int addr, int rt)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<256);
1941 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1943}
1944void emit_movzbl(int addr, int rt)
1945{
1946 u_int offset = addr-(u_int)&dynarec_local;
1947 assert(offset<4096);
1948 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1949 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1950}
1951void emit_movzwl(int addr, int rt)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<256);
1955 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1957}
1958void emit_movzwl_reg(int rs, int rt)
1959{
1960 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1961 assert(0);
1962}
1963
1964void emit_xchg(int rs, int rt)
1965{
1966 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1967 assert(0);
1968}
1969void emit_writeword_indexed(int rt, int offset, int rs)
1970{
1971 assert(offset>-4096&&offset<4096);
1972 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1973 if(offset>=0) {
1974 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1975 }else{
1976 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1977 }
1978}
1979void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1980{
1981 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1982 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1983}
// Word store that optionally goes through a map register.
// map < 0: plain "str rt,[rs,#addr]". Otherwise addr must be 0 and the store
// is "str rt,[rs,map,lsl #2]". The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (first word) and rl (second word).
//  - map < 0: two plain stores at addr and addr+4 (rh skipped when negative).
//  - mapped, temp != rs: temp = map + 1 is prepared first and used as the
//    map register for the second word.
//  - mapped, temp == rs: the first word is stored, then rs itself is advanced
//    by 4 before the second store through the original map.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2008void emit_writehword_indexed(int rt, int offset, int rs)
2009{
2010 assert(offset>-256&&offset<256);
2011 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2012 if(offset>=0) {
2013 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2014 }else{
2015 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2016 }
2017}
2018void emit_writebyte_indexed(int rt, int offset, int rs)
2019{
2020 assert(offset>-4096&&offset<4096);
2021 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2022 if(offset>=0) {
2023 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2024 }else{
2025 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2026 }
2027}
2028void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2029{
2030 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2031 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2032}
// Byte store that optionally goes through a map register.
//  - map < 0: plain "strb rt,[rs,#addr]".
//  - addr == 0: "strb rt,[rs,map,lsl #2]".
//  - otherwise: temp = rs + addr first, then "strb rt,[temp,map,lsl #2]".
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
b96d3df7 2045void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2046{
2047 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2048 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2049}
2050void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2051{
2052 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2053 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2054}
2055void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2056{
2057 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2058 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2059}
57871462 2060void emit_writeword(int rt, int addr)
2061{
2062 u_int offset = addr-(u_int)&dynarec_local;
2063 assert(offset<4096);
2064 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2065 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2066}
2067void emit_writehword(int rt, int addr)
2068{
2069 u_int offset = addr-(u_int)&dynarec_local;
2070 assert(offset<256);
2071 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2072 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2073}
2074void emit_writebyte(int rt, int addr)
2075{
2076 u_int offset = addr-(u_int)&dynarec_local;
2077 assert(offset<4096);
74426039 2078 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2079 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2080}
// Placeholder: logs a "movl" of an immediate to an address and then fails
// via assert(0). Emits nothing.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Placeholder: logs a "movb" of an immediate to an address and then fails
// via assert(0). Emits nothing.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2091
2092void emit_mul(int rs)
2093{
2094 assem_debug("mul %%%s\n",regname[rs]);
2095 assert(0);
2096}
2097void emit_imul(int rs)
2098{
2099 assem_debug("imul %%%s\n",regname[rs]);
2100 assert(0);
2101}
2102void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2103{
2104 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2105 assert(rs1<16);
2106 assert(rs2<16);
2107 assert(hi<16);
2108 assert(lo<16);
2109 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2110}
2111void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2112{
2113 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2114 assert(rs1<16);
2115 assert(rs2<16);
2116 assert(hi<16);
2117 assert(lo<16);
2118 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2119}
2120
2121void emit_div(int rs)
2122{
2123 assem_debug("div %%%s\n",regname[rs]);
2124 assert(0);
2125}
2126void emit_idiv(int rs)
2127{
2128 assem_debug("idiv %%%s\n",regname[rs]);
2129 assert(0);
2130}
// Placeholder: logs "cdq" and then fails via assert(0). Emits nothing.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2136
2137void emit_clz(int rs,int rt)
2138{
2139 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2140 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2141}
2142
2143void emit_subcs(int rs1,int rs2,int rt)
2144{
2145 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2146 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2147}
2148
2149void emit_shrcc_imm(int rs,u_int imm,int rt)
2150{
2151 assert(imm>0);
2152 assert(imm<32);
2153 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2154 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2155}
2156
2157void emit_negmi(int rs, int rt)
2158{
2159 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2160 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2161}
2162
2163void emit_negsmi(int rs, int rt)
2164{
2165 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2166 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2167}
2168
2169void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2170{
2171 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2172 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2173}
2174
2175void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2176{
2177 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2178 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2179}
2180
2181void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2182{
2183 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2184 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2185}
2186
2187void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2188{
2189 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2190 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2191}
2192
2193void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2194{
2195 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2196 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2197}
2198
2199void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2200{
2201 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2202 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2203}
2204
2205void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2206{
2207 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2208 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2209}
2210
2211void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2212{
2213 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2214 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2215}
2216
2217void emit_teq(int rs, int rt)
2218{
2219 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2220 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2221}
2222
2223void emit_rsbimm(int rs, int imm, int rt)
2224{
2225 u_int armval;
cfbd3c6e 2226 genimm_checked(imm,&armval);
57871462 2227 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2228 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2229}
2230
2231// Load 2 immediates optimizing for small code size
2232void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2233{
2234 emit_movimm(imm1,rt1);
2235 u_int armval;
2236 if(genimm(imm2-imm1,&armval)) {
2237 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2238 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2239 }else if(genimm(imm1-imm2,&armval)) {
2240 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2241 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2242 }
2243 else emit_movimm(imm2,rt2);
2244}
2245
2246// Conditionally select one of two immediates, optimizing for small code size
2247// This will only be called if HAVE_CMOV_IMM is defined
2248void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2249{
2250 u_int armval;
2251 if(genimm(imm2-imm1,&armval)) {
2252 emit_movimm(imm1,rt);
2253 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2254 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2255 }else if(genimm(imm1-imm2,&armval)) {
2256 emit_movimm(imm1,rt);
2257 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2258 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2259 }
2260 else {
2261 #ifdef ARMv5_ONLY
2262 emit_movimm(imm1,rt);
2263 add_literal((int)out,imm2);
2264 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2265 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2266 #else
2267 emit_movw(imm1&0x0000FFFF,rt);
2268 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2269 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2270 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2271 }
2272 emit_movt(imm1&0xFFFF0000,rt);
2273 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2274 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2275 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2276 }
2277 #endif
2278 }
2279}
2280
// special case for checking invalid_code
// This variant generates nothing and asserts unconditionally.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0); // not implemented in this file
}
2286
2287// special case for checking invalid_code
2288void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2289{
2290 assert(imm<128&&imm>=0);
2291 assert(r>=0&&r<16);
2292 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2293 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2294 emit_cmpimm(HOST_TEMPREG,imm);
2295}
2296
2297// special case for tlb mapping
2298void emit_addsr12(int rs1,int rs2,int rt)
2299{
2300 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2301 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2302}
2303
0bbd1454 2304void emit_callne(int a)
2305{
2306 assem_debug("blne %x\n",a);
2307 u_int offset=genjmp(a);
2308 output_w32(0x1b000000|offset);
2309}
2310
// Used to preload hash table entries
// Writes two opcode bytes, a modrm byte and the 32-bit address.
// NOTE(review): this byte sequence (0x0F 0x18 + modrm) looks like an x86
// encoding rather than an ARM instruction word - confirm whether this
// function is still reachable in the ARM backend.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32((int)addr);
}
2320void emit_prefetchreg(int r)
2321{
2322 assem_debug("pld %s\n",regname[r]);
2323 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2324}
2325
2326// Special case for mini_ht
2327void emit_ldreq_indexed(int rs, u_int offset, int rt)
2328{
2329 assert(offset<4096);
2330 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2331 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2332}
2333
2334void emit_flds(int r,int sr)
2335{
2336 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2337 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2338}
2339
2340void emit_vldr(int r,int vr)
2341{
2342 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2343 output_w32(0xed900b00|(vr<<12)|(r<<16));
2344}
2345
2346void emit_fsts(int sr,int r)
2347{
2348 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2349 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2350}
2351
2352void emit_vstr(int vr,int r)
2353{
2354 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2355 output_w32(0xed800b00|(vr<<12)|(r<<16));
2356}
2357
// Emit "ftosizs s<d>,s<s>" (source s, destination d).
void emit_ftosizs(int s,int d)
{
  unsigned int insn = 0xeebd0ac0;

  assem_debug("ftosizs s%d,s%d\n",d,s);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= (s&14)>>1;
  insn |= (s&1)<<5;
  output_w32(insn);
}
2363
// Emit "ftosizd s<d>,d<s>" (source s, destination d).
void emit_ftosizd(int s,int d)
{
  unsigned int insn = 0xeebd0bc0;

  assem_debug("ftosizd s%d,d%d\n",d,s);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= s&7;
  output_w32(insn);
}
2369
// Emit "fsitos s<d>,s<s>" (source s, destination d).
void emit_fsitos(int s,int d)
{
  unsigned int insn = 0xeeb80ac0;

  assem_debug("fsitos s%d,s%d\n",d,s);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= (s&14)>>1;
  insn |= (s&1)<<5;
  output_w32(insn);
}
2375
// Emit "fsitod d<d>,s<s>" (source s, destination d).
void emit_fsitod(int s,int d)
{
  unsigned int insn = 0xeeb80bc0;

  assem_debug("fsitod d%d,s%d\n",d,s);
  insn |= (d&7)<<12;
  insn |= (s&14)>>1;
  insn |= (s&1)<<5;
  output_w32(insn);
}
2381
// Emit "fcvtds d<d>,s<s>" (source s, destination d).
void emit_fcvtds(int s,int d)
{
  unsigned int insn = 0xeeb70ac0;

  assem_debug("fcvtds d%d,s%d\n",d,s);
  insn |= (d&7)<<12;
  insn |= (s&14)>>1;
  insn |= (s&1)<<5;
  output_w32(insn);
}
2387
// Emit "fcvtsd s<d>,d<s>" (source s, destination d).
void emit_fcvtsd(int s,int d)
{
  unsigned int insn = 0xeeb70bc0;

  assem_debug("fcvtsd s%d,d%d\n",d,s);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= s&7;
  output_w32(insn);
}
2393
// Emit "fsqrts s<d>,s<s>" (source s, destination d).
// Both operands use the split register encoding (bits 1-3 and bit 0 placed
// separately), i.e. both are s registers, so the debug text names the
// destination as an s register as well.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2399
// Emit "fsqrtd d<d>,d<s>" (source s, destination d).
// Both operands are encoded as 3-bit d-register numbers ((d&7)<<12 and s&7),
// so the debug text names the destination as a d register as well.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2405
// Emit "fabss s<d>,s<s>" (source s, destination d).
// Both operands use the split register encoding (bits 1-3 and bit 0 placed
// separately), i.e. both are s registers, so the debug text names the
// destination as an s register as well.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2411
// Emit "fabsd d<d>,d<s>" (source s, destination d).
// Both operands are encoded as 3-bit d-register numbers ((d&7)<<12 and s&7),
// so the debug text names the destination as a d register as well.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2417
// Emit "fnegs s<d>,s<s>" (source s, destination d).
// Both operands use the split register encoding (bits 1-3 and bit 0 placed
// separately), i.e. both are s registers, so the debug text names the
// destination as an s register as well.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2423
// Emit "fnegd d<d>,d<s>" (source s, destination d).
// Both operands are encoded as 3-bit d-register numbers ((d&7)<<12 and s&7),
// so the debug text names the destination as a d register as well.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2429
// Emit "fadds s<d>,s<s1>,s<s2>".
void emit_fadds(int s1,int s2,int d)
{
  unsigned int insn = 0xee300a00;

  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= (s1&14)<<15;
  insn |= (s1&1)<<7;
  insn |= (s2&14)>>1;
  insn |= (s2&1)<<5;
  output_w32(insn);
}
2435
// Emit "faddd d<d>,d<s1>,d<s2>".
void emit_faddd(int s1,int s2,int d)
{
  unsigned int insn = 0xee300b00;

  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  insn |= (d&7)<<12;
  insn |= (s1&7)<<16;
  insn |= s2&7;
  output_w32(insn);
}
2441
// Emit "fsubs s<d>,s<s1>,s<s2>".
void emit_fsubs(int s1,int s2,int d)
{
  unsigned int insn = 0xee300a40;

  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= (s1&14)<<15;
  insn |= (s1&1)<<7;
  insn |= (s2&14)>>1;
  insn |= (s2&1)<<5;
  output_w32(insn);
}
2447
// Emit "fsubd d<d>,d<s1>,d<s2>".
void emit_fsubd(int s1,int s2,int d)
{
  unsigned int insn = 0xee300b40;

  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  insn |= (d&7)<<12;
  insn |= (s1&7)<<16;
  insn |= s2&7;
  output_w32(insn);
}
2453
// Emit "fmuls s<d>,s<s1>,s<s2>".
void emit_fmuls(int s1,int s2,int d)
{
  unsigned int insn = 0xee200a00;

  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= (s1&14)<<15;
  insn |= (s1&1)<<7;
  insn |= (s2&14)>>1;
  insn |= (s2&1)<<5;
  output_w32(insn);
}
2459
// Emit "fmuld d<d>,d<s1>,d<s2>".
void emit_fmuld(int s1,int s2,int d)
{
  unsigned int insn = 0xee200b00;

  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  insn |= (d&7)<<12;
  insn |= (s1&7)<<16;
  insn |= s2&7;
  output_w32(insn);
}
2465
// Emit "fdivs s<d>,s<s1>,s<s2>".
void emit_fdivs(int s1,int s2,int d)
{
  unsigned int insn = 0xee800a00;

  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  insn |= (d&14)<<11;
  insn |= (d&1)<<22;
  insn |= (s1&14)<<15;
  insn |= (s1&1)<<7;
  insn |= (s2&14)>>1;
  insn |= (s2&1)<<5;
  output_w32(insn);
}
2471
// Emit "fdivd d<d>,d<s1>,d<s2>".
void emit_fdivd(int s1,int s2,int d)
{
  unsigned int insn = 0xee800b00;

  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  insn |= (d&7)<<12;
  insn |= (s1&7)<<16;
  insn |= s2&7;
  output_w32(insn);
}
2477
// Emit a fixed "fcmps s14, s15".
// The x and y parameters are not used: the operand registers are hard-coded
// in the instruction word.
void emit_fcmps(int x,int y)
{
  const unsigned int insn = 0xeeb47a67;

  assem_debug("fcmps s14, s15\n");
  output_w32(insn);
}
2483
// Emit a fixed "fcmpd d6, d7".
// The x and y parameters are not used: the operand registers are hard-coded
// in the instruction word.
void emit_fcmpd(int x,int y)
{
  const unsigned int insn = 0xeeb46b47;

  assem_debug("fcmpd d6, d7\n");
  output_w32(insn);
}
2489
// Emit "fmstat" (fixed instruction word, no operands).
void emit_fmstat()
{
  const unsigned int insn = 0xeef1fa10;

  assem_debug("fmstat\n");
  output_w32(insn);
}
2495
2496void emit_bicne_imm(int rs,int imm,int rt)
2497{
2498 u_int armval;
cfbd3c6e 2499 genimm_checked(imm,&armval);
57871462 2500 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2501 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2502}
2503
2504void emit_biccs_imm(int rs,int imm,int rt)
2505{
2506 u_int armval;
cfbd3c6e 2507 genimm_checked(imm,&armval);
57871462 2508 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2509 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2510}
2511
2512void emit_bicvc_imm(int rs,int imm,int rt)
2513{
2514 u_int armval;
cfbd3c6e 2515 genimm_checked(imm,&armval);
57871462 2516 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2517 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2518}
2519
2520void emit_bichi_imm(int rs,int imm,int rt)
2521{
2522 u_int armval;
cfbd3c6e 2523 genimm_checked(imm,&armval);
57871462 2524 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2525 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2526}
2527
2528void emit_orrvs_imm(int rs,int imm,int rt)
2529{
2530 u_int armval;
cfbd3c6e 2531 genimm_checked(imm,&armval);
57871462 2532 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2533 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2534}
2535
b9b61529 2536void emit_orrne_imm(int rs,int imm,int rt)
2537{
2538 u_int armval;
cfbd3c6e 2539 genimm_checked(imm,&armval);
b9b61529 2540 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2541 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2542}
2543
2544void emit_andne_imm(int rs,int imm,int rt)
2545{
2546 u_int armval;
cfbd3c6e 2547 genimm_checked(imm,&armval);
b9b61529 2548 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2549 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2550}
2551
// Emit "addvc pc,pc,#?" with no immediate bits set.
// The target a is only printed in the debug text; it is not encoded here.
// NOTE(review): presumably the immediate is filled in later when the jump
// target is resolved - confirm against the code that patches jumps.
void emit_jno_unlikely(int a)
{
  unsigned int insn = 0x72800000;

  //emit_jno(a);
  assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
  insn |= rd_rn_rm(15,15,0);
  output_w32(insn);
}
2558
2559// Save registers before function call
2560void save_regs(u_int reglist)
2561{
2562 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2563 if(!reglist) return;
2564 assem_debug("stmia fp,{");
2565 if(reglist&1) assem_debug("r0, ");
2566 if(reglist&2) assem_debug("r1, ");
2567 if(reglist&4) assem_debug("r2, ");
2568 if(reglist&8) assem_debug("r3, ");
2569 if(reglist&0x1000) assem_debug("r12");
2570 assem_debug("}\n");
2571 output_w32(0xe88b0000|reglist);
2572}
2573// Restore registers after function call
2574void restore_regs(u_int reglist)
2575{
2576 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2577 if(!reglist) return;
2578 assem_debug("ldmia fp,{");
2579 if(reglist&1) assem_debug("r0, ");
2580 if(reglist&2) assem_debug("r1, ");
2581 if(reglist&4) assem_debug("r2, ");
2582 if(reglist&8) assem_debug("r3, ");
2583 if(reglist&0x1000) assem_debug("r12");
2584 assem_debug("}\n");
2585 output_w32(0xe89b0000|reglist);
2586}
2587
2588// Write back consts using r14 so we don't disturb the other registers
2589void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2590{
2591 int hr;
2592 for(hr=0;hr<HOST_REGS;hr++) {
2593 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2594 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2595 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2596 int value=constmap[i][hr];
2597 if(value==0) {
2598 emit_zeroreg(HOST_TEMPREG);
2599 }
2600 else {
2601 emit_movimm(value,HOST_TEMPREG);
2602 }
2603 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2604#ifndef FORCE32
57871462 2605 if((i_is32>>i_regmap[hr])&1) {
2606 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2607 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2608 }
24385cae 2609#endif
57871462 2610 }
2611 }
2612 }
2613 }
2614}
2615
2616/* Stubs/epilogue */
2617
2618void literal_pool(int n)
2619{
2620 if(!literalcount) return;
2621 if(n) {
2622 if((int)out-literals[0][0]<4096-n) return;
2623 }
2624 u_int *ptr;
2625 int i;
2626 for(i=0;i<literalcount;i++)
2627 {
2628 ptr=(u_int *)literals[i][0];
2629 u_int offset=(u_int)out-(u_int)ptr-8;
2630 assert(offset<4096);
2631 assert(!(offset&3));
2632 *ptr|=offset;
2633 output_w32(literals[i][1]);
2634 }
2635 literalcount=0;
2636}
2637
2638void literal_pool_jumpover(int n)
2639{
2640 if(!literalcount) return;
2641 if(n) {
2642 if((int)out-literals[0][0]<4096-n) return;
2643 }
2644 int jaddr=(int)out;
2645 emit_jmp(0);
2646 literal_pool(0);
2647 set_jump_target(jaddr,(int)out);
2648}
2649
2650emit_extjump2(int addr, int target, int linker)
2651{
2652 u_char *ptr=(u_char *)addr;
2653 assert((ptr[3]&0x0e)==0xa);
2654 emit_loadlp(target,0);
2655 emit_loadlp(addr,1);
24385cae 2656 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2657 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2658//DEBUG >
2659#ifdef DEBUG_CYCLE_COUNT
2660 emit_readword((int)&last_count,ECX);
2661 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2662 emit_readword((int)&next_interupt,ECX);
2663 emit_writeword(HOST_CCREG,(int)&Count);
2664 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2665 emit_writeword(ECX,(int)&last_count);
2666#endif
2667//DEBUG <
2668 emit_jmp(linker);
2669}
2670
2671emit_extjump(int addr, int target)
2672{
2673 emit_extjump2(addr, target, (int)dyna_linker);
2674}
2675emit_extjump_ds(int addr, int target)
2676{
2677 emit_extjump2(addr, target, (int)dyna_linker_ds);
2678}
2679
cbbab9cd 2680#ifdef PCSX
2681#include "pcsxmem_inline.c"
2682#endif
2683
// trashes r2
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to move" for that argument.
// When a1 currently lives in r0 it is moved out first so that writing r0
// does not destroy it; the a0==1, a1==0 case is a full swap through r2.
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0!=0) {
    if(a0==1) {
      // must swap
      emit_mov(1,2);
      emit_mov(0,1);
      emit_mov(2,0);
      return;
    }
    emit_mov(0,1);
    if(a0>=0) emit_mov(a0,0);
    return;
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2700
// Emit the out-of-line slow path ("stub") for the load recorded in stubs[n].
//
// Fields read from stubs[n]:
//   [0] stub type (LOADB/LOADBU/LOADH/LOADHU/LOADW[/LOADD]_STUB)
//   [1] location handed to set_jump_target() so that it branches here
//   [2] address the stub jumps back to when it is done
//   [3] index i of the instruction being compiled
//   [4] host register holding the address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] value used to compute the cycle count passed in r2
//   [7] mask of host registers that must be preserved (reglist)
//
// The PCSX build looks the address up through mem_rtab and either performs
// the load directly or calls one of the jump_handler_read* routines; the
// non-PCSX build goes through the readmem* tables via indirect_jump_indexed.
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor loads and LOADLR deliver their result through FTEMP
  // instead of the instruction's own target register.
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
#ifdef PCSX
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  // Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is
  // neither in reglist nor the address register.
  reglist|=(1<<rs);
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The result register is about to be overwritten, so it is not preserved.
  if(rt>=0)
    reglist&=~(1<<rt);
  // No free register: save the registers up front and take r0 (or r2 when
  // the address is in r0).
  if(temp==-1) {
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Use r1 as the second temporary when it is free, because that is where
  // pass_args() below would have to move it anyway.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12], then shifted left by one with flags set;
  // the "cc" (carry clear) loads and branch below depend on that flag.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    // Target is patched further down, once the restore code's address is known.
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
  emit_call(handler);
  // The handler's result is taken from r0 and extended to the access width.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
    }
  }
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else // !PCSX
  if(addr<0) addr=rt;
  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table that matches the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is nonzero when the register state in use is not regs[i].
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = table base, r1 = address>>16, r2 = adjusted cycle count
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // The loaded value is fetched from readmem_dword with the extension
  // that matches the stub type.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
2870
c6c3b1b3 2871#ifdef PCSX
2872// return memhandler, or get directly accessable address and return 0
2873u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2874{
2875 u_int l1,l2=0;
2876 l1=((u_int *)table)[addr>>12];
2877 if((l1&(1<<31))==0) {
2878 u_int v=l1<<1;
2879 *addr_host=v+addr;
2880 return 0;
2881 }
2882 else {
2883 l1<<=1;
2884 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2885 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2886 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2887 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2888 else
2889 l2=((u_int *)l1)[(addr&0xfff)/4];
2890 if((l2&(1<<31))==0) {
2891 u_int v=l2<<1;
2892 *addr_host=v+(addr&0xfff);
2893 return 0;
2894 }
2895 return l2<<1;
2896 }
2897}
2898#endif
2899
// Emit a memory read inline, for an address (addr) that is already known
// while the code is being generated.
//
//   type    - LOADx_STUB constant selecting width and extension
//   i       - index of the instruction being compiled
//   addr    - the address to read
//   regmap  - current host register mapping
//   target  - guest register receiving the result
//   adj     - cycle adjustment, used as CLOCK_DIVIDER*(adj+1)
//   reglist - mask of host registers that must be preserved across a call
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  // Fall back to the register mapped to -1 when the target has none.
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
#ifdef PCSX
  u_int handler,host_addr=0;
  // A nonzero return means pcsx_direct_read() already handled the access.
  if(pcsx_direct_read(type,addr,target?rs:-1,rt))
    return;
  handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  if (handler==0) {
    // Directly accessible memory: load straight from host_addr.
    if(rt<0)
      return;
    if(target==0||addr!=host_addr)
      emit_movimm(host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }

  // call a memhandler
  // The result register is overwritten, so it is not preserved.
  if(rt>=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  int cc=get_reg(regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // Count = last_count + cycle count + CLOCK_DIVIDER*(adj+1)
  emit_readword((int)&last_count,3);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
  emit_add(2,3,3);
  emit_writeword(3,(int)&Count);

  // Distance from this call site (+8) to the handler, checked against
  // the +/-32MB limit below.
  int offset=(int)handler-(int)out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm(handler,1);
    emit_callreg(1);
  }
  else
    emit_call(handler);
  // The handler's result is taken from r0 and extended to the access width.
  if(rt>=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
#else // if !PCSX
  // Select the handler table that matches the access width.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  if(target==0)
    emit_movimm(addr,rs);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled. This is
    // a very rare case and likely represents a bug.
    int ds=regmap!=regs[i].regmap;
    if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
    if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
  }
#endif
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The table entry for addr>>16 is read now, at code-generation time,
  // and loaded into r0 as an immediate.
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
#endif
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  // The loaded value is fetched from readmem_dword with the extension
  // that matches the stub type.
  if(rt>=0) {
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
#endif // !PCSX
}
3045
// Emit the out-of-line slow path ("stub") for the store recorded in stubs[n].
//
// Fields read from stubs[n]:
//   [0] stub type (STOREB/STOREH/STOREW[/STORED]_STUB)
//   [1] location handed to set_jump_target() so that it branches here
//   [2] address the stub jumps back to when it is done
//   [3] index i of the instruction being compiled
//   [4] host register holding the address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] value used to compute the cycle count passed in r2
//   [7] mask of host registers that must be preserved (reglist)
//
// The PCSX build looks the address up through mem_wtab and either performs
// the store directly or calls one of the jump_handler_write* routines; the
// non-PCSX build goes through the writemem* tables via indirect_jump_indexed.
do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt,r;
  int ds;
  // Coprocessor stores take their data from FTEMP instead of rs2[i].
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rth=get_reg(i_regmap,rs2[i]|64);
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
#ifdef PCSX
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
  // Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is
  // not in reglist and is neither the address nor the data register.
  int reglist2=reglist|(1<<rs)|(1<<rt);
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  // No free register: save the registers up front and take the first of
  // r0-r3 that is neither rs nor rt.
  if(temp==-1) {
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // Use r3 as the second temporary when it is free, because the handler
  // call below wants that value in r3 anyway.
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // temp2 = mem_wtab[rs>>12], then shifted left by one with flags set;
  // the "cc" (carry clear) stores and branch below depend on that flag.
  emit_readword((int)&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default: assert(0);
  }
  if(regs_saved) {
    // Target is patched further down, once the restore code's address is known.
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address (invcode check)

  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  switch(type) {
    case STOREB_STUB: handler=(int)jump_handler_write8; break;
    case STOREH_STUB: handler=(int)jump_handler_write16; break;
    case STOREW_STUB: handler=(int)jump_handler_write32; break;
  }
  assert(handler!=0);
  // r0 = address, r1 = data, r2 = adjusted cycle count, r3 = shifted table entry
  pass_args(rs,rt);
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
  // returns new cycle_count
  emit_call(handler);
  // Undo the adjustment added before the call on the value returned in r0.
  emit_addimm(0,-CLOCK_DIVIDER*stubs[n][6]-2,cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  ra=stubs[n][2];
  if(!restore_jump) ra+=4*3; // skip invcode check
  emit_jmp(ra);
#else // if !PCSX
  if(addr<0) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table that matches the access width.
  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
#ifndef FORCE32
  if(type==STORED_STUB)
    ftable=(int)writememd;
#endif
  assert(ftable!=0);
  // The address and the data are handed over through the globals
  // address and byte/hword/word/dword.
  emit_writeword(rs,(int)&address);
  //emit_shrimm(rs,16,rs);
  //emit_movmem_indexedx4(ftable,rs,rs);
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&word);
  if(type==STORED_STUB) {
#ifndef FORCE32
    emit_writeword(rt,(int)&dword);
    emit_writeword(r?rth:rt,(int)&dword+4);
#else
    printf("STORED_STUB\n");
#endif
  }
  //emit_pusha();
  save_regs(reglist);
#ifndef PCSX
  // ds is nonzero when the register state in use is not regs[i].
  ds=i_regs!=&regs[i];
  int real_rs=get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = table base, r1 = address>>16, r2 = adjusted cycle count
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_addimm(cc,2*stubs[n][5]+2,cc);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  // Recompute the cycle count register from Count and next_interupt,
  // and refresh last_count.
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
3204
3205inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3206{
3207 int rs=get_reg(regmap,-1);
3208 int rth=get_reg(regmap,target|64);
3209 int rt=get_reg(regmap,target);
3210 assert(rs>=0);
3211 assert(rt>=0);
cbbab9cd 3212#ifdef PCSX
b96d3df7 3213 u_int handler,host_addr=0;
cbbab9cd 3214 if(pcsx_direct_write(type,addr,rs,rt,regmap))
3215 return;
b96d3df7 3216 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3217 if (handler==0) {
3218 if(target==0||addr!=host_addr)
3219 emit_movimm(host_addr,rs);
3220 switch(type) {
3221 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3222 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3223 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3224 default: assert(0);
3225 }
3226 return;
3227 }
3228
3229 // call a memhandler
3230 save_regs(reglist);
3231 pass_args(target!=0?rs:-1,rt);
3232 if(target==0)
3233 emit_movimm(addr,0);
3234 int cc=get_reg(regmap,CCREG);
3235 if(cc<0)
3236 emit_loadreg(CCREG,2);
3237 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
3238 emit_movimm(handler,3);
3239 // returns new cycle_count
3240 emit_call((int)jump_handler_write_h);
3241 emit_addimm(0,-CLOCK_DIVIDER*(adj+1),cc<0?2:cc);
3242 if(cc<0)
3243 emit_storereg(CCREG,2);
3244 restore_regs(reglist);
3245#else // if !pcsx
57871462 3246 int ftable=0;
3247 if(type==STOREB_STUB)
3248 ftable=(int)writememb;
3249 if(type==STOREH_STUB)
3250 ftable=(int)writememh;
3251 if(type==STOREW_STUB)
3252 ftable=(int)writemem;
24385cae 3253#ifndef FORCE32
57871462 3254 if(type==STORED_STUB)
3255 ftable=(int)writememd;
24385cae 3256#endif
3257 assert(ftable!=0);
57871462 3258 emit_writeword(rs,(int)&address);
3259 //emit_shrimm(rs,16,rs);
3260 //emit_movmem_indexedx4(ftable,rs,rs);
3261 if(type==STOREB_STUB)
3262 emit_writebyte(rt,(int)&byte);
3263 if(type==STOREH_STUB)
3264 emit_writehword(rt,(int)&hword);
3265 if(type==STOREW_STUB)
3266 emit_writeword(rt,(int)&word);
3267 if(type==STORED_STUB) {
3d624f89 3268#ifndef FORCE32
57871462 3269 emit_writeword(rt,(int)&dword);
3270 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3271#else
3272 printf("STORED_STUB\n");
3273#endif
57871462 3274 }
3275 //emit_pusha();
3276 save_regs(reglist);
0c1fe38b 3277#ifndef PCSX
3278 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3279 if((signed int)addr>=(signed int)0xC0000000) {
3280 // Theoretically we can have a pagefault here, if the TLB has never
3281 // been enabled and the address is outside the range 80000000..BFFFFFFF
3282 // Write out the registers so the pagefault can be handled. This is
3283 // a very rare case and likely represents a bug.
3284 int ds=regmap!=regs[i].regmap;
3285 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3286 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3287 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3288 }
3289#endif
57871462 3290 //emit_shrimm(rs,16,1);
3291 int cc=get_reg(regmap,CCREG);
3292 if(cc<0) {
3293 emit_loadreg(CCREG,2);
3294 }
3295 //emit_movimm(ftable,0);
3296 emit_movimm(((u_int *)ftable)[addr>>16],0);
3297 //emit_readword((int)&last_count,12);
3298 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 3299#ifndef PCSX
57871462 3300 if((signed int)addr>=(signed int)0xC0000000) {
3301 // Pagefault address
3302 int ds=regmap!=regs[i].regmap;
3303 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3304 }
f51dc36c 3305#endif
57871462 3306 //emit_add(12,2,2);
3307 //emit_writeword(2,(int)&Count);
3308 //emit_call(((u_int *)ftable)[addr>>16]);
3309 emit_call((int)&indirect_jump);
3310 emit_readword((int)&Count,HOST_TEMPREG);
3311 emit_readword((int)&next_interupt,2);
3312 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
3313 emit_writeword(2,(int)&last_count);
3314 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3315 if(cc<0) {
3316 emit_storereg(CCREG,HOST_TEMPREG);
3317 }
3318 //emit_popa();
3319 restore_regs(reglist);
b96d3df7 3320#endif
57871462 3321}
3322
3323do_unalignedwritestub(int n)
3324{
b7918751 3325 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3326 literal_pool(256);
57871462 3327 set_jump_target(stubs[n][1],(int)out);
b7918751 3328
3329 int i=stubs[n][3];
3330 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3331 int addr=stubs[n][5];
3332 u_int reglist=stubs[n][7];
3333 signed char *i_regmap=i_regs->regmap;
3334 int temp2=get_reg(i_regmap,FTEMP);
3335 int rt;
3336 int ds, real_rs;
3337 rt=get_reg(i_regmap,rs2[i]);
3338 assert(rt>=0);
3339 assert(addr>=0);
3340 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3341 reglist|=(1<<addr);
3342 reglist&=~(1<<temp2);
3343
b96d3df7 3344#if 1
3345 // don't bother with it and call write handler
3346 save_regs(reglist);
3347 pass_args(addr,rt);
3348 int cc=get_reg(i_regmap,CCREG);
3349 if(cc<0)
3350 emit_loadreg(CCREG,2);
3351 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2);
3352 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
3353 emit_addimm(0,-CLOCK_DIVIDER*stubs[n][6]-2,cc<0?2:cc);
3354 if(cc<0)
3355 emit_storereg(CCREG,2);
3356 restore_regs(reglist);
3357 emit_jmp(stubs[n][2]); // return address
3358#else
b7918751 3359 emit_andimm(addr,0xfffffffc,temp2);
3360 emit_writeword(temp2,(int)&address);
3361
3362 save_regs(reglist);
97a238a6 3363#ifndef PCSX
b7918751 3364 ds=i_regs!=&regs[i];
3365 real_rs=get_reg(i_regmap,rs1[i]);
3366 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3367 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3368 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3369 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3370#endif
b7918751 3371 emit_shrimm(addr,16,1);
3372 int cc=get_reg(i_regmap,CCREG);
3373 if(cc<0) {
3374 emit_loadreg(CCREG,2);
3375 }
3376 emit_movimm((u_int)readmem,0);
3377 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3378#ifndef PCSX
3379 // pagefault address
3380 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3381#endif
b7918751 3382 emit_call((int)&indirect_jump_indexed);
3383 restore_regs(reglist);
3384
3385 emit_readword((int)&readmem_dword,temp2);
3386 int temp=addr; //hmh
3387 emit_shlimm(addr,3,temp);
3388 emit_andimm(temp,24,temp);
3389#ifdef BIG_ENDIAN_MIPS
3390 if (opcode[i]==0x2e) // SWR
3391#else
3392 if (opcode[i]==0x2a) // SWL
3393#endif
3394 emit_xorimm(temp,24,temp);
3395 emit_movimm(-1,HOST_TEMPREG);
55439448 3396 if (opcode[i]==0x2a) { // SWL
b7918751 3397 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3398 emit_orrshr(rt,temp,temp2);
3399 }else{
3400 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3401 emit_orrshl(rt,temp,temp2);
3402 }
3403 emit_readword((int)&address,addr);
3404 emit_writeword(temp2,(int)&word);
3405 //save_regs(reglist); // don't need to, no state changes
3406 emit_shrimm(addr,16,1);
3407 emit_movimm((u_int)writemem,0);
3408 //emit_call((int)&indirect_jump_indexed);
3409 emit_mov(15,14);
3410 emit_readword_dualindexedx4(0,1,15);
3411 emit_readword((int)&Count,HOST_TEMPREG);
3412 emit_readword((int)&next_interupt,2);
3413 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3414 emit_writeword(2,(int)&last_count);
3415 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3416 if(cc<0) {
3417 emit_storereg(CCREG,HOST_TEMPREG);
3418 }
3419 restore_regs(reglist);
57871462 3420 emit_jmp(stubs[n][2]); // return address
b96d3df7 3421#endif
57871462 3422}
3423
// Debug helper: prints seven of its arguments in hex (esp is not printed),
// followed in parentheses by the word stored immediately below the address
// of the first argument.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3428
3429do_invstub(int n)
3430{
3431 literal_pool(20);
3432 u_int reglist=stubs[n][3];
3433 set_jump_target(stubs[n][1],(int)out);
3434 save_regs(reglist);
3435 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3436 emit_call((int)&invalidate_addr);
3437 restore_regs(reglist);
3438 emit_jmp(stubs[n][2]); // return address
3439}
3440
3441int do_dirty_stub(int i)
3442{
3443 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3444 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3445 #ifdef PCSX
3446 addr=(u_int)source;
3447 #endif
57871462 3448 // Careful about the code output here, verify_dirty needs to parse it.
3449 #ifdef ARMv5_ONLY
ac545b3a 3450 emit_loadlp(addr,1);
57871462 3451 emit_loadlp((int)copy,2);
3452 emit_loadlp(slen*4,3);
3453 #else
ac545b3a 3454 emit_movw(addr&0x0000FFFF,1);
57871462 3455 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3456 emit_movt(addr&0xFFFF0000,1);
57871462 3457 emit_movt(((u_int)copy)&0xFFFF0000,2);
3458 emit_movw(slen*4,3);
3459 #endif
3460 emit_movimm(start+i*4,0);
3461 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3462 int entry=(int)out;
3463 load_regs_entry(i);
3464 if(entry==(int)out) entry=instr_addr[i];
3465 emit_jmp(instr_addr[i]);
3466 return entry;
3467}
3468
3469void do_dirty_stub_ds()
3470{
3471 // Careful about the code output here, verify_dirty needs to parse it.
3472 #ifdef ARMv5_ONLY
3473 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3474 emit_loadlp((int)copy,2);
3475 emit_loadlp(slen*4,3);
3476 #else
3477 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3478 emit_movw(((u_int)copy)&0x0000FFFF,2);
3479 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3480 emit_movt(((u_int)copy)&0xFFFF0000,2);
3481 emit_movw(slen*4,3);
3482 #endif
3483 emit_movimm(start+1,0);
3484 emit_call((int)&verify_code_ds);
3485}
3486
3487do_cop1stub(int n)
3488{
3489 literal_pool(256);
3490 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3491 set_jump_target(stubs[n][1],(int)out);
3492 int i=stubs[n][3];
3d624f89 3493// int rs=stubs[n][4];
57871462 3494 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3495 int ds=stubs[n][6];
3496 if(!ds) {
3497 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3498 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3499 }
3500 //else {printf("fp exception in delay slot\n");}
3501 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3502 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3503 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3504 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3505 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3506}
3507
3508/* TLB */
3509
3510int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3511{
3512 if(c) {
3513 if((signed int)addr>=(signed int)0xC0000000) {
3514 // address_generation already loaded the const
3515 emit_readword_dualindexedx4(FP,map,map);
3516 }
3517 else
3518 return -1; // No mapping
3519 }
3520 else {
3521 assert(s!=map);
3522 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3523 emit_addsr12(map,s,map);
3524 // Schedule this while we wait on the load
3525 //if(x) emit_xorimm(s,x,ar);
3526 if(shift>=0) emit_shlimm(s,3,shift);
3527 if(~a) emit_andimm(s,a,ar);
3528 emit_readword_dualindexedx4(FP,map,map);
3529 }
3530 return map;
3531}
3532int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3533{
3534 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3535 emit_test(map,map);
3536 *jaddr=(int)out;
3537 emit_js(0);
3538 }
3539 return map;
3540}
3541
3542int gen_tlb_addr_r(int ar, int map) {
3543 if(map>=0) {
3544 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3545 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3546 }
3547}
3548
3549int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3550{
3551 if(c) {
3552 if(addr<0x80800000||addr>=0xC0000000) {
3553 // address_generation already loaded the const
3554 emit_readword_dualindexedx4(FP,map,map);
3555 }
3556 else
3557 return -1; // No mapping
3558 }
3559 else {
3560 assert(s!=map);
3561 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3562 emit_addsr12(map,s,map);
3563 // Schedule this while we wait on the load
3564 //if(x) emit_xorimm(s,x,ar);
3565 emit_readword_dualindexedx4(FP,map,map);
3566 }
3567 return map;
3568}
3569int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3570{
3571 if(!c||addr<0x80800000||addr>=0xC0000000) {
3572 emit_testimm(map,0x40000000);
3573 *jaddr=(int)out;
3574 emit_jne(0);
3575 }
3576}
3577
3578int gen_tlb_addr_w(int ar, int map) {
3579 if(map>=0) {
3580 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3581 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3582 }
3583}
3584
3585// Generate the address of the memory_map entry, relative to dynarec_local
3586generate_map_const(u_int addr,int reg) {
3587 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3588 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3589}
3590
3591/* Special assem */
3592
3593void shift_assemble_arm(int i,struct regstat *i_regs)
3594{
3595 if(rt1[i]) {
3596 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3597 {
3598 signed char s,t,shift;
3599 t=get_reg(i_regs->regmap,rt1[i]);
3600 s=get_reg(i_regs->regmap,rs1[i]);
3601 shift=get_reg(i_regs->regmap,rs2[i]);
3602 if(t>=0){
3603 if(rs1[i]==0)
3604 {
3605 emit_zeroreg(t);
3606 }
3607 else if(rs2[i]==0)
3608 {
3609 assert(s>=0);
3610 if(s!=t) emit_mov(s,t);
3611 }
3612 else
3613 {
3614 emit_andimm(shift,31,HOST_TEMPREG);
3615 if(opcode2[i]==4) // SLLV
3616 {
3617 emit_shl(s,HOST_TEMPREG,t);
3618 }
3619 if(opcode2[i]==6) // SRLV
3620 {
3621 emit_shr(s,HOST_TEMPREG,t);
3622 }
3623 if(opcode2[i]==7) // SRAV
3624 {
3625 emit_sar(s,HOST_TEMPREG,t);
3626 }
3627 }
3628 }
3629 } else { // DSLLV/DSRLV/DSRAV
3630 signed char sh,sl,th,tl,shift;
3631 th=get_reg(i_regs->regmap,rt1[i]|64);
3632 tl=get_reg(i_regs->regmap,rt1[i]);
3633 sh=get_reg(i_regs->regmap,rs1[i]|64);
3634 sl=get_reg(i_regs->regmap,rs1[i]);
3635 shift=get_reg(i_regs->regmap,rs2[i]);
3636 if(tl>=0){
3637 if(rs1[i]==0)
3638 {
3639 emit_zeroreg(tl);
3640 if(th>=0) emit_zeroreg(th);
3641 }
3642 else if(rs2[i]==0)
3643 {
3644 assert(sl>=0);
3645 if(sl!=tl) emit_mov(sl,tl);
3646 if(th>=0&&sh!=th) emit_mov(sh,th);
3647 }
3648 else
3649 {
3650 // FIXME: What if shift==tl ?
3651 assert(shift!=tl);
3652 int temp=get_reg(i_regs->regmap,-1);
3653 int real_th=th;
3654 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3655 assert(sl>=0);
3656 assert(sh>=0);
3657 emit_andimm(shift,31,HOST_TEMPREG);
3658 if(opcode2[i]==0x14) // DSLLV
3659 {
3660 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3661 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3662 emit_orrshr(sl,HOST_TEMPREG,th);
3663 emit_andimm(shift,31,HOST_TEMPREG);
3664 emit_testimm(shift,32);
3665 emit_shl(sl,HOST_TEMPREG,tl);
3666 if(th>=0) emit_cmovne_reg(tl,th);
3667 emit_cmovne_imm(0,tl);
3668 }
3669 if(opcode2[i]==0x16) // DSRLV
3670 {
3671 assert(th>=0);
3672 emit_shr(sl,HOST_TEMPREG,tl);
3673 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3674 emit_orrshl(sh,HOST_TEMPREG,tl);
3675 emit_andimm(shift,31,HOST_TEMPREG);
3676 emit_testimm(shift,32);
3677 emit_shr(sh,HOST_TEMPREG,th);
3678 emit_cmovne_reg(th,tl);
3679 if(real_th>=0) emit_cmovne_imm(0,th);
3680 }
3681 if(opcode2[i]==0x17) // DSRAV
3682 {
3683 assert(th>=0);
3684 emit_shr(sl,HOST_TEMPREG,tl);
3685 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3686 if(real_th>=0) {
3687 assert(temp>=0);
3688 emit_sarimm(th,31,temp);
3689 }
3690 emit_orrshl(sh,HOST_TEMPREG,tl);
3691 emit_andimm(shift,31,HOST_TEMPREG);
3692 emit_testimm(shift,32);
3693 emit_sar(sh,HOST_TEMPREG,th);
3694 emit_cmovne_reg(th,tl);
3695 if(real_th>=0) emit_cmovne_reg(temp,th);
3696 }
3697 }
3698 }
3699 }
3700 }
3701}
ffb0b9e0 3702
3703#ifdef PCSX
3704static void speculate_mov(int rs,int rt)
3705{
3706 if(rt!=0) {
3707 smrv_strong_next|=1<<rt;
3708 smrv[rt]=smrv[rs];
3709 }
3710}
3711
3712static void speculate_mov_weak(int rs,int rt)
3713{
3714 if(rt!=0) {
3715 smrv_weak_next|=1<<rt;
3716 smrv[rt]=smrv[rs];
3717 }
3718}
3719
3720static void speculate_register_values(int i)
3721{
3722 if(i==0) {
3723 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3724 // gp,sp are likely to stay the same throughout the block
3725 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3726 smrv_weak_next=~smrv_strong_next;
3727 //printf(" llr %08x\n", smrv[4]);
3728 }
3729 smrv_strong=smrv_strong_next;
3730 smrv_weak=smrv_weak_next;
3731 switch(itype[i]) {
3732 case ALU:
3733 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3734 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3735 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3736 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3737 else {
3738 smrv_strong_next&=~(1<<rt1[i]);
3739 smrv_weak_next&=~(1<<rt1[i]);
3740 }
3741 break;
3742 case SHIFTIMM:
3743 smrv_strong_next&=~(1<<rt1[i]);
3744 smrv_weak_next&=~(1<<rt1[i]);
3745 // fallthrough
3746 case IMM16:
3747 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3748 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3749 if(hr>=0) {
3750 if(get_final_value(hr,i,&value))
3751 smrv[rt1[i]]=value;
3752 else smrv[rt1[i]]=constmap[i][hr];
3753 smrv_strong_next|=1<<rt1[i];
3754 }
3755 }
3756 else {
3757 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3758 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3759 }
3760 break;
3761 case LOAD:
3762 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3763 // special case for BIOS
3764 smrv[rt1[i]]=0xa0000000;
3765 smrv_strong_next|=1<<rt1[i];
3766 break;
3767 }
3768 // fallthrough
3769 case SHIFT:
3770 case LOADLR:
3771 case MOV:
3772 smrv_strong_next&=~(1<<rt1[i]);
3773 smrv_weak_next&=~(1<<rt1[i]);
3774 break;
3775 case COP0:
3776 case COP2:
3777 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3778 smrv_strong_next&=~(1<<rt1[i]);
3779 smrv_weak_next&=~(1<<rt1[i]);
3780 }
3781 break;
3782 case C2LS:
3783 if (opcode[i]==0x32) { // LWC2
3784 smrv_strong_next&=~(1<<rt1[i]);
3785 smrv_weak_next&=~(1<<rt1[i]);
3786 }
3787 break;
3788 }
3789#if 0
3790 int r=4;
3791 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3792 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3793#endif
3794}
3795
// Memory region classes returned by get_ptr_mem_type().  MTYPE_8000 must
// stay 0: emit_fastpath_cmp_jump() treats type 0 as the plain RAM check.
enum {
  MTYPE_8000 = 0,
  MTYPE_8020 = 1,
  MTYPE_0000 = 2,
  MTYPE_A000 = 3,
  MTYPE_1F80 = 4,
};
3803
3804static int get_ptr_mem_type(u_int a)
3805{
3806 if(a < 0x00200000) {
3807 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3808 // return wrong, must use memhandler for BIOS self-test to pass
3809 // 007 does similar stuff from a00 mirror, weird stuff
3810 return MTYPE_8000;
3811 return MTYPE_0000;
3812 }
3813 if(0x1f800000 <= a && a < 0x1f801000)
3814 return MTYPE_1F80;
3815 if(0x80200000 <= a && a < 0x80800000)
3816 return MTYPE_8020;
3817 if(0xa0000000 <= a && a < 0xa0200000)
3818 return MTYPE_A000;
3819 return MTYPE_8000;
3820}
3821#endif
3822
3823static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3824{
3825 int jaddr,type=0;
3826
3827#ifdef PCSX
3828 int mr=rs1[i];
3829 if(((smrv_strong|smrv_weak)>>mr)&1) {
3830 type=get_ptr_mem_type(smrv[mr]);
3831 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3832 }
3833 else {
3834 // use the mirror we are running on
3835 type=get_ptr_mem_type(start);
3836 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3837 }
3838
3839 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3840 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3841 addr=*addr_reg_override=HOST_TEMPREG;
3842 type=0;
3843 }
3844 else if(type==MTYPE_0000) { // RAM 0 mirror
3845 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3846 addr=*addr_reg_override=HOST_TEMPREG;
3847 type=0;
3848 }
3849 else if(type==MTYPE_A000) { // RAM A mirror
3850 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3851 addr=*addr_reg_override=HOST_TEMPREG;
3852 type=0;
3853 }
3854 else if(type==MTYPE_1F80) { // scratchpad
3855 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3856 emit_cmpimm(HOST_TEMPREG,0x1000);
3857 jaddr=(int)out;
3858 emit_jc(0);
3859 }
3860#endif
3861
3862 if(type==0)
3863 {
3864 emit_cmpimm(addr,RAM_SIZE);
3865 jaddr=(int)out;
3866 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3867 // Hint to branch predictor that the branch is unlikely to be taken
3868 if(rs1[i]>=28)
3869 emit_jno_unlikely(0);
3870 else
3871 #endif
3872 emit_jno(0);
3873 }
3874
3875 return jaddr;
3876}
3877
57871462 3878#define shift_assemble shift_assemble_arm
3879
3880void loadlr_assemble_arm(int i,struct regstat *i_regs)
3881{
3882 int s,th,tl,temp,temp2,addr,map=-1;
3883 int offset;
3884 int jaddr=0;
af4ee1fe 3885 int memtarget=0,c=0;
ffb0b9e0 3886 int fastload_reg_override=0;
57871462 3887 u_int hr,reglist=0;
3888 th=get_reg(i_regs->regmap,rt1[i]|64);
3889 tl=get_reg(i_regs->regmap,rt1[i]);
3890 s=get_reg(i_regs->regmap,rs1[i]);
3891 temp=get_reg(i_regs->regmap,-1);
3892 temp2=get_reg(i_regs->regmap,FTEMP);
3893 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3894 assert(addr<0);
3895 offset=imm[i];
3896 for(hr=0;hr<HOST_REGS;hr++) {
3897 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3898 }
3899 reglist|=1<<temp;
3900 if(offset||s<0||c) addr=temp2;
3901 else addr=s;
3902 if(s>=0) {
3903 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3904 if(c) {
3905 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3906 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3907 }
57871462 3908 }
535d208a 3909 if(!using_tlb) {
3910 if(!c) {
3911 #ifdef RAM_OFFSET
3912 map=get_reg(i_regs->regmap,ROREG);
3913 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3914 #endif
3915 emit_shlimm(addr,3,temp);
3916 if (opcode[i]==0x22||opcode[i]==0x26) {
3917 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3918 }else{
535d208a 3919 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3920 }
ffb0b9e0 3921 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 3922 }
3923 else {
3924 if (opcode[i]==0x22||opcode[i]==0x26) {
3925 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3926 }else{
3927 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3928 }
57871462 3929 }
535d208a 3930 }else{ // using tlb
3931 int a;
3932 if(c) {
3933 a=-1;
3934 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3935 a=0xFFFFFFFC; // LWL/LWR
3936 }else{
3937 a=0xFFFFFFF8; // LDL/LDR
3938 }
3939 map=get_reg(i_regs->regmap,TLREG);
3940 assert(map>=0);
ea3d2e6e 3941 reglist&=~(1<<map);
535d208a 3942 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3943 if(c) {
3944 if (opcode[i]==0x22||opcode[i]==0x26) {
3945 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3946 }else{
3947 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3948 }
535d208a 3949 }
3950 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3951 }
3952 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3953 if(!c||memtarget) {
ffb0b9e0 3954 int a=temp2;
3955 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3956 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3957 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3958 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3959 }
3960 else
3961 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3962 if(rt1[i]) {
3963 assert(tl>=0);
57871462 3964 emit_andimm(temp,24,temp);
2002a1db 3965#ifdef BIG_ENDIAN_MIPS
3966 if (opcode[i]==0x26) // LWR
3967#else
3968 if (opcode[i]==0x22) // LWL
3969#endif
3970 emit_xorimm(temp,24,temp);
57871462 3971 emit_movimm(-1,HOST_TEMPREG);
3972 if (opcode[i]==0x26) {
3973 emit_shr(temp2,temp,temp2);
3974 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3975 }else{
3976 emit_shl(temp2,temp,temp2);
3977 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3978 }
3979 emit_or(temp2,tl,tl);
57871462 3980 }
535d208a 3981 //emit_storereg(rt1[i],tl); // DEBUG
3982 }
3983 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3984 // FIXME: little endian, fastload_reg_override
535d208a 3985 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3986 if(!c||memtarget) {
3987 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3988 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3989 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3990 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3991 }
3992 else
3993 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3994 if(rt1[i]) {
3995 assert(th>=0);
3996 assert(tl>=0);
57871462 3997 emit_testimm(temp,32);
3998 emit_andimm(temp,24,temp);
3999 if (opcode[i]==0x1A) { // LDL
4000 emit_rsbimm(temp,32,HOST_TEMPREG);
4001 emit_shl(temp2h,temp,temp2h);
4002 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4003 emit_movimm(-1,HOST_TEMPREG);
4004 emit_shl(temp2,temp,temp2);
4005 emit_cmove_reg(temp2h,th);
4006 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4007 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4008 emit_orreq(temp2,tl,tl);
4009 emit_orrne(temp2,th,th);
4010 }
4011 if (opcode[i]==0x1B) { // LDR
4012 emit_xorimm(temp,24,temp);
4013 emit_rsbimm(temp,32,HOST_TEMPREG);
4014 emit_shr(temp2,temp,temp2);
4015 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4016 emit_movimm(-1,HOST_TEMPREG);
4017 emit_shr(temp2h,temp,temp2h);
4018 emit_cmovne_reg(temp2,tl);
4019 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4020 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4021 emit_orrne(temp2h,th,th);
4022 emit_orreq(temp2h,tl,tl);
4023 }
4024 }
4025 }
4026}
4027#define loadlr_assemble loadlr_assemble_arm
4028
4029void cop0_assemble(int i,struct regstat *i_regs)
4030{
4031 if(opcode2[i]==0) // MFC0
4032 {
4033 signed char t=get_reg(i_regs->regmap,rt1[i]);
4034 char copr=(source[i]>>11)&0x1f;
4035 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4036 if(t>=0&&rt1[i]!=0) {
7139f3c8 4037#ifdef MUPEN64
57871462 4038 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4039 emit_movimm((source[i]>>11)&0x1f,1);
4040 emit_writeword(0,(int)&PC);
4041 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4042 if(copr==9) {
4043 emit_readword((int)&last_count,ECX);
4044 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4045 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4046 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4047 emit_writeword(HOST_CCREG,(int)&Count);
4048 }
4049 emit_call((int)MFC0);
4050 emit_readword((int)&readmem_dword,t);
7139f3c8 4051#else
4052 emit_readword((int)&reg_cop0+copr*4,t);
4053#endif
57871462 4054 }
4055 }
4056 else if(opcode2[i]==4) // MTC0
4057 {
4058 signed char s=get_reg(i_regs->regmap,rs1[i]);
4059 char copr=(source[i]>>11)&0x1f;
4060 assert(s>=0);
4061 emit_writeword(s,(int)&readmem_dword);
4062 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 4063#ifdef MUPEN64
57871462 4064 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4065 emit_movimm((source[i]>>11)&0x1f,1);
4066 emit_writeword(0,(int)&PC);
4067 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 4068#endif
4069 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4070 emit_readword((int)&last_count,ECX);
4071 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4072 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4073 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4074 emit_writeword(HOST_CCREG,(int)&Count);
4075 }
4076 // What a mess. The status register (12) can enable interrupts,
4077 // so needs a special case to handle a pending interrupt.
4078 // The interrupt must be taken immediately, because a subsequent
4079 // instruction might disable interrupts again.
7139f3c8 4080 if(copr==12||copr==13) {
fca1aef2 4081#ifdef PCSX
4082 if (is_delayslot) {
4083 // burn cycles to cause cc_interrupt, which will
4084 // reschedule next_interupt. Relies on CCREG from above.
4085 assem_debug("MTC0 DS %d\n", copr);
4086 emit_writeword(HOST_CCREG,(int)&last_count);
4087 emit_movimm(0,HOST_CCREG);
4088 emit_storereg(CCREG,HOST_CCREG);
4089 emit_movimm(copr,0);
4090 emit_call((int)pcsx_mtc0_ds);
4091 return;
4092 }
4093#endif
57871462 4094 emit_movimm(start+i*4+4,0);
4095 emit_movimm(0,1);
4096 emit_writeword(0,(int)&pcaddr);
4097 emit_writeword(1,(int)&pending_exception);
4098 }
4099 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4100 //else
fca1aef2 4101#ifdef PCSX
4102 emit_movimm(copr,0);
4103 emit_call((int)pcsx_mtc0);
4104#else
57871462 4105 emit_call((int)MTC0);
fca1aef2 4106#endif
7139f3c8 4107 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4108 emit_readword((int)&Count,HOST_CCREG);
4109 emit_readword((int)&next_interupt,ECX);
4110 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4111 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4112 emit_writeword(ECX,(int)&last_count);
4113 emit_storereg(CCREG,HOST_CCREG);
4114 }
7139f3c8 4115 if(copr==12||copr==13) {
57871462 4116 assert(!is_delayslot);
4117 emit_readword((int)&pending_exception,14);
4118 }
4119 emit_loadreg(rs1[i],s);
4120 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4121 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4122 if(copr==12||copr==13) {
57871462 4123 emit_test(14,14);
4124 emit_jne((int)&do_interrupt);
4125 }
4126 cop1_usable=0;
4127 }
4128 else
4129 {
4130 assert(opcode2[i]==0x10);
3d624f89 4131#ifndef DISABLE_TLB
57871462 4132 if((source[i]&0x3f)==0x01) // TLBR
4133 emit_call((int)TLBR);
4134 if((source[i]&0x3f)==0x02) // TLBWI
4135 emit_call((int)TLBWI_new);
4136 if((source[i]&0x3f)==0x06) { // TLBWR
4137 // The TLB entry written by TLBWR is dependent on the count,
4138 // so update the cycle count
4139 emit_readword((int)&last_count,ECX);
4140 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4141 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4142 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
4143 emit_writeword(HOST_CCREG,(int)&Count);
4144 emit_call((int)TLBWR_new);
4145 }
4146 if((source[i]&0x3f)==0x08) // TLBP
4147 emit_call((int)TLBP);
3d624f89 4148#endif
576bbd8f 4149#ifdef PCSX
4150 if((source[i]&0x3f)==0x10) // RFE
4151 {
4152 emit_readword((int)&Status,0);
4153 emit_andimm(0,0x3c,1);
4154 emit_andimm(0,~0xf,0);
4155 emit_orrshr_imm(1,2,0);
4156 emit_writeword(0,(int)&Status);
4157 }
4158#else
57871462 4159 if((source[i]&0x3f)==0x18) // ERET
4160 {
4161 int count=ccadj[i];
4162 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4163 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
4164 emit_jmp((int)jump_eret);
4165 }
576bbd8f 4166#endif
57871462 4167 }
4168}
4169
b9b61529 4170static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4171{
4172 switch (copr) {
4173 case 1:
4174 case 3:
4175 case 5:
4176 case 8:
4177 case 9:
4178 case 10:
4179 case 11:
4180 emit_readword((int)&reg_cop2d[copr],tl);
4181 emit_signextend16(tl,tl);
4182 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4183 break;
4184 case 7:
4185 case 16:
4186 case 17:
4187 case 18:
4188 case 19:
4189 emit_readword((int)&reg_cop2d[copr],tl);
4190 emit_andimm(tl,0xffff,tl);
4191 emit_writeword(tl,(int)&reg_cop2d[copr]);
4192 break;
4193 case 15:
4194 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4195 emit_writeword(tl,(int)&reg_cop2d[copr]);
4196 break;
4197 case 28:
b9b61529 4198 case 29:
4199 emit_readword((int)&reg_cop2d[9],temp);
4200 emit_testimm(temp,0x8000); // do we need this?
4201 emit_andimm(temp,0xf80,temp);
4202 emit_andne_imm(temp,0,temp);
f70d384d 4203 emit_shrimm(temp,7,tl);
b9b61529 4204 emit_readword((int)&reg_cop2d[10],temp);
4205 emit_testimm(temp,0x8000);
4206 emit_andimm(temp,0xf80,temp);
4207 emit_andne_imm(temp,0,temp);
f70d384d 4208 emit_orrshr_imm(temp,2,tl);
b9b61529 4209 emit_readword((int)&reg_cop2d[11],temp);
4210 emit_testimm(temp,0x8000);
4211 emit_andimm(temp,0xf80,temp);
4212 emit_andne_imm(temp,0,temp);
f70d384d 4213 emit_orrshl_imm(temp,3,tl);
b9b61529 4214 emit_writeword(tl,(int)&reg_cop2d[copr]);
4215 break;
4216 default:
4217 emit_readword((int)&reg_cop2d[copr],tl);
4218 break;
4219 }
4220}
4221
4222static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4223{
4224 switch (copr) {
4225 case 15:
4226 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4227 emit_writeword(sl,(int)&reg_cop2d[copr]);
4228 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4229 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4230 emit_writeword(sl,(int)&reg_cop2d[14]);
4231 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4232 break;
4233 case 28:
4234 emit_andimm(sl,0x001f,temp);
f70d384d 4235 emit_shlimm(temp,7,temp);
b9b61529 4236 emit_writeword(temp,(int)&reg_cop2d[9]);
4237 emit_andimm(sl,0x03e0,temp);
f70d384d 4238 emit_shlimm(temp,2,temp);
b9b61529 4239 emit_writeword(temp,(int)&reg_cop2d[10]);
4240 emit_andimm(sl,0x7c00,temp);
f70d384d 4241 emit_shrimm(temp,3,temp);
b9b61529 4242 emit_writeword(temp,(int)&reg_cop2d[11]);
4243 emit_writeword(sl,(int)&reg_cop2d[28]);
4244 break;
4245 case 30:
4246 emit_movs(sl,temp);
4247 emit_mvnmi(temp,temp);
4248 emit_clz(temp,temp);
4249 emit_writeword(sl,(int)&reg_cop2d[30]);
4250 emit_writeword(temp,(int)&reg_cop2d[31]);
4251 break;
b9b61529 4252 case 31:
4253 break;
4254 default:
4255 emit_writeword(sl,(int)&reg_cop2d[copr]);
4256 break;
4257 }
4258}
4259
4260void cop2_assemble(int i,struct regstat *i_regs)
4261{
4262 u_int copr=(source[i]>>11)&0x1f;
4263 signed char temp=get_reg(i_regs->regmap,-1);
4264 if (opcode2[i]==0) { // MFC2
4265 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4266 if(tl>=0&&rt1[i]!=0)
b9b61529 4267 cop2_get_dreg(copr,tl,temp);
4268 }
4269 else if (opcode2[i]==4) { // MTC2
4270 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4271 cop2_put_dreg(copr,sl,temp);
4272 }
4273 else if (opcode2[i]==2) // CFC2
4274 {
4275 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4276 if(tl>=0&&rt1[i]!=0)
b9b61529 4277 emit_readword((int)&reg_cop2c[copr],tl);
4278 }
4279 else if (opcode2[i]==6) // CTC2
4280 {
4281 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4282 switch(copr) {
4283 case 4:
4284 case 12:
4285 case 20:
4286 case 26:
4287 case 27:
4288 case 29:
4289 case 30:
4290 emit_signextend16(sl,temp);
4291 break;
4292 case 31:
4293 //value = value & 0x7ffff000;
4294 //if (value & 0x7f87e000) value |= 0x80000000;
4295 emit_shrimm(sl,12,temp);
4296 emit_shlimm(temp,12,temp);
4297 emit_testimm(temp,0x7f000000);
4298 emit_testeqimm(temp,0x00870000);
4299 emit_testeqimm(temp,0x0000e000);
4300 emit_orrne_imm(temp,0x80000000,temp);
4301 break;
4302 default:
4303 temp=sl;
4304 break;
4305 }
4306 emit_writeword(temp,(int)&reg_cop2c[copr]);
4307 assert(sl>=0);
4308 }
4309}
4310
4311void c2op_assemble(int i,struct regstat *i_regs)
4312{
4313 signed char temp=get_reg(i_regs->regmap,-1);
4314 u_int c2op=source[i]&0x3f;
4315 u_int hr,reglist=0;
bedfea38 4316 int need_flags;
b9b61529 4317 for(hr=0;hr<HOST_REGS;hr++) {
4318 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4319 }
4320 if(i==0||itype[i-1]!=C2OP)
4321 save_regs(reglist);
4322
4323 if (gte_handlers[c2op]!=NULL) {
4324 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 4325 emit_movimm(source[i],1); // opcode
b9b61529 4326 if (cc>=0&&gte_cycletab[c2op])
009faf24 4327 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
4328 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4329 emit_writeword(1,(int)&psxRegs.code);
bedfea38 4330 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
4331 assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags);
4332#ifdef ARMv5_ONLY
4333 // let's take more risk here
4334 need_flags=need_flags&&gte_reads_flags;
4335#endif
4336 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
b9b61529 4337 }
4338
4339 if(i>=slen-1||itype[i+1]!=C2OP)
4340 restore_regs(reglist);
4341}
4342
4343void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4344{
4345 // XXX: should just just do the exception instead
4346 if(!cop1_usable) {
4347 int jaddr=(int)out;
4348 emit_jmp(0);
4349 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4350 cop1_usable=1;
4351 }
4352}
4353
57871462 4354void cop1_assemble(int i,struct regstat *i_regs)
4355{
3d624f89 4356#ifndef DISABLE_COP1
57871462 4357 // Check cop1 unusable
4358 if(!cop1_usable) {
4359 signed char rs=get_reg(i_regs->regmap,CSREG);
4360 assert(rs>=0);
4361 emit_testimm(rs,0x20000000);
4362 int jaddr=(int)out;
4363 emit_jeq(0);
4364 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4365 cop1_usable=1;
4366 }
4367 if (opcode2[i]==0) { // MFC1
4368 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4369 if(tl>=0) {
4370 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4371 emit_readword_indexed(0,tl,tl);
4372 }
4373 }
4374 else if (opcode2[i]==1) { // DMFC1
4375 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4376 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4377 if(tl>=0) {
4378 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4379 if(th>=0) emit_readword_indexed(4,tl,th);
4380 emit_readword_indexed(0,tl,tl);
4381 }
4382 }
4383 else if (opcode2[i]==4) { // MTC1
4384 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4385 signed char temp=get_reg(i_regs->regmap,-1);
4386 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4387 emit_writeword_indexed(sl,0,temp);
4388 }
4389 else if (opcode2[i]==5) { // DMTC1
4390 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4391 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4392 signed char temp=get_reg(i_regs->regmap,-1);
4393 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4394 emit_writeword_indexed(sh,4,temp);
4395 emit_writeword_indexed(sl,0,temp);
4396 }
4397 else if (opcode2[i]==2) // CFC1
4398 {
4399 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4400 if(tl>=0) {
4401 u_int copr=(source[i]>>11)&0x1f;
4402 if(copr==0) emit_readword((int)&FCR0,tl);
4403 if(copr==31) emit_readword((int)&FCR31,tl);
4404 }
4405 }
4406 else if (opcode2[i]==6) // CTC1
4407 {
4408 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4409 u_int copr=(source[i]>>11)&0x1f;
4410 assert(sl>=0);
4411 if(copr==31)
4412 {
4413 emit_writeword(sl,(int)&FCR31);
4414 // Set the rounding mode
4415 //FIXME
4416 //char temp=get_reg(i_regs->regmap,-1);
4417 //emit_andimm(sl,3,temp);
4418 //emit_fldcw_indexed((int)&rounding_modes,temp);
4419 }
4420 }
3d624f89 4421#else
4422 cop1_unusable(i, i_regs);
4423#endif
57871462 4424}
4425
4426void fconv_assemble_arm(int i,struct regstat *i_regs)
4427{
3d624f89 4428#ifndef DISABLE_COP1
57871462 4429 signed char temp=get_reg(i_regs->regmap,-1);
4430 assert(temp>=0);
4431 // Check cop1 unusable
4432 if(!cop1_usable) {
4433 signed char rs=get_reg(i_regs->regmap,CSREG);
4434 assert(rs>=0);
4435 emit_testimm(rs,0x20000000);
4436 int jaddr=(int)out;
4437 emit_jeq(0);
4438 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4439 cop1_usable=1;
4440 }
4441
4442 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4443 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4444 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4445 emit_flds(temp,15);
4446 emit_ftosizs(15,15); // float->int, truncate
4447 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4448 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4449 emit_fsts(15,temp);
4450 return;
4451 }
4452 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4453 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4454 emit_vldr(temp,7);
4455 emit_ftosizd(7,13); // double->int, truncate
4456 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4457 emit_fsts(13,temp);
4458 return;
4459 }
4460
4461 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4462 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4463 emit_flds(temp,13);
4464 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4465 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4466 emit_fsitos(13,15);
4467 emit_fsts(15,temp);
4468 return;
4469 }
4470 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4471 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4472 emit_flds(temp,13);
4473 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4474 emit_fsitod(13,7);
4475 emit_vstr(7,temp);
4476 return;
4477 }
4478
4479 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4480 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4481 emit_flds(temp,13);
4482 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4483 emit_fcvtds(13,7);
4484 emit_vstr(7,temp);
4485 return;
4486 }
4487 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4488 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4489 emit_vldr(temp,7);
4490 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4491 emit_fcvtsd(7,13);
4492 emit_fsts(13,temp);
4493 return;
4494 }
4495 #endif
4496
4497 // C emulation code
4498
4499 u_int hr,reglist=0;
4500 for(hr=0;hr<HOST_REGS;hr++) {
4501 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4502 }
4503 save_regs(reglist);
4504
4505 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4506 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4507 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4508 emit_call((int)cvt_s_w);
4509 }
4510 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4511 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4512 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4513 emit_call((int)cvt_d_w);
4514 }
4515 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4516 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4517 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4518 emit_call((int)cvt_s_l);
4519 }
4520 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4521 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4522 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4523 emit_call((int)cvt_d_l);
4524 }
4525
4526 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4527 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4528 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4529 emit_call((int)cvt_d_s);
4530 }
4531 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4532 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4533 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4534 emit_call((int)cvt_w_s);
4535 }
4536 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4537 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4538 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4539 emit_call((int)cvt_l_s);
4540 }
4541
4542 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4543 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4544 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4545 emit_call((int)cvt_s_d);
4546 }
4547 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4548 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4549 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4550 emit_call((int)cvt_w_d);
4551 }
4552 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4553 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4554 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4555 emit_call((int)cvt_l_d);
4556 }
4557
4558 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4559 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4560 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4561 emit_call((int)round_l_s);
4562 }
4563 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4564 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4565 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4566 emit_call((int)trunc_l_s);
4567 }
4568 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4569 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4570 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4571 emit_call((int)ceil_l_s);
4572 }
4573 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4574 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4575 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4576 emit_call((int)floor_l_s);
4577 }
4578 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4579 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4580 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4581 emit_call((int)round_w_s);
4582 }
4583 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4584 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4585 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4586 emit_call((int)trunc_w_s);
4587 }
4588 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4589 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4590 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4591 emit_call((int)ceil_w_s);
4592 }
4593 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4594 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4595 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4596 emit_call((int)floor_w_s);
4597 }
4598
4599 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4600 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4601 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4602 emit_call((int)round_l_d);
4603 }
4604 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4605 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4606 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4607 emit_call((int)trunc_l_d);
4608 }
4609 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4610 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4611 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4612 emit_call((int)ceil_l_d);
4613 }
4614 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4615 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4616 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4617 emit_call((int)floor_l_d);
4618 }
4619 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4620 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4621 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4622 emit_call((int)round_w_d);
4623 }
4624 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4625 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4626 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4627 emit_call((int)trunc_w_d);
4628 }
4629 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4630 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4631 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4632 emit_call((int)ceil_w_d);
4633 }
4634 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4635 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4636 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4637 emit_call((int)floor_w_d);
4638 }
4639
4640 restore_regs(reglist);
3d624f89 4641#else
4642 cop1_unusable(i, i_regs);
4643#endif
57871462 4644}
4645#define fconv_assemble fconv_assemble_arm
4646
4647void fcomp_assemble(int i,struct regstat *i_regs)
4648{
3d624f89 4649#ifndef DISABLE_COP1
57871462 4650 signed char fs=get_reg(i_regs->regmap,FSREG);
4651 signed char temp=get_reg(i_regs->regmap,-1);
4652 assert(temp>=0);
4653 // Check cop1 unusable
4654 if(!cop1_usable) {
4655 signed char cs=get_reg(i_regs->regmap,CSREG);
4656 assert(cs>=0);
4657 emit_testimm(cs,0x20000000);
4658 int jaddr=(int)out;
4659 emit_jeq(0);
4660 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4661 cop1_usable=1;
4662 }
4663
4664 if((source[i]&0x3f)==0x30) {
4665 emit_andimm(fs,~0x800000,fs);
4666 return;
4667 }
4668
4669 if((source[i]&0x3e)==0x38) {
4670 // sf/ngle - these should throw exceptions for NaNs
4671 emit_andimm(fs,~0x800000,fs);
4672 return;
4673 }
4674
4675 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4676 if(opcode2[i]==0x10) {
4677 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4678 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4679 emit_orimm(fs,0x800000,fs);
4680 emit_flds(temp,14);
4681 emit_flds(HOST_TEMPREG,15);
4682 emit_fcmps(14,15);
4683 emit_fmstat();
4684 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4685 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4686 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4687 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4688 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4689 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4690 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4691 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4692 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4693 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4694 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4695 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4696 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4697 return;
4698 }
4699 if(opcode2[i]==0x11) {
4700 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4701 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4702 emit_orimm(fs,0x800000,fs);
4703 emit_vldr(temp,6);
4704 emit_vldr(HOST_TEMPREG,7);
4705 emit_fcmpd(6,7);
4706 emit_fmstat();
4707 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4708 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4709 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4710 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4711 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4712 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4713 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4714 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4715 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4716 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4717 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4718 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4719 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4720 return;
4721 }
4722 #endif
4723
4724 // C only
4725
4726 u_int hr,reglist=0;
4727 for(hr=0;hr<HOST_REGS;hr++) {
4728 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4729 }
4730 reglist&=~(1<<fs);
4731 save_regs(reglist);
4732 if(opcode2[i]==0x10) {
4733 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4734 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4735 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4736 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4737 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4738 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4739 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4740 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4741 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4742 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4743 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4744 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4745 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4746 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4747 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4748 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4749 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4750 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4751 }
4752 if(opcode2[i]==0x11) {
4753 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4754 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4755 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4756 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4757 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4758 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4759 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4760 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4761 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4762 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4763 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4764 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4765 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4766 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4767 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4768 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4769 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4770 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4771 }
4772 restore_regs(reglist);
4773 emit_loadreg(FSREG,fs);
3d624f89 4774#else
4775 cop1_unusable(i, i_regs);
4776#endif
57871462 4777}
4778
4779void float_assemble(int i,struct regstat *i_regs)
4780{
3d624f89 4781#ifndef DISABLE_COP1
57871462 4782 signed char temp=get_reg(i_regs->regmap,-1);
4783 assert(temp>=0);
4784 // Check cop1 unusable
4785 if(!cop1_usable) {
4786 signed char cs=get_reg(i_regs->regmap,CSREG);
4787 assert(cs>=0);
4788 emit_testimm(cs,0x20000000);
4789 int jaddr=(int)out;
4790 emit_jeq(0);
4791 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4792 cop1_usable=1;
4793 }
4794
4795 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4796 if((source[i]&0x3f)==6) // mov
4797 {
4798 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4799 if(opcode2[i]==0x10) {
4800 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4801 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4802 emit_readword_indexed(0,temp,temp);
4803 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4804 }
4805 if(opcode2[i]==0x11) {
4806 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4807 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4808 emit_vldr(temp,7);
4809 emit_vstr(7,HOST_TEMPREG);
4810 }
4811 }
4812 return;
4813 }
4814
4815 if((source[i]&0x3f)>3)
4816 {
4817 if(opcode2[i]==0x10) {
4818 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4819 emit_flds(temp,15);
4820 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4821 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4822 }
4823 if((source[i]&0x3f)==4) // sqrt
4824 emit_fsqrts(15,15);
4825 if((source[i]&0x3f)==5) // abs
4826 emit_fabss(15,15);
4827 if((source[i]&0x3f)==7) // neg
4828 emit_fnegs(15,15);
4829 emit_fsts(15,temp);
4830 }
4831 if(opcode2[i]==0x11) {
4832 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4833 emit_vldr(temp,7);
4834 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4835 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4836 }
4837 if((source[i]&0x3f)==4) // sqrt
4838 emit_fsqrtd(7,7);
4839 if((source[i]&0x3f)==5) // abs
4840 emit_fabsd(7,7);
4841 if((source[i]&0x3f)==7) // neg
4842 emit_fnegd(7,7);
4843 emit_vstr(7,temp);
4844 }
4845 return;
4846 }
4847 if((source[i]&0x3f)<4)
4848 {
4849 if(opcode2[i]==0x10) {
4850 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4851 }
4852 if(opcode2[i]==0x11) {
4853 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4854 }
4855 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4856 if(opcode2[i]==0x10) {
4857 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4858 emit_flds(temp,15);
4859 emit_flds(HOST_TEMPREG,13);
4860 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4861 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4862 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4863 }
4864 }
4865 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4866 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4867 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4868 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4869 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4870 emit_fsts(15,HOST_TEMPREG);
4871 }else{
4872 emit_fsts(15,temp);
4873 }
4874 }
4875 else if(opcode2[i]==0x11) {
4876 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4877 emit_vldr(temp,7);
4878 emit_vldr(HOST_TEMPREG,6);
4879 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4880 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4881 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4882 }
4883 }
4884 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4885 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4886 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4887 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4888 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4889 emit_vstr(7,HOST_TEMPREG);
4890 }else{
4891 emit_vstr(7,temp);
4892 }
4893 }
4894 }
4895 else {
4896 if(opcode2[i]==0x10) {
4897 emit_flds(temp,15);
4898 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4899 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4900 }
4901 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4902 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4903 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4904 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4905 emit_fsts(15,temp);
4906 }
4907 else if(opcode2[i]==0x11) {
4908 emit_vldr(temp,7);
4909 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4910 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4911 }
4912 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4913 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4914 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4915 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4916 emit_vstr(7,temp);
4917 }
4918 }
4919 return;
4920 }
4921 #endif
4922
4923 u_int hr,reglist=0;
4924 for(hr=0;hr<HOST_REGS;hr++) {
4925 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4926 }
4927 if(opcode2[i]==0x10) { // Single precision
4928 save_regs(reglist);
4929 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4930 if((source[i]&0x3f)<4) {
4931 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4932 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4933 }else{
4934 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4935 }
4936 switch(source[i]&0x3f)
4937 {
4938 case 0x00: emit_call((int)add_s);break;
4939 case 0x01: emit_call((int)sub_s);break;
4940 case 0x02: emit_call((int)mul_s);break;
4941 case 0x03: emit_call((int)div_s);break;
4942 case 0x04: emit_call((int)sqrt_s);break;
4943 case 0x05: emit_call((int)abs_s);break;
4944 case 0x06: emit_call((int)mov_s);break;
4945 case 0x07: emit_call((int)neg_s);break;
4946 }
4947 restore_regs(reglist);
4948 }
4949 if(opcode2[i]==0x11) { // Double precision
4950 save_regs(reglist);
4951 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4952 if((source[i]&0x3f)<4) {
4953 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4954 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4955 }else{
4956 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4957 }
4958 switch(source[i]&0x3f)
4959 {
4960 case 0x00: emit_call((int)add_d);break;
4961 case 0x01: emit_call((int)sub_d);break;
4962 case 0x02: emit_call((int)mul_d);break;
4963 case 0x03: emit_call((int)div_d);break;
4964 case 0x04: emit_call((int)sqrt_d);break;
4965 case 0x05: emit_call((int)abs_d);break;
4966 case 0x06: emit_call((int)mov_d);break;
4967 case 0x07: emit_call((int)neg_d);break;
4968 }
4969 restore_regs(reglist);
4970 }
3d624f89 4971#else
4972 cop1_unusable(i, i_regs);
4973#endif
57871462 4974}
4975
4976void multdiv_assemble_arm(int i,struct regstat *i_regs)
4977{
4978 // case 0x18: MULT
4979 // case 0x19: MULTU
4980 // case 0x1A: DIV
4981 // case 0x1B: DIVU
4982 // case 0x1C: DMULT
4983 // case 0x1D: DMULTU
4984 // case 0x1E: DDIV
4985 // case 0x1F: DDIVU
4986 if(rs1[i]&&rs2[i])
4987 {
4988 if((opcode2[i]&4)==0) // 32-bit
4989 {
4990 if(opcode2[i]==0x18) // MULT
4991 {
4992 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4993 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4994 signed char hi=get_reg(i_regs->regmap,HIREG);
4995 signed char lo=get_reg(i_regs->regmap,LOREG);
4996 assert(m1>=0);
4997 assert(m2>=0);
4998 assert(hi>=0);
4999 assert(lo>=0);
5000 emit_smull(m1,m2,hi,lo);
5001 }
5002 if(opcode2[i]==0x19) // MULTU
5003 {
5004 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5005 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5006 signed char hi=get_reg(i_regs->regmap,HIREG);
5007 signed char lo=get_reg(i_regs->regmap,LOREG);
5008 assert(m1>=0);
5009 assert(m2>=0);
5010 assert(hi>=0);
5011 assert(lo>=0);
5012 emit_umull(m1,m2,hi,lo);
5013 }
5014 if(opcode2[i]==0x1A) // DIV
5015 {
5016 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5017 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5018 assert(d1>=0);
5019 assert(d2>=0);
5020 signed char quotient=get_reg(i_regs->regmap,LOREG);
5021 signed char remainder=get_reg(i_regs->regmap,HIREG);
5022 assert(quotient>=0);
5023 assert(remainder>=0);
5024 emit_movs(d1,remainder);
44a80f6a 5025 emit_movimm(0xffffffff,quotient);
5026 emit_negmi(quotient,quotient); // .. quotient and ..
5027 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5028 emit_movs(d2,HOST_TEMPREG);
5029 emit_jeq((int)out+52); // Division by zero
5030 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5031 emit_clz(HOST_TEMPREG,quotient);
5032 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5033 emit_orimm(quotient,1<<31,quotient);
5034 emit_shr(quotient,quotient,quotient);
5035 emit_cmp(remainder,HOST_TEMPREG);
5036 emit_subcs(remainder,HOST_TEMPREG,remainder);
5037 emit_adcs(quotient,quotient,quotient);
5038 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5039 emit_jcc((int)out-16); // -4
5040 emit_teq(d1,d2);
5041 emit_negmi(quotient,quotient);
5042 emit_test(d1,d1);
5043 emit_negmi(remainder,remainder);
5044 }
5045 if(opcode2[i]==0x1B) // DIVU
5046 {
5047 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5048 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5049 assert(d1>=0);
5050 assert(d2>=0);
5051 signed char quotient=get_reg(i_regs->regmap,LOREG);
5052 signed char remainder=get_reg(i_regs->regmap,HIREG);
5053 assert(quotient>=0);
5054 assert(remainder>=0);
44a80f6a 5055 emit_mov(d1,remainder);
5056 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5057 emit_test(d2,d2);
44a80f6a 5058 emit_jeq((int)out+40); // Division by zero
57871462 5059 emit_clz(d2,HOST_TEMPREG);
5060 emit_movimm(1<<31,quotient);
5061 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5062 emit_shr(quotient,HOST_TEMPREG,quotient);
5063 emit_cmp(remainder,d2);
5064 emit_subcs(remainder,d2,remainder);
5065 emit_adcs(quotient,quotient,quotient);
5066 emit_shrcc_imm(d2,1,d2);
5067 emit_jcc((int)out-16); // -4
5068 }
5069 }
5070 else // 64-bit
4600ba03 5071#ifndef FORCE32
57871462 5072 {
5073 if(opcode2[i]==0x1C) // DMULT
5074 {
5075 assert(opcode2[i]!=0x1C);
5076 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5077 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5078 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5079 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5080 assert(m1h>=0);
5081 assert(m2h>=0);
5082 assert(m1l>=0);
5083 assert(m2l>=0);
5084 emit_pushreg(m2h);
5085 emit_pushreg(m2l);
5086 emit_pushreg(m1h);
5087 emit_pushreg(m1l);
5088 emit_call((int)&mult64);
5089 emit_popreg(m1l);
5090 emit_popreg(m1h);
5091 emit_popreg(m2l);
5092 emit_popreg(m2h);
5093 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5094 signed char hil=get_reg(i_regs->regmap,HIREG);
5095 if(hih>=0) emit_loadreg(HIREG|64,hih);
5096 if(hil>=0) emit_loadreg(HIREG,hil);
5097 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5098 signed char lol=get_reg(i_regs->regmap,LOREG);
5099 if(loh>=0) emit_loadreg(LOREG|64,loh);
5100 if(lol>=0) emit_loadreg(LOREG,lol);
5101 }
5102 if(opcode2[i]==0x1D) // DMULTU
5103 {
5104 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5105 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5106 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5107 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5108 assert(m1h>=0);
5109 assert(m2h>=0);
5110 assert(m1l>=0);
5111 assert(m2l>=0);
5112 save_regs(0x100f);
5113 if(m1l!=0) emit_mov(m1l,0);
5114 if(m1h==0) emit_readword((int)&dynarec_local,1);
5115 else if(m1h>1) emit_mov(m1h,1);
5116 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5117 else if(m2l>2) emit_mov(m2l,2);
5118 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5119 else if(m2h>3) emit_mov(m2h,3);
5120 emit_call((int)&multu64);
5121 restore_regs(0x100f);
5122 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5123 signed char hil=get_reg(i_regs->regmap,HIREG);
5124 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5125 signed char lol=get_reg(i_regs->regmap,LOREG);
5126 /*signed char temp=get_reg(i_regs->regmap,-1);
5127 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5128 signed char rl=get_reg(i_regs->regmap,HIREG);
5129 assert(m1h>=0);
5130 assert(m2h>=0);
5131 assert(m1l>=0);
5132 assert(m2l>=0);
5133 assert(temp>=0);
5134 //emit_mov(m1l,EAX);
5135 //emit_mul(m2l);
5136 emit_umull(rl,rh,m1l,m2l);
5137 emit_storereg(LOREG,rl);
5138 emit_mov(rh,temp);
5139 //emit_mov(m1h,EAX);
5140 //emit_mul(m2l);
5141 emit_umull(rl,rh,m1h,m2l);
5142 emit_adds(rl,temp,temp);
5143 emit_adcimm(rh,0,rh);
5144 emit_storereg(HIREG,rh);
5145 //emit_mov(m2h,EAX);
5146 //emit_mul(m1l);
5147 emit_umull(rl,rh,m1l,m2h);
5148 emit_adds(rl,temp,temp);
5149 emit_adcimm(rh,0,rh);
5150 emit_storereg(LOREG|64,temp);
5151 emit_mov(rh,temp);
5152 //emit_mov(m2h,EAX);
5153 //emit_mul(m1h);
5154 emit_umull(rl,rh,m1h,m2h);
5155 emit_adds(rl,temp,rl);
5156 emit_loadreg(HIREG,temp);
5157 emit_adcimm(rh,0,rh);
5158 emit_adds(rl,temp,rl);
5159 emit_adcimm(rh,0,rh);
5160 // DEBUG
5161 /*
5162 emit_pushreg(m2h);
5163 emit_pushreg(m2l);
5164 emit_pushreg(m1h);
5165 emit_pushreg(m1l);
5166 emit_call((int)&multu64);
5167 emit_popreg(m1l);
5168 emit_popreg(m1h);
5169 emit_popreg(m2l);
5170 emit_popreg(m2h);
5171 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5172 signed char hil=get_reg(i_regs->regmap,HIREG);
5173 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5174 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5175 */
5176 // Shouldn't be necessary
5177 //char loh=get_reg(i_regs->regmap,LOREG|64);
5178 //char lol=get_reg(i_regs->regmap,LOREG);
5179 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5180 //if(lol>=0) emit_loadreg(LOREG,lol);
5181 }
5182 if(opcode2[i]==0x1E) // DDIV
5183 {
5184 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5185 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5186 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5187 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5188 assert(d1h>=0);
5189 assert(d2h>=0);
5190 assert(d1l>=0);
5191 assert(d2l>=0);
5192 save_regs(0x100f);
5193 if(d1l!=0) emit_mov(d1l,0);
5194 if(d1h==0) emit_readword((int)&dynarec_local,1);
5195 else if(d1h>1) emit_mov(d1h,1);
5196 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5197 else if(d2l>2) emit_mov(d2l,2);
5198 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5199 else if(d2h>3) emit_mov(d2h,3);
5200 emit_call((int)&div64);
5201 restore_regs(0x100f);
5202 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5203 signed char hil=get_reg(i_regs->regmap,HIREG);
5204 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5205 signed char lol=get_reg(i_regs->regmap,LOREG);
5206 if(hih>=0) emit_loadreg(HIREG|64,hih);
5207 if(hil>=0) emit_loadreg(HIREG,hil);
5208 if(loh>=0) emit_loadreg(LOREG|64,loh);
5209 if(lol>=0) emit_loadreg(LOREG,lol);
5210 }
5211 if(opcode2[i]==0x1F) // DDIVU
5212 {
5213 //u_int hr,reglist=0;
5214 //for(hr=0;hr<HOST_REGS;hr++) {
5215 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5216 //}
5217 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5218 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5219 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5220 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5221 assert(d1h>=0);
5222 assert(d2h>=0);
5223 assert(d1l>=0);
5224 assert(d2l>=0);
5225 save_regs(0x100f);
5226 if(d1l!=0) emit_mov(d1l,0);
5227 if(d1h==0) emit_readword((int)&dynarec_local,1);
5228 else if(d1h>1) emit_mov(d1h,1);
5229 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5230 else if(d2l>2) emit_mov(d2l,2);
5231 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5232 else if(d2h>3) emit_mov(d2h,3);
5233 emit_call((int)&divu64);
5234 restore_regs(0x100f);
5235 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5236 signed char hil=get_reg(i_regs->regmap,HIREG);
5237 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5238 signed char lol=get_reg(i_regs->regmap,LOREG);
5239 if(hih>=0) emit_loadreg(HIREG|64,hih);
5240 if(hil>=0) emit_loadreg(HIREG,hil);
5241 if(loh>=0) emit_loadreg(LOREG|64,loh);
5242 if(lol>=0) emit_loadreg(LOREG,lol);
5243 }
5244 }
4600ba03 5245#else
5246 assert(0);
5247#endif
57871462 5248 }
5249 else
5250 {
5251 // Multiply by zero is zero.
5252 // MIPS does not have a divide by zero exception.
5253 // The result is undefined, we return zero.
5254 signed char hr=get_reg(i_regs->regmap,HIREG);
5255 signed char lr=get_reg(i_regs->regmap,LOREG);
5256 if(hr>=0) emit_zeroreg(hr);
5257 if(lr>=0) emit_zeroreg(lr);
5258 }
5259}
5260#define multdiv_assemble multdiv_assemble_arm
5261
// Intentionally a no-op on ARM.
// On x86 this step puts the value 0xf8 into register r; on ARM the hash
// is computed with a single instruction instead (see do_rhash).
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
5266
5267void do_preload_rhtbl(int ht) {
5268 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5269}
5270
// Emit the mini hash table hash: rh = rs & 0xf8.
// The mask matches the (address&0xFF)>>3 slot index used by
// do_miniht_insert, scaled to a byte offset for 8-byte table entries.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5274
5275void do_miniht_load(int ht,int rh) {
5276 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5277 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5278}
5279
5280void do_miniht_jump(int rs,int rh,int ht) {
5281 emit_cmp(rh,rs);
5282 emit_ldreq_indexed(ht,4,15);
5283 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5284 emit_mov(rs,7);
5285 emit_jmp(jump_vaddr_reg[7]);
5286 #else
5287 emit_jmp(jump_vaddr_reg[rs]);
5288 #endif
5289}
5290
5291void do_miniht_insert(u_int return_address,int rt,int temp) {
5292 #ifdef ARMv5_ONLY
5293 emit_movimm(return_address,rt); // PC into link register
5294 add_to_linker((int)out,return_address,1);
5295 emit_pcreladdr(temp);
5296 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5297 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5298 #else
5299 emit_movw(return_address&0x0000FFFF,rt);
5300 add_to_linker((int)out,return_address,1);
5301 emit_pcreladdr(temp);
5302 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5303 emit_movt(return_address&0xFFFF0000,rt);
5304 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5305 #endif
5306}
5307
5308// Sign-extend to 64 bits and write out upper half of a register
5309// This is useful where we have a 32-bit value in a register, and want to
5310// keep it in a 32-bit register, but can't guarantee that it won't be read
5311// as a 64-bit value later.
5312void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5313{
24385cae 5314#ifndef FORCE32
57871462 5315 if(is32_pre==is32) return;
5316 int hr,reg;
5317 for(hr=0;hr<HOST_REGS;hr++) {
5318 if(hr!=EXCLUDE_REG) {
5319 //if(pre[hr]==entry[hr]) {
5320 if((reg=pre[hr])>=0) {
5321 if((dirty>>hr)&1) {
5322 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5323 emit_sarimm(hr,31,HOST_TEMPREG);
5324 emit_storereg(reg|64,HOST_TEMPREG);
5325 }
5326 }
5327 }
5328 //}
5329 }
5330 }
24385cae 5331#endif
57871462 5332}
5333
5334void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5335{
5336 //if(dirty_pre==dirty) return;
5337 int hr,reg,new_hr;
5338 for(hr=0;hr<HOST_REGS;hr++) {
5339 if(hr!=EXCLUDE_REG) {
5340 reg=pre[hr];
5341 if(((~u)>>(reg&63))&1) {
f776eb14 5342 if(reg>0) {
57871462 5343 if(((dirty_pre&~dirty)>>hr)&1) {
5344 if(reg>0&&reg<34) {
5345 emit_storereg(reg,hr);
5346 if( ((is32_pre&~uu)>>reg)&1 ) {
5347 emit_sarimm(hr,31,HOST_TEMPREG);
5348 emit_storereg(reg|64,HOST_TEMPREG);
5349 }
5350 }
5351 else if(reg>=64) {
5352 emit_storereg(reg,hr);
5353 }
5354 }
5355 }
57871462 5356 }
5357 }
5358 }
5359}
5360
5361
5362/* using strd could possibly help but you'd have to allocate registers in pairs
5363void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5364{
5365 int hr;
5366 int wrote=-1;
5367 for(hr=HOST_REGS-1;hr>=0;hr--) {
5368 if(hr!=EXCLUDE_REG) {
5369 if(pre[hr]!=entry[hr]) {
5370 if(pre[hr]>=0) {
5371 if((dirty>>hr)&1) {
5372 if(get_reg(entry,pre[hr])<0) {
5373 if(pre[hr]<64) {
5374 if(!((u>>pre[hr])&1)) {
5375 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5376 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5377 emit_sarimm(hr,31,hr+1);
5378 emit_strdreg(pre[hr],hr);
5379 }
5380 else
5381 emit_storereg(pre[hr],hr);
5382 }else{
5383 emit_storereg(pre[hr],hr);
5384 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5385 emit_sarimm(hr,31,hr);
5386 emit_storereg(pre[hr]|64,hr);
5387 }
5388 }
5389 }
5390 }else{
5391 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5392 emit_storereg(pre[hr],hr);
5393 }
5394 }
5395 wrote=hr;
5396 }
5397 }
5398 }
5399 }
5400 }
5401 }
5402 for(hr=0;hr<HOST_REGS;hr++) {
5403 if(hr!=EXCLUDE_REG) {
5404 if(pre[hr]!=entry[hr]) {
5405 if(pre[hr]>=0) {
5406 int nr;
5407 if((nr=get_reg(entry,pre[hr]))>=0) {
5408 emit_mov(hr,nr);
5409 }
5410 }
5411 }
5412 }
5413 }
5414}
5415#define wb_invalidate wb_invalidate_arm
5416*/
5417
dd3a91a1 5418// Clearing the cache is rather slow on ARM Linux, so mark the areas
5419// that need to be cleared, and then only clear these areas once.
5420void do_clear_cache()
5421{
5422 int i,j;
5423 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5424 {
5425 u_int bitmap=needs_clear_cache[i];
5426 if(bitmap) {
5427 u_int start,end;
5428 for(j=0;j<32;j++)
5429 {
5430 if(bitmap&(1<<j)) {
5431 start=BASE_ADDR+i*131072+j*4096;
5432 end=start+4095;
5433 j++;
5434 while(j<32) {
5435 if(bitmap&(1<<j)) {
5436 end+=4096;
5437 j++;
5438 }else{
5439 __clear_cache((void *)start,(void *)end);
5440 break;
5441 }
5442 }
5443 }
5444 }
5445 needs_clear_cache[i]=0;
5446 }
5447 }
5448}
5449
57871462 5450// CPU-architecture-specific initialization
5451void arch_init() {
3d624f89 5452#ifndef DISABLE_COP1
57871462 5453 rounding_modes[0]=0x0<<22; // round
5454 rounding_modes[1]=0x3<<22; // trunc
5455 rounding_modes[2]=0x1<<22; // ceil
5456 rounding_modes[3]=0x2<<22; // floor
3d624f89 5457#endif
57871462 5458}
b9b61529 5459
5460// vim:shiftwidth=2:expandtab