frontend: warn about frameskip problems
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// State defined outside this file and referenced by the code below.
extern int cycle_count;       // storage used for CCREG by emit_loadreg/emit_storereg
extern int last_count;
extern int pcaddr;
extern int pending_exception;
extern int branch_target;
extern uint64_t readmem_dword;
#ifdef MUPEN64
extern precomp_instr fake_pc;
#endif
// Base address for the fp-relative loads/stores built in emit_loadreg and
// emit_storereg (offsets from it must stay below 4096).
extern void *dynarec_local;
// Indexed by (address>>12) in verify_dirty/get_bounds; an entry >= 0x80000000
// is treated there as "not mapped", otherwise (entry<<2) is added to the address.
extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
extern u_int rounding_modes[4];
34
// Entry points implemented outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
// One jump_vaddr routine per host register; there is none for r11 or r13-r15.
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// Address of the jump_vaddr routine for each host register number.
// Slots 11 and 13-15 (fp, sp, lr, pc in regname[]) hold 0.
const u_int jump_vaddr_reg[16] = {
  (int)jump_vaddr_r0,
  (int)jump_vaddr_r1,
  (int)jump_vaddr_r2,
  (int)jump_vaddr_r3,
  (int)jump_vaddr_r4,
  (int)jump_vaddr_r5,
  (int)jump_vaddr_r6,
  (int)jump_vaddr_r7,
  (int)jump_vaddr_r8,
  (int)jump_vaddr_r9,
  (int)jump_vaddr_r10,
  0,
  (int)jump_vaddr_r12,
  0,
  0,
  0};
68
// One invalidate_addr routine per host register (implemented outside this
// file); there is none for r11 or r13-r15.
void invalidate_addr_r0();
void invalidate_addr_r1();
void invalidate_addr_r2();
void invalidate_addr_r3();
void invalidate_addr_r4();
void invalidate_addr_r5();
void invalidate_addr_r6();
void invalidate_addr_r7();
void invalidate_addr_r8();
void invalidate_addr_r9();
void invalidate_addr_r10();
void invalidate_addr_r12();

// Address of the invalidate_addr routine for each host register number.
// Slots 11 and 13-15 (fp, sp, lr, pc in regname[]) hold 0.
const u_int invalidate_addr_reg[16] = {
  (int)invalidate_addr_r0,
  (int)invalidate_addr_r1,
  (int)invalidate_addr_r2,
  (int)invalidate_addr_r3,
  (int)invalidate_addr_r4,
  (int)invalidate_addr_r5,
  (int)invalidate_addr_r6,
  (int)invalidate_addr_r7,
  (int)invalidate_addr_r8,
  (int)invalidate_addr_r9,
  (int)invalidate_addr_r10,
  0,
  (int)invalidate_addr_r12,
  0,
  0,
  0};
99
57871462 100#include "fpu.h"
101
// NOTE(review): not used in the visible part of this file; presumably a bitmap
// of translation-cache regions awaiting an instruction-cache flush -- confirm.
unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
// Map guest register 'reg' to a host register in cur->regmap for the
// instruction at index i.
// Note: registers are allocated clean (unmodified state)
// if you intend to modify the register, you must call dirty_reg().
//
// Attempts, in order (the first one that succeeds returns):
//   1. nothing to do if reg is flagged in cur->u or is already mapped
//   2. the preferred host register, if it is free or holds an unneeded value
//   3. any free host register (after unmapping at most one unneeded value)
//   4. eviction, guided by the hsn[] table filled in by lsn()
// The dirty and isconst bits of the chosen host register are cleared.
// regmap values >=64 denote the upper half of a 64-bit guest register.
void alloc_reg(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = (reg&7);
  if(reg==CCREG) preferred_reg=HOST_CCREG;
  if(reg==PTEMP||reg==FTEMP) preferred_reg=12;

  // Don't allocate unused registers
  if((cur->u>>reg)&1) return;

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred register is occupied: take it over if its current occupant
  // (lower half checked against u, upper half against uu) is unneeded.
  r=cur->regmap[preferred_reg];
  if(r<64&&((cur->u>>r)&1)) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  if(r>=64&&((cur->uu>>(r&63))&1)) {
    cur->regmap[preferred_reg]=reg;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops).  So we try to allocate
  // first (see above) before removing old mappings.  If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  // (Only the first unneeded mapping found, scanning upwards, is cleared.)
  for(hr=0;hr<HOST_REGS;hr++)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      if(r<64) {
        if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
      }
      else
      {
        if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
      }
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  // (i.e. whose mapping in regs[i-1] is not an operand of instruction i-1)
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
          cur->regmap[hr]=reg;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // NOTE(review): lsn() is defined elsewhere; hsn[] presumably ranks guest
  // registers by how soon they are needed (larger = later) -- confirm.
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
  //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
    // First pass: only candidates ranked 10..3, skipping the operands of
    // instruction i-1.
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg;
        return;
      }
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          // Upper halves (r+64) are given up before lower halves (r).
          // HOST_CCREG is only taken when j is below the cycle count's rank.
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: any mapped guest register, ranked 10..0, with no exclusions.
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  printf("This shouldn't happen (alloc_reg)");exit(1);
}
472
// Allocate host registers for both halves of 64-bit guest register 'reg' at
// instruction index i.  The lower half is handled by alloc_reg(); the upper
// half is recorded in cur->regmap as reg|64 and goes through the same sequence
// of attempts as alloc_reg, with cur->uu as the "unneeded" mask and host
// register 8+(reg&1) as the initial preference.
void alloc_reg64(struct regstat *cur,int i,signed char reg)
{
  int preferred_reg = 8+(reg&1);
  int r,hr;

  // allocate the lower 32 bits
  alloc_reg(cur,i,reg);

  // Don't allocate unused registers
  if((cur->uu>>reg)&1) return;

  // see if the upper half is already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(cur->regmap[hr]==reg+64) return;
  }

  // Keep the same mapping if the register was already allocated in a loop
  preferred_reg = loop_reg(i,reg,preferred_reg);

  // Try to allocate the preferred register
  if(cur->regmap[preferred_reg]==-1) {
    cur->regmap[preferred_reg]=reg|64;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  // Preferred register is occupied: take it over if its occupant is unneeded.
  r=cur->regmap[preferred_reg];
  if(r<64&&((cur->u>>r)&1)) {
    cur->regmap[preferred_reg]=reg|64;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }
  if(r>=64&&((cur->uu>>(r&63))&1)) {
    cur->regmap[preferred_reg]=reg|64;
    cur->dirty&=~(1<<preferred_reg);
    cur->isconst&=~(1<<preferred_reg);
    return;
  }

  // Clear any unneeded registers
  // We try to keep the mapping consistent, if possible, because it
  // makes branches easier (especially loops).  So we try to allocate
  // first (see above) before removing old mappings.  If this is not
  // possible then go ahead and clear out the registers that are no
  // longer needed.
  // (Only the first unneeded mapping found is cleared; unlike alloc_reg this
  // scan runs from the highest host register downwards.)
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      if(r<64) {
        if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
      }
      else
      {
        if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
      }
    }
  }
  // Try to allocate any available register, but prefer
  // registers that have not been used recently.
  if(i>0) {
    for(hr=0;hr<HOST_REGS;hr++) {
      if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
        if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
          cur->regmap[hr]=reg|64;
          cur->dirty&=~(1<<hr);
          cur->isconst&=~(1<<hr);
          return;
        }
      }
    }
  }
  // Try to allocate any available register
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg|64;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // NOTE(review): lsn() is defined elsewhere; hsn[] presumably ranks guest
  // registers by how soon they are needed (larger = later) -- confirm.
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
  //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
    // First pass: only candidates ranked 10..3, skipping the operands of
    // instruction i-1.
    for(j=10;j>=3;j--)
    {
      // Alloc preferred register if available
      if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          // Evict both parts of a 64-bit register
          if((cur->regmap[hr]&63)==r) {
            cur->regmap[hr]=-1;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
          }
        }
        cur->regmap[preferred_reg]=reg|64;
        return;
      }
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          // Upper halves (r+64) are given up before lower halves (r).
          // HOST_CCREG is only taken when j is below the cycle count's rank.
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg|64;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||j<hsn[CCREG]) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg|64;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: any mapped guest register, ranked 10..0, with no exclusions.
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg|64;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg|64;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  printf("This shouldn't happen");exit(1);
}
638
// Allocate a temporary register.  This is done without regard to
// dirty status or whether the register we request is on the unneeded list
// Note: This will only allocate one register, even if called multiple times
// (a second call with the same 'reg' finds the existing mapping and returns).
// Free registers are searched from the highest host register downwards.
// The dirty and isconst bits of the chosen host register are cleared.
void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
{
  int r,hr;
  int preferred_reg = -1; // no preference; only passed on to lsn()

  // see if it's already allocated
  for(hr=0;hr<HOST_REGS;hr++)
  {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
  }

  // Try to allocate any available register
  for(hr=HOST_REGS-1;hr>=0;hr--) {
    if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
      cur->regmap[hr]=reg;
      cur->dirty&=~(1<<hr);
      cur->isconst&=~(1<<hr);
      return;
    }
  }

  // Find an unneeded register
  // (must be unneeded both now and, unless i==0, at the previous instruction)
  for(hr=HOST_REGS-1;hr>=0;hr--)
  {
    r=cur->regmap[hr];
    if(r>=0) {
      if(r<64) {
        if((cur->u>>r)&1) {
          if(i==0||((unneeded_reg[i-1]>>r)&1)) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
      else
      {
        if((cur->uu>>(r&63))&1) {
          if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }

  // Ok, now we have to evict someone
  // Pick a register we hopefully won't need soon
  // TODO: we might want to follow unconditional jumps here
  // TODO: get rid of dupe code and make this into a function
  // NOTE(review): lsn() is defined elsewhere; hsn[] presumably ranks guest
  // registers by how soon they are needed (larger = later) -- confirm.
  u_char hsn[MAXREG+1];
  memset(hsn,10,sizeof(hsn));
  int j;
  lsn(hsn,i,&preferred_reg);
  //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
  if(i>0) {
    // Don't evict the cycle count at entry points, otherwise the entry
    // stub will have to write it.
    if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
    if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
    // First pass: only candidates ranked 10..3, skipping the operands of
    // instruction i-1.
    for(j=10;j>=3;j--)
    {
      for(r=1;r<=MAXREG;r++)
      {
        if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
          // Upper halves (r+64) are given up before lower halves (r).
          // HOST_CCREG is only taken while the cycle count's rank is above 2
          // (note: this test differs from the one in alloc_reg).
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r+64) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
          for(hr=0;hr<HOST_REGS;hr++) {
            if(hr!=HOST_CCREG||hsn[CCREG]>2) {
              if(cur->regmap[hr]==r) {
                cur->regmap[hr]=reg;
                cur->dirty&=~(1<<hr);
                cur->isconst&=~(1<<hr);
                return;
              }
            }
          }
        }
      }
    }
  }
  // Last resort: any mapped guest register, ranked 10..0, with no exclusions.
  for(j=10;j>=0;j--)
  {
    for(r=1;r<=MAXREG;r++)
    {
      if(hsn[r]==j) {
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r+64) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
        for(hr=0;hr<HOST_REGS;hr++) {
          if(cur->regmap[hr]==r) {
            cur->regmap[hr]=reg;
            cur->dirty&=~(1<<hr);
            cur->isconst&=~(1<<hr);
            return;
          }
        }
      }
    }
  }
  printf("This shouldn't happen");exit(1);
}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
765
766 // see if it's already allocated (and dealloc it)
767 for(n=0;n<HOST_REGS;n++)
768 {
769 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
770 }
771
772 cur->regmap[hr]=reg;
773 cur->dirty&=~(1<<hr);
774 cur->isconst&=~(1<<hr);
775}
776
777// Alloc cycle count into dedicated register
778alloc_cc(struct regstat *cur,int i)
779{
780 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
781}
782
783/* Special alloc */
784
785
786/* Assembler */
787
// Printable name of each ARM register number, used by the assem_debug traces.
char regname[16][4] = {
  [0]  = "r0",  [1]  = "r1",  [2]  = "r2",  [3]  = "r3",
  [4]  = "r4",  [5]  = "r5",  [6]  = "r6",  [7]  = "r7",
  [8]  = "r8",  [9]  = "r9",  [10] = "r10", [11] = "fp",
  [12] = "r12", [13] = "sp",  [14] = "lr",  [15] = "pc",
};
805
806void output_byte(u_char byte)
807{
808 *(out++)=byte;
809}
810void output_modrm(u_char mod,u_char rm,u_char ext)
811{
812 assert(mod<4);
813 assert(rm<8);
814 assert(ext<8);
815 u_char byte=(mod<<6)|(ext<<3)|rm;
816 *(out++)=byte;
817}
818void output_sib(u_char scale,u_char index,u_char base)
819{
820 assert(scale<4);
821 assert(index<8);
822 assert(base<8);
823 u_char byte=(scale<<6)|(index<<3)|base;
824 *(out++)=byte;
825}
826void output_w32(u_int word)
827{
828 *((u_int *)out)=word;
829 out+=4;
830}
// Pack three register numbers into instruction fields:
// rn at bits 19-16, rd at bits 15-12, rm at bits 3-0.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Pack rd (bits 15-12), rn (bits 19-16) and an 8-bit immediate shifted left by
// an even amount 'shift' into instruction fields.  The shift is stored as
// ((32-shift)&30)<<7, i.e. in the field directly above the 8-bit immediate.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate_field;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
// Try to express imm as an 8-bit value rotated by an even amount.
// On success the 12-bit operand field (rotate count above the 8-bit value) is
// stored in *encoded and 1 is returned.  On failure 0 is returned and
// *encoded is left untouched.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 860void genimm_checked(u_int imm,u_int *encoded)
861{
862 u_int ret=genimm(imm,encoded);
863 assert(ret);
864}
57871462 865u_int genjmp(u_int addr)
866{
867 int offset=addr-(int)out-8;
e80343e2 868 if(offset<-33554432||offset>=33554432) {
869 if (addr>2) {
870 printf("genjmp: out of range: %08x\n", offset);
871 exit(1);
872 }
873 return 0;
874 }
57871462 875 return ((u_int)offset>>2)&0xffffff;
876}
877
878void emit_mov(int rs,int rt)
879{
880 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
881 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
882}
883
884void emit_movs(int rs,int rt)
885{
886 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
887 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
888}
889
890void emit_add(int rs1,int rs2,int rt)
891{
892 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
893 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
894}
895
896void emit_adds(int rs1,int rs2,int rt)
897{
898 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
899 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
900}
901
902void emit_adcs(int rs1,int rs2,int rt)
903{
904 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_sbc(int rs1,int rs2,int rt)
909{
910 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_sbcs(int rs1,int rs2,int rt)
915{
916 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_neg(int rs, int rt)
921{
922 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
923 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
924}
925
926void emit_negs(int rs, int rt)
927{
928 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
929 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
930}
931
932void emit_sub(int rs1,int rs2,int rt)
933{
934 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
935 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
936}
937
938void emit_subs(int rs1,int rs2,int rt)
939{
940 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
942}
943
944void emit_zeroreg(int rt)
945{
946 assem_debug("mov %s,#0\n",regname[rt]);
947 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
948}
949
790ee18e 950void emit_loadlp(u_int imm,u_int rt)
951{
952 add_literal((int)out,imm);
953 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
954 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
955}
956void emit_movw(u_int imm,u_int rt)
957{
958 assert(imm<65536);
959 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
960 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
961}
962void emit_movt(u_int imm,u_int rt)
963{
964 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
965 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
966}
967void emit_movimm(u_int imm,u_int rt)
968{
969 u_int armval;
970 if(genimm(imm,&armval)) {
971 assem_debug("mov %s,#%d\n",regname[rt],imm);
972 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
973 }else if(genimm(~imm,&armval)) {
974 assem_debug("mvn %s,#%d\n",regname[rt],imm);
975 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
976 }else if(imm<65536) {
977 #ifdef ARMv5_ONLY
978 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
979 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
980 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
981 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
982 #else
983 emit_movw(imm,rt);
984 #endif
985 }else{
986 #ifdef ARMv5_ONLY
987 emit_loadlp(imm,rt);
988 #else
989 emit_movw(imm&0x0000FFFF,rt);
990 emit_movt(imm&0xFFFF0000,rt);
991 #endif
992 }
993}
994void emit_pcreladdr(u_int rt)
995{
996 assem_debug("add %s,pc,#?\n",regname[rt]);
997 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
998}
999
57871462 1000void emit_loadreg(int r, int hr)
1001{
3d624f89 1002#ifdef FORCE32
1003 if(r&64) {
1004 printf("64bit load in 32bit mode!\n");
1005 exit(1);
1006 }
1007#endif
57871462 1008 if((r&63)==0)
1009 emit_zeroreg(hr);
1010 else {
3d624f89 1011 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1012 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1013 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1014 if(r==CCREG) addr=(int)&cycle_count;
1015 if(r==CSREG) addr=(int)&Status;
1016 if(r==FSREG) addr=(int)&FCR31;
1017 if(r==INVCP) addr=(int)&invc_ptr;
1018 u_int offset = addr-(u_int)&dynarec_local;
1019 assert(offset<4096);
1020 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1021 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1022 }
1023}
1024void emit_storereg(int r, int hr)
1025{
3d624f89 1026#ifdef FORCE32
1027 if(r&64) {
1028 printf("64bit store in 32bit mode!\n");
1029 exit(1);
1030 }
1031#endif
1032 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1033 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1034 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1035 if(r==CCREG) addr=(int)&cycle_count;
1036 if(r==FSREG) addr=(int)&FCR31;
1037 u_int offset = addr-(u_int)&dynarec_local;
1038 assert(offset<4096);
1039 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1040 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1041}
1042
1043void emit_test(int rs, int rt)
1044{
1045 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1046 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1047}
1048
1049void emit_testimm(int rs,int imm)
1050{
1051 u_int armval;
1052 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1053 genimm_checked(imm,&armval);
57871462 1054 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1055}
1056
b9b61529 1057void emit_testeqimm(int rs,int imm)
1058{
1059 u_int armval;
1060 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1061 genimm_checked(imm,&armval);
b9b61529 1062 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1063}
1064
57871462 1065void emit_not(int rs,int rt)
1066{
1067 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1068 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1069}
1070
b9b61529 1071void emit_mvnmi(int rs,int rt)
1072{
1073 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1074 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1075}
1076
57871462 1077void emit_and(u_int rs1,u_int rs2,u_int rt)
1078{
1079 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1080 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1081}
1082
1083void emit_or(u_int rs1,u_int rs2,u_int rt)
1084{
1085 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1086 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1087}
1088void emit_or_and_set_flags(int rs1,int rs2,int rt)
1089{
1090 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1091 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1092}
1093
f70d384d 1094void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1095{
1096 assert(rs<16);
1097 assert(rt<16);
1098 assert(imm<32);
1099 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1100 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1101}
1102
576bbd8f 1103void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1104{
1105 assert(rs<16);
1106 assert(rt<16);
1107 assert(imm<32);
1108 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1109 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1110}
1111
57871462 1112void emit_xor(u_int rs1,u_int rs2,u_int rt)
1113{
1114 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
57871462 1118void emit_addimm(u_int rs,int imm,u_int rt)
1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 if(imm!=0) {
1123 assert(imm>-65536&&imm<65536);
1124 u_int armval;
1125 if(genimm(imm,&armval)) {
1126 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1127 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1128 }else if(genimm(-imm,&armval)) {
1129 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(imm<0) {
1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1133 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1134 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1135 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1136 }else{
1137 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1138 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1139 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1140 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1141 }
1142 }
1143 else if(rs!=rt) emit_mov(rs,rt);
1144}
1145
1146void emit_addimm_and_set_flags(int imm,int rt)
1147{
1148 assert(imm>-65536&&imm<65536);
1149 u_int armval;
1150 if(genimm(imm,&armval)) {
1151 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1152 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1153 }else if(genimm(-imm,&armval)) {
1154 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1155 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1156 }else if(imm<0) {
1157 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1158 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1159 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1160 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1161 }else{
1162 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1163 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1164 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1165 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1166 }
1167}
1168void emit_addimm_no_flags(u_int imm,u_int rt)
1169{
1170 emit_addimm(rt,imm,rt);
1171}
1172
1173void emit_addnop(u_int r)
1174{
1175 assert(r<16);
1176 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1177 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1178}
1179
1180void emit_adcimm(u_int rs,int imm,u_int rt)
1181{
1182 u_int armval;
cfbd3c6e 1183 genimm_checked(imm,&armval);
57871462 1184 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1185 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1186}
1187/*void emit_sbcimm(int imm,u_int rt)
1188{
1189 u_int armval;
cfbd3c6e 1190 genimm_checked(imm,&armval);
57871462 1191 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1192 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1193}*/
1194void emit_sbbimm(int imm,u_int rt)
1195{
1196 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1197 assert(rt<8);
1198 if(imm<128&&imm>=-128) {
1199 output_byte(0x83);
1200 output_modrm(3,rt,3);
1201 output_byte(imm);
1202 }
1203 else
1204 {
1205 output_byte(0x81);
1206 output_modrm(3,rt,3);
1207 output_w32(imm);
1208 }
1209}
1210void emit_rscimm(int rs,int imm,u_int rt)
1211{
1212 assert(0);
1213 u_int armval;
cfbd3c6e 1214 genimm_checked(imm,&armval);
57871462 1215 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1216 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1217}
1218
1219void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1220{
1221 // TODO: if(genimm(imm,&armval)) ...
1222 // else
1223 emit_movimm(imm,HOST_TEMPREG);
1224 emit_adds(HOST_TEMPREG,rsl,rtl);
1225 emit_adcimm(rsh,0,rth);
1226}
1227
1228void emit_sbb(int rs1,int rs2)
1229{
1230 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1231 output_byte(0x19);
1232 output_modrm(3,rs1,rs2);
1233}
1234
1235void emit_andimm(int rs,int imm,int rt)
1236{
1237 u_int armval;
790ee18e 1238 if(imm==0) {
1239 emit_zeroreg(rt);
1240 }else if(genimm(imm,&armval)) {
57871462 1241 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1242 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1243 }else if(genimm(~imm,&armval)) {
1244 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1246 }else if(imm==65535) {
1247 #ifdef ARMv5_ONLY
1248 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1249 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1250 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1251 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1252 #else
1253 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1254 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1255 #endif
1256 }else{
1257 assert(imm>0&&imm<65535);
1258 #ifdef ARMv5_ONLY
1259 assem_debug("mov r14,#%d\n",imm&0xFF00);
1260 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1261 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1262 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1263 #else
1264 emit_movw(imm,HOST_TEMPREG);
1265 #endif
1266 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1267 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1268 }
1269}
1270
1271void emit_orimm(int rs,int imm,int rt)
1272{
1273 u_int armval;
790ee18e 1274 if(imm==0) {
1275 if(rs!=rt) emit_mov(rs,rt);
1276 }else if(genimm(imm,&armval)) {
57871462 1277 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1278 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1279 }else{
1280 assert(imm>0&&imm<65536);
1281 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1282 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1283 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1284 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1285 }
1286}
1287
1288void emit_xorimm(int rs,int imm,int rt)
1289{
57871462 1290 u_int armval;
790ee18e 1291 if(imm==0) {
1292 if(rs!=rt) emit_mov(rs,rt);
1293 }else if(genimm(imm,&armval)) {
57871462 1294 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1296 }else{
514ed0d9 1297 assert(imm>0&&imm<65536);
57871462 1298 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1299 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1300 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1301 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1302 }
1303}
1304
1305void emit_shlimm(int rs,u_int imm,int rt)
1306{
1307 assert(imm>0);
1308 assert(imm<32);
1309 //if(imm==1) ...
1310 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1311 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1312}
1313
1314void emit_shrimm(int rs,u_int imm,int rt)
1315{
1316 assert(imm>0);
1317 assert(imm<32);
1318 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1319 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1320}
1321
1322void emit_sarimm(int rs,u_int imm,int rt)
1323{
1324 assert(imm>0);
1325 assert(imm<32);
1326 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1327 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1328}
1329
1330void emit_rorimm(int rs,u_int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1335 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1336}
1337
1338void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1339{
1340 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1341 assert(imm>0);
1342 assert(imm<32);
1343 //if(imm==1) ...
1344 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1346 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1347 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1348}
1349
1350void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1351{
1352 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1353 assert(imm>0);
1354 assert(imm<32);
1355 //if(imm==1) ...
1356 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1357 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1358 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1359 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1360}
1361
b9b61529 1362void emit_signextend16(int rs,int rt)
1363{
1364 #ifdef ARMv5_ONLY
1365 emit_shlimm(rs,16,rt);
1366 emit_sarimm(rt,16,rt);
1367 #else
1368 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1369 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1370 #endif
1371}
1372
57871462 1373void emit_shl(u_int rs,u_int shift,u_int rt)
1374{
1375 assert(rs<16);
1376 assert(rt<16);
1377 assert(shift<16);
1378 //if(imm==1) ...
1379 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1380 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1381}
1382void emit_shr(u_int rs,u_int shift,u_int rt)
1383{
1384 assert(rs<16);
1385 assert(rt<16);
1386 assert(shift<16);
1387 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1388 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1389}
1390void emit_sar(u_int rs,u_int shift,u_int rt)
1391{
1392 assert(rs<16);
1393 assert(rt<16);
1394 assert(shift<16);
1395 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1397}
1398void emit_shlcl(int r)
1399{
1400 assem_debug("shl %%%s,%%cl\n",regname[r]);
1401 assert(0);
1402}
1403void emit_shrcl(int r)
1404{
1405 assem_debug("shr %%%s,%%cl\n",regname[r]);
1406 assert(0);
1407}
1408void emit_sarcl(int r)
1409{
1410 assem_debug("sar %%%s,%%cl\n",regname[r]);
1411 assert(0);
1412}
1413
1414void emit_shldcl(int r1,int r2)
1415{
1416 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1417 assert(0);
1418}
1419void emit_shrdcl(int r1,int r2)
1420{
1421 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1422 assert(0);
1423}
1424void emit_orrshl(u_int rs,u_int shift,u_int rt)
1425{
1426 assert(rs<16);
1427 assert(rt<16);
1428 assert(shift<16);
1429 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1430 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1431}
1432void emit_orrshr(u_int rs,u_int shift,u_int rt)
1433{
1434 assert(rs<16);
1435 assert(rt<16);
1436 assert(shift<16);
1437 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1438 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1439}
1440
1441void emit_cmpimm(int rs,int imm)
1442{
1443 u_int armval;
1444 if(genimm(imm,&armval)) {
1445 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1446 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1447 }else if(genimm(-imm,&armval)) {
1448 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1449 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1450 }else if(imm>0) {
1451 assert(imm<65536);
1452 #ifdef ARMv5_ONLY
1453 emit_movimm(imm,HOST_TEMPREG);
1454 #else
1455 emit_movw(imm,HOST_TEMPREG);
1456 #endif
1457 assem_debug("cmp %s,r14\n",regname[rs]);
1458 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1459 }else{
1460 assert(imm>-65536);
1461 #ifdef ARMv5_ONLY
1462 emit_movimm(-imm,HOST_TEMPREG);
1463 #else
1464 emit_movw(-imm,HOST_TEMPREG);
1465 #endif
1466 assem_debug("cmn %s,r14\n",regname[rs]);
1467 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1468 }
1469}
1470
1471void emit_cmovne(u_int *addr,int rt)
1472{
1473 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1474 assert(0);
1475}
1476void emit_cmovl(u_int *addr,int rt)
1477{
1478 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1479 assert(0);
1480}
1481void emit_cmovs(u_int *addr,int rt)
1482{
1483 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1484 assert(0);
1485}
1486void emit_cmovne_imm(int imm,int rt)
1487{
1488 assem_debug("movne %s,#%d\n",regname[rt],imm);
1489 u_int armval;
cfbd3c6e 1490 genimm_checked(imm,&armval);
57871462 1491 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1492}
1493void emit_cmovl_imm(int imm,int rt)
1494{
1495 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1496 u_int armval;
cfbd3c6e 1497 genimm_checked(imm,&armval);
57871462 1498 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1499}
1500void emit_cmovb_imm(int imm,int rt)
1501{
1502 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1503 u_int armval;
cfbd3c6e 1504 genimm_checked(imm,&armval);
57871462 1505 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1506}
1507void emit_cmovs_imm(int imm,int rt)
1508{
1509 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1510 u_int armval;
cfbd3c6e 1511 genimm_checked(imm,&armval);
57871462 1512 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1513}
1514void emit_cmove_reg(int rs,int rt)
1515{
1516 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1517 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1518}
1519void emit_cmovne_reg(int rs,int rt)
1520{
1521 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1522 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1523}
1524void emit_cmovl_reg(int rs,int rt)
1525{
1526 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1527 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1528}
1529void emit_cmovs_reg(int rs,int rt)
1530{
1531 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1532 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1533}
1534
/*
 * rt = 1 if rs < imm (the "lt" condition after cmp), else 0.
 * When rt aliases rs, rt is cleared only after the compare has read rs.
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * rt = 1 if the "cc" (carry clear) condition holds after cmp rs,imm, else 0.
 * When rt aliases rs, rt is cleared only after the compare has read rs.
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
/*
 * Set-less-than-immediate for a 64-bit value in rsh:rsl, result in rt.
 * Starts from the 32-bit result on the low word, then corrects it from the
 * high word. rt must not be rsh.
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // negative immediate: high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // non-negative immediate: high word is tested against zero / sign
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
/*
 * Unsigned set-less-than-immediate for a 64-bit value in rsh:rsl, result in
 * rt. Starts from the 32-bit result on the low word, then corrects it from
 * the high word. rt must not be rsh.
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // negative immediate: any high word other than -1 forces the result to 1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    // non-negative immediate: a non-zero high word forces the result to 0
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1581
1582void emit_cmp(int rs,int rt)
1583{
1584 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1585 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1586}
/* rt = 1 if rs > 0, else 0: compare with 1, preset 1, clear on "lt". */
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);    // flags from rs-1
  emit_movimm(1,rt);    // assume rs>=1
  emit_cmovl_imm(0,rt); // rs<1 -> 0
}
/*
 * rt = 1 if rs is non-zero. Flags come from a flag-setting move into rt, or
 * from a test when rs and rt are the same register; rt is then set to 1 on
 * "ne".
 */
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
/*
 * 64-bit "greater than zero" for rsh:rsl, result in rt: start from the
 * 32-bit result on the low word, then override it from the high word.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt); // non-zero high word -> 1 ...
  emit_cmovs_imm(0,rt);  // ... unless the high word is negative
}
/* rt = rsh|rsl with flags set, then rt = 1 on "ne" (value non-zero). */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt); // orrs rt,rsh,rsl
  emit_cmovne_imm(1,rt);
}
/*
 * rt = 1 if rs1 < rs2 (the "lt" condition after cmp), else 0.
 * When rt aliases a source, rt is cleared only after the compare.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * rt = 1 if the "cc" (carry clear) condition holds after cmp rs1,rs2, else 0.
 * When rt aliases a source, rt is cleared only after the compare.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1631void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1632{
1633 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1634 assert(u1!=rt);
1635 assert(u2!=rt);
1636 emit_cmp(l1,l2);
1637 emit_movimm(0,rt);
1638 emit_sbcs(u1,u2,HOST_TEMPREG);
1639 emit_cmovl_imm(1,rt);
1640}
1641void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1642{
1643 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1644 assert(u1!=rt);
1645 assert(u2!=rt);
1646 emit_cmp(l1,l2);
1647 emit_movimm(0,rt);
1648 emit_sbcs(u1,u2,HOST_TEMPREG);
1649 emit_cmovb_imm(1,rt);
1650}
1651
1652void emit_call(int a)
1653{
1654 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1655 u_int offset=genjmp(a);
1656 output_w32(0xeb000000|offset);
1657}
1658void emit_jmp(int a)
1659{
1660 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1661 u_int offset=genjmp(a);
1662 output_w32(0xea000000|offset);
1663}
/* Emit "bne a". */
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
/* Emit "beq a". */
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
/* Emit "bmi a". */
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
/* Emit "bpl a". */
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
/* Emit "blt a". */
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
/* Emit "bge a". */
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
/* Emit "bvc a". */
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
/* Emit "bcs a". */
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
/* Emit "bcc a". */
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1718
/* Unimplemented stub: logs an x86-style "push $imm" and asserts. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0); // no implementation in this backend
}
/* Unimplemented stub: logs "pusha" and asserts. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // no implementation in this backend
}
/* Unimplemented stub: logs "popa" and asserts. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // no implementation in this backend
}
1734void emit_pushreg(u_int r)
1735{
1736 assem_debug("push %%%s\n",regname[r]);
1737 assert(0);
1738}
1739void emit_popreg(u_int r)
1740{
1741 assem_debug("pop %%%s\n",regname[r]);
1742 assert(0);
1743}
1744void emit_callreg(u_int r)
1745{
1746 assem_debug("call *%%%s\n",regname[r]);
1747 assert(0);
1748}
1749void emit_jmpreg(u_int r)
1750{
1751 assem_debug("mov pc,%s\n",regname[r]);
1752 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1753}
1754
1755void emit_readword_indexed(int offset, int rs, int rt)
1756{
1757 assert(offset>-4096&&offset<4096);
1758 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1759 if(offset>=0) {
1760 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1761 }else{
1762 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1763 }
1764}
1765void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1766{
1767 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1768 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1769}
/*
 * Word load that optionally goes through a map register: with map<0 a plain
 * offset load is emitted, otherwise a load indexed by map*4 (addr must be 0).
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * 64-bit load into rh:rl. The high word (at addr) is skipped when rh<0.
 * Without a map the low word is read from addr+4; with a map, the map
 * register is incremented by 1 (scaled by 4 in the load) for the low word.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1790void emit_movsbl_indexed(int offset, int rs, int rt)
1791{
1792 assert(offset>-256&&offset<256);
1793 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1794 if(offset>=0) {
1795 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1796 }else{
1797 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1798 }
1799}
1800void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1801{
1802 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1803 else {
1804 if(addr==0) {
1805 emit_shlimm(map,2,map);
1806 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1807 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1808 }else{
1809 assert(addr>-256&&addr<256);
1810 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1811 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1812 emit_movsbl_indexed(addr, rt, rt);
1813 }
1814 }
1815}
1816void emit_movswl_indexed(int offset, int rs, int rt)
1817{
1818 assert(offset>-256&&offset<256);
1819 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1820 if(offset>=0) {
1821 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1822 }else{
1823 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1824 }
1825}
1826void emit_movzbl_indexed(int offset, int rs, int rt)
1827{
1828 assert(offset>-4096&&offset<4096);
1829 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1830 if(offset>=0) {
1831 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1832 }else{
1833 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1834 }
1835}
1836void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1837{
1838 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1839 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1840}
/*
 * Zero-extending byte load, optionally through a map register. With a map
 * and a non-zero addr, rs+addr is first computed into rt and used as base.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1853void emit_movzwl_indexed(int offset, int rs, int rt)
1854{
1855 assert(offset>-256&&offset<256);
1856 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1857 if(offset>=0) {
1858 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1859 }else{
1860 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1861 }
1862}
1863void emit_readword(int addr, int rt)
1864{
1865 u_int offset = addr-(u_int)&dynarec_local;
1866 assert(offset<4096);
1867 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1868 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1869}
1870void emit_movsbl(int addr, int rt)
1871{
1872 u_int offset = addr-(u_int)&dynarec_local;
1873 assert(offset<256);
1874 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1875 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1876}
1877void emit_movswl(int addr, int rt)
1878{
1879 u_int offset = addr-(u_int)&dynarec_local;
1880 assert(offset<256);
1881 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1882 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1883}
1884void emit_movzbl(int addr, int rt)
1885{
1886 u_int offset = addr-(u_int)&dynarec_local;
1887 assert(offset<4096);
1888 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1889 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1890}
1891void emit_movzwl(int addr, int rt)
1892{
1893 u_int offset = addr-(u_int)&dynarec_local;
1894 assert(offset<256);
1895 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1896 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1897}
1898void emit_movzwl_reg(int rs, int rt)
1899{
1900 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1901 assert(0);
1902}
1903
1904void emit_xchg(int rs, int rt)
1905{
1906 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1907 assert(0);
1908}
1909void emit_writeword_indexed(int rt, int offset, int rs)
1910{
1911 assert(offset>-4096&&offset<4096);
1912 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1913 if(offset>=0) {
1914 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1915 }else{
1916 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1917 }
1918}
1919void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1920{
1921 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1922 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1923}
/*
 * Word store that optionally goes through a map register: with map<0 a plain
 * offset store is emitted, otherwise a store indexed by map*4 (addr must be
 * 0). The temp argument is not used here.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * 64-bit store of rh:rl.
 *  - map<0: plain offset stores; the high word (at addr) is skipped when
 *    rh<0, the low word goes to addr+4.
 *  - map>=0: rh must be valid. If a separate temp register is available
 *    (temp!=rs) it receives map+1 and indexes the low-word store; otherwise
 *    rs itself is advanced by 4 before the low-word store.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1948void emit_writehword_indexed(int rt, int offset, int rs)
1949{
1950 assert(offset>-256&&offset<256);
1951 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1952 if(offset>=0) {
1953 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1954 }else{
1955 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1956 }
1957}
1958void emit_writebyte_indexed(int rt, int offset, int rs)
1959{
1960 assert(offset>-4096&&offset<4096);
1961 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1962 if(offset>=0) {
1963 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1964 }else{
1965 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1966 }
1967}
1968void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1969{
1970 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1971 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1972}
/*
 * Byte store, optionally through a map register. With a map and a non-zero
 * addr, rs+addr is first computed into temp and used as the base.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1985void emit_writeword(int rt, int addr)
1986{
1987 u_int offset = addr-(u_int)&dynarec_local;
1988 assert(offset<4096);
1989 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1990 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1991}
1992void emit_writehword(int rt, int addr)
1993{
1994 u_int offset = addr-(u_int)&dynarec_local;
1995 assert(offset<256);
1996 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1997 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1998}
1999void emit_writebyte(int rt, int addr)
2000{
2001 u_int offset = addr-(u_int)&dynarec_local;
2002 assert(offset<4096);
74426039 2003 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2004 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2005}
/* Unimplemented stub: logs an x86-style immediate word store and asserts. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0); // no implementation in this backend
}
/* Unimplemented stub: logs an x86-style immediate byte store and asserts. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0); // no implementation in this backend
}
2016
2017void emit_mul(int rs)
2018{
2019 assem_debug("mul %%%s\n",regname[rs]);
2020 assert(0);
2021}
2022void emit_imul(int rs)
2023{
2024 assem_debug("imul %%%s\n",regname[rs]);
2025 assert(0);
2026}
2027void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2028{
2029 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2030 assert(rs1<16);
2031 assert(rs2<16);
2032 assert(hi<16);
2033 assert(lo<16);
2034 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2035}
2036void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2037{
2038 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2039 assert(rs1<16);
2040 assert(rs2<16);
2041 assert(hi<16);
2042 assert(lo<16);
2043 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2044}
2045
2046void emit_div(int rs)
2047{
2048 assem_debug("div %%%s\n",regname[rs]);
2049 assert(0);
2050}
2051void emit_idiv(int rs)
2052{
2053 assem_debug("idiv %%%s\n",regname[rs]);
2054 assert(0);
2055}
/* Unimplemented stub: logs "cdq" and asserts. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0); // no implementation in this backend
}
2061
2062void emit_clz(int rs,int rt)
2063{
2064 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2065 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2066}
2067
2068void emit_subcs(int rs1,int rs2,int rt)
2069{
2070 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2071 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2072}
2073
2074void emit_shrcc_imm(int rs,u_int imm,int rt)
2075{
2076 assert(imm>0);
2077 assert(imm<32);
2078 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2079 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2080}
2081
2082void emit_negmi(int rs, int rt)
2083{
2084 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2085 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2086}
2087
2088void emit_negsmi(int rs, int rt)
2089{
2090 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2091 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2092}
2093
2094void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2095{
2096 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2097 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2098}
2099
2100void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2101{
2102 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2103 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2104}
2105
2106void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2107{
2108 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2109 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2110}
2111
2112void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2113{
2114 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2115 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2116}
2117
2118void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2119{
2120 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2121 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2122}
2123
2124void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2125{
2126 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2127 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2128}
2129
2130void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2131{
2132 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2133 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2134}
2135
2136void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2137{
2138 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2139 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2140}
2141
2142void emit_teq(int rs, int rt)
2143{
2144 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2145 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2146}
2147
2148void emit_rsbimm(int rs, int imm, int rt)
2149{
2150 u_int armval;
cfbd3c6e 2151 genimm_checked(imm,&armval);
57871462 2152 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2153 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2154}
2155
2156// Load 2 immediates optimizing for small code size
2157void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2158{
2159 emit_movimm(imm1,rt1);
2160 u_int armval;
2161 if(genimm(imm2-imm1,&armval)) {
2162 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2163 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2164 }else if(genimm(imm1-imm2,&armval)) {
2165 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2166 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2167 }
2168 else emit_movimm(imm2,rt2);
2169}
2170
2171// Conditionally select one of two immediates, optimizing for small code size
2172// This will only be called if HAVE_CMOV_IMM is defined
2173void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2174{
2175 u_int armval;
2176 if(genimm(imm2-imm1,&armval)) {
2177 emit_movimm(imm1,rt);
2178 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2179 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2180 }else if(genimm(imm1-imm2,&armval)) {
2181 emit_movimm(imm1,rt);
2182 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2183 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2184 }
2185 else {
2186 #ifdef ARMv5_ONLY
2187 emit_movimm(imm1,rt);
2188 add_literal((int)out,imm2);
2189 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2190 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2191 #else
2192 emit_movw(imm1&0x0000FFFF,rt);
2193 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2194 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2195 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2196 }
2197 emit_movt(imm1&0xFFFF0000,rt);
2198 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2199 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2200 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2201 }
2202 #endif
2203 }
2204}
2205
// Special case for checking invalid_code (immediate-address form).
// Not implemented in this backend: any call trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2211
2212// special case for checking invalid_code
2213void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2214{
2215 assert(imm<128&&imm>=0);
2216 assert(r>=0&&r<16);
2217 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2218 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2219 emit_cmpimm(HOST_TEMPREG,imm);
2220}
2221
2222// special case for tlb mapping
2223void emit_addsr12(int rs1,int rs2,int rt)
2224{
2225 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2226 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2227}
2228
/* Emit "blne a": a call taken only under the "ne" condition. The branch
 * offset field is produced by genjmp(a). */
void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
2235
// Used to preload hash table entries.
// NOTE(review): the bytes 0x0F 0x18 followed by a ModRM byte and a 32-bit
// address look like an x86 prefetch encoding rather than an ARM instruction;
// confirm that this function is never reached on ARM before relying on it.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2245void emit_prefetchreg(int r)
2246{
2247 assem_debug("pld %s\n",regname[r]);
2248 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2249}
2250
2251// Special case for mini_ht
2252void emit_ldreq_indexed(int rs, u_int offset, int rt)
2253{
2254 assert(offset<4096);
2255 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2256 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2257}
2258
2259void emit_flds(int r,int sr)
2260{
2261 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2262 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2263}
2264
2265void emit_vldr(int r,int vr)
2266{
2267 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2268 output_w32(0xed900b00|(vr<<12)|(r<<16));
2269}
2270
2271void emit_fsts(int sr,int r)
2272{
2273 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2274 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2275}
2276
2277void emit_vstr(int vr,int r)
2278{
2279 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2280 output_w32(0xed800b00|(vr<<12)|(r<<16));
2281}
2282
/* Emit "ftosizs s<d>, s<s>". Both operands use the split single-register
 * encoding: upper bits of the register number in a 4-bit field, bit 0 in a
 * separate bit (22 for the destination, 5 for the source). */
void emit_ftosizs(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst_field|src_field);
}
2288
/* Emit "ftosizd s<d>, d<s>". The destination uses the split single-register
 * encoding; the source register number (low 3 bits) sits in the low bits. */
void emit_ftosizd(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=s&7;
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst_field|src_field);
}
2294
/* Emit "fsitos s<d>, s<s>". Both operands use the split single-register
 * encoding (bit 0 of the number in bit 22 for d, bit 5 for s). */
void emit_fsitos(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst_field|src_field);
}
2300
/* Emit "fsitod d<d>, s<s>". The destination number (low 3 bits) goes to
 * bit 12; the source uses the split single-register encoding. */
void emit_fsitod(int s,int d)
{
  const int dst_field=(d&7)<<12;
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst_field|src_field);
}
2306
/* Emit "fcvtds d<d>, s<s>". The destination number (low 3 bits) goes to
 * bit 12; the source uses the split single-register encoding. */
void emit_fcvtds(int s,int d)
{
  const int dst_field=(d&7)<<12;
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst_field|src_field);
}
2312
/* Emit "fcvtsd s<d>, d<s>". The destination uses the split single-register
 * encoding; the source register number (low 3 bits) sits in the low bits. */
void emit_fcvtsd(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=s&7;
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst_field|src_field);
}
2318
/* Emit "fsqrts s<d>, s<s>". Both operands are encoded as single-precision
 * registers (split encoding: bit 0 of the number in bit 22 for d, bit 5 for
 * s), so the debug text names both as s-registers. */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2324
/* Emit "fsqrtd d<d>, d<s>". Both operands are encoded as double-precision
 * register numbers (low 3 bits each), so the debug text names both as
 * d-registers. */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2330
/* Emit "fabss s<d>, s<s>". Both operands are encoded as single-precision
 * registers (split encoding), so the debug text names both as s-registers. */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2336
/* Emit "fabsd d<d>, d<s>". Both operands are encoded as double-precision
 * register numbers (low 3 bits each), so the debug text names both as
 * d-registers. */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2342
/* Emit "fnegs s<d>, s<s>". Both operands are encoded as single-precision
 * registers (split encoding), so the debug text names both as s-registers. */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2348
/* Emit "fnegd d<d>, d<s>". Both operands are encoded as double-precision
 * register numbers (low 3 bits each), so the debug text names both as
 * d-registers. */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2354
/* Emit "fadds s<d>, s<s1>, s<s2>". Each single-precision operand uses the
 * split encoding: a 4-bit field plus one extra bit for bit 0 of the number
 * (bit 22 for d, bit 7 for s1, bit 5 for s2). */
void emit_fadds(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst_field|lhs_field|rhs_field);
}
2360
/* Emit "faddd d<d>, d<s1>, d<s2>". Only the low 3 bits of each register
 * number are encoded (d at bit 12, s1 at bit 16, s2 at bit 0). */
void emit_faddd(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst_field|lhs_field|rhs_field);
}
2366
/* Emit "fsubs s<d>, s<s1>, s<s2>" using the split single-register encoding
 * for all three operands. */
void emit_fsubs(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst_field|lhs_field|rhs_field);
}
2372
/* Emit "fsubd d<d>, d<s1>, d<s2>". Only the low 3 bits of each register
 * number are encoded (d at bit 12, s1 at bit 16, s2 at bit 0). */
void emit_fsubd(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst_field|lhs_field|rhs_field);
}
2378
/* Emit "fmuls s<d>, s<s1>, s<s2>" using the split single-register encoding
 * for all three operands. */
void emit_fmuls(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst_field|lhs_field|rhs_field);
}
2384
/* Emit "fmuld d<d>, d<s1>, d<s2>". Only the low 3 bits of each register
 * number are encoded (d at bit 12, s1 at bit 16, s2 at bit 0). */
void emit_fmuld(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst_field|lhs_field|rhs_field);
}
2390
/* Emit "fdivs s<d>, s<s1>, s<s2>" using the split single-register encoding
 * for all three operands. */
void emit_fdivs(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst_field|lhs_field|rhs_field);
}
2396
/* Emit "fdivd d<d>, d<s1>, d<s2>". Only the low 3 bits of each register
 * number are encoded (d at bit 12, s1 at bit 16, s2 at bit 0). */
void emit_fdivd(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst_field|lhs_field|rhs_field);
}
2402
2403void emit_fcmps(int x,int y)
2404{
2405 assem_debug("fcmps s14, s15\n");
2406 output_w32(0xeeb47a67);
2407}
2408
2409void emit_fcmpd(int x,int y)
2410{
2411 assem_debug("fcmpd d6, d7\n");
2412 output_w32(0xeeb46b47);
2413}
2414
2415void emit_fmstat()
2416{
2417 assem_debug("fmstat\n");
2418 output_w32(0xeef1fa10);
2419}
2420
2421void emit_bicne_imm(int rs,int imm,int rt)
2422{
2423 u_int armval;
cfbd3c6e 2424 genimm_checked(imm,&armval);
57871462 2425 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2426 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2427}
2428
2429void emit_biccs_imm(int rs,int imm,int rt)
2430{
2431 u_int armval;
cfbd3c6e 2432 genimm_checked(imm,&armval);
57871462 2433 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2434 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
2437void emit_bicvc_imm(int rs,int imm,int rt)
2438{
2439 u_int armval;
cfbd3c6e 2440 genimm_checked(imm,&armval);
57871462 2441 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2442 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2443}
2444
2445void emit_bichi_imm(int rs,int imm,int rt)
2446{
2447 u_int armval;
cfbd3c6e 2448 genimm_checked(imm,&armval);
57871462 2449 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2450 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2451}
2452
2453void emit_orrvs_imm(int rs,int imm,int rt)
2454{
2455 u_int armval;
cfbd3c6e 2456 genimm_checked(imm,&armval);
57871462 2457 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2458 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2459}
2460
b9b61529 2461void emit_orrne_imm(int rs,int imm,int rt)
2462{
2463 u_int armval;
cfbd3c6e 2464 genimm_checked(imm,&armval);
b9b61529 2465 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2466 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2467}
2468
2469void emit_andne_imm(int rs,int imm,int rt)
2470{
2471 u_int armval;
cfbd3c6e 2472 genimm_checked(imm,&armval);
b9b61529 2473 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2474 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2475}
2476
57871462 2477void emit_jno_unlikely(int a)
2478{
2479 //emit_jno(a);
2480 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2481 output_w32(0x72800000|rd_rn_rm(15,15,0));
2482}
2483
2484// Save registers before function call
2485void save_regs(u_int reglist)
2486{
2487 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2488 if(!reglist) return;
2489 assem_debug("stmia fp,{");
2490 if(reglist&1) assem_debug("r0, ");
2491 if(reglist&2) assem_debug("r1, ");
2492 if(reglist&4) assem_debug("r2, ");
2493 if(reglist&8) assem_debug("r3, ");
2494 if(reglist&0x1000) assem_debug("r12");
2495 assem_debug("}\n");
2496 output_w32(0xe88b0000|reglist);
2497}
2498// Restore registers after function call
2499void restore_regs(u_int reglist)
2500{
2501 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2502 if(!reglist) return;
2503 assem_debug("ldmia fp,{");
2504 if(reglist&1) assem_debug("r0, ");
2505 if(reglist&2) assem_debug("r1, ");
2506 if(reglist&4) assem_debug("r2, ");
2507 if(reglist&8) assem_debug("r3, ");
2508 if(reglist&0x1000) assem_debug("r12");
2509 assem_debug("}\n");
2510 output_w32(0xe89b0000|reglist);
2511}
2512
2513// Write back consts using r14 so we don't disturb the other registers
2514void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2515{
2516 int hr;
2517 for(hr=0;hr<HOST_REGS;hr++) {
2518 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2519 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2520 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2521 int value=constmap[i][hr];
2522 if(value==0) {
2523 emit_zeroreg(HOST_TEMPREG);
2524 }
2525 else {
2526 emit_movimm(value,HOST_TEMPREG);
2527 }
2528 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2529#ifndef FORCE32
57871462 2530 if((i_is32>>i_regmap[hr])&1) {
2531 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2532 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2533 }
24385cae 2534#endif
57871462 2535 }
2536 }
2537 }
2538 }
2539}
2540
2541/* Stubs/epilogue */
2542
2543void literal_pool(int n)
2544{
2545 if(!literalcount) return;
2546 if(n) {
2547 if((int)out-literals[0][0]<4096-n) return;
2548 }
2549 u_int *ptr;
2550 int i;
2551 for(i=0;i<literalcount;i++)
2552 {
2553 ptr=(u_int *)literals[i][0];
2554 u_int offset=(u_int)out-(u_int)ptr-8;
2555 assert(offset<4096);
2556 assert(!(offset&3));
2557 *ptr|=offset;
2558 output_w32(literals[i][1]);
2559 }
2560 literalcount=0;
2561}
2562
2563void literal_pool_jumpover(int n)
2564{
2565 if(!literalcount) return;
2566 if(n) {
2567 if((int)out-literals[0][0]<4096-n) return;
2568 }
2569 int jaddr=(int)out;
2570 emit_jmp(0);
2571 literal_pool(0);
2572 set_jump_target(jaddr,(int)out);
2573}
2574
// Emit the tail of an external jump: loads target into r0 and addr into r1
// through the literal pool (emit_loadlp), then jumps to linker.
// addr is the address of an already emitted instruction; the first assert
// checks a bit pattern in its top byte, the second that addr lies inside
// [BASE_ADDR, BASE_ADDR+(1<<TARGET_SIZE_2)).
// NOTE(review): declared without a return type (implicit int) and returns no
// value; callers must not use the result.
2575emit_extjump2(int addr, int target, int linker)
2576{
2577 u_char *ptr=(u_char *)addr;
2578 assert((ptr[3]&0x0e)==0xa);
2579 emit_loadlp(target,0);
2580 emit_loadlp(addr,1);
24385cae 2581 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2582 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2583//DEBUG >
// Optional cycle-count bookkeeping, only compiled with DEBUG_CYCLE_COUNT
2584#ifdef DEBUG_CYCLE_COUNT
2585 emit_readword((int)&last_count,ECX);
2586 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2587 emit_readword((int)&next_interupt,ECX);
2588 emit_writeword(HOST_CCREG,(int)&Count);
2589 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2590 emit_writeword(ECX,(int)&last_count);
2591#endif
2592//DEBUG <
2593 emit_jmp(linker);
2594}
2595
2596emit_extjump(int addr, int target)
2597{
2598 emit_extjump2(addr, target, (int)dyna_linker);
2599}
2600emit_extjump_ds(int addr, int target)
2601{
2602 emit_extjump2(addr, target, (int)dyna_linker_ds);
2603}
2604
cbbab9cd 2605#ifdef PCSX
2606#include "pcsxmem_inline.c"
2607#endif
2608
// Emit the out-of-line slow path for a load, described by stub entry n.
// Stub fields read here:
//   [0] stub type (LOADB/LOADBU/LOADH/LOADHU/LOADW/LOADD_STUB)
//   [1] address passed to set_jump_target() so it lands on this stub
//   [2] address jumped to at the end ("return address")
//   [3] instruction index i
//   [4] host register holding the address (rs)
//   [5] struct regstat pointer for the instruction
//   [6] value folded into the cycle count passed in r2 (2*[6]+2)
//   [7] register list handed to save_regs()/restore_regs()
// The emitted code stores rs to `address`, calls indirect_jump_indexed with
// the handler table in r0, and then loads the result from readmem_dword.
57871462 2609do_readstub(int n)
2610{
2611 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2612 literal_pool(256);
2613 set_jump_target(stubs[n][1],(int)out);
2614 int type=stubs[n][0];
2615 int i=stubs[n][3];
2616 int rs=stubs[n][4];
2617 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2618 u_int reglist=stubs[n][7];
2619 signed char *i_regmap=i_regs->regmap;
2620 int addr=get_reg(i_regmap,AGEN1+(i&1));
2621 int rth,rt;
2622 int ds;
// C1LS/C2LS/LOADLR instructions receive the loaded value in FTEMP,
// everything else in the host register mapped to rt1[i].
b9b61529 2623 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2624 rth=get_reg(i_regmap,FTEMP|64);
2625 rt=get_reg(i_regmap,FTEMP);
2626 }else{
2627 rth=get_reg(i_regmap,rt1[i]|64);
2628 rt=get_reg(i_regmap,rt1[i]);
2629 }
2630 assert(rs>=0);
// Fallbacks when no address-generation register is mapped
57871462 2631 if(addr<0) addr=rt;
535d208a 2632 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2633 assert(addr>=0);
// Pick the handler table that matches the access width
2634 int ftable=0;
2635 if(type==LOADB_STUB||type==LOADBU_STUB)
2636 ftable=(int)readmemb;
2637 if(type==LOADH_STUB||type==LOADHU_STUB)
2638 ftable=(int)readmemh;
2639 if(type==LOADW_STUB)
2640 ftable=(int)readmem;
24385cae 2641#ifndef FORCE32
57871462 2642 if(type==LOADD_STUB)
2643 ftable=(int)readmemd;
24385cae 2644#endif
2645 assert(ftable!=0);
57871462 2646 emit_writeword(rs,(int)&address);
2647 //emit_pusha();
2648 save_regs(reglist);
// Non-PCSX builds write back dirty/constant registers before the call;
// ds is set when i_regs is not &regs[i].
97a238a6 2649#ifndef PCSX
57871462 2650 ds=i_regs!=&regs[i];
2651 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2652 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2653 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2654 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2655 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2656#endif
// Call setup: r1 gets rs shifted by 16, r0 the handler table, r2 the adjusted
// cycle count (loaded from CCREG first when it has no host register).
57871462 2657 emit_shrimm(rs,16,1);
2658 int cc=get_reg(i_regmap,CCREG);
2659 if(cc<0) {
2660 emit_loadreg(CCREG,2);
2661 }
2662 emit_movimm(ftable,0);
2663 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2664#ifndef PCSX
57871462 2665 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2666#endif
57871462 2667 //emit_readword((int)&last_count,temp);
2668 //emit_add(cc,temp,cc);
2669 //emit_writeword(cc,(int)&Count);
2670 //emit_mov(15,14);
2671 emit_call((int)&indirect_jump_indexed);
2672 //emit_callreg(rs);
2673 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2674#ifndef PCSX
57871462 2675 // We really shouldn't need to update the count here,
2676 // but not doing so causes random crashes...
2677 emit_readword((int)&Count,HOST_TEMPREG);
2678 emit_readword((int)&next_interupt,2);
2679 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2680 emit_writeword(2,(int)&last_count);
2681 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2682 if(cc<0) {
2683 emit_storereg(CCREG,HOST_TEMPREG);
2684 }
f51dc36c 2685#endif
57871462 2686 //emit_popa();
2687 restore_regs(reglist);
2688 //if((cc=get_reg(regmap,CCREG))>=0) {
2689 // emit_loadreg(CCREG,cc);
2690 //}
// Fetch the result from readmem_dword using the load variant for the stub type
f18c0f46 2691 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2692 assert(rt>=0);
2693 if(type==LOADB_STUB)
2694 emit_movsbl((int)&readmem_dword,rt);
2695 if(type==LOADBU_STUB)
2696 emit_movzbl((int)&readmem_dword,rt);
2697 if(type==LOADH_STUB)
2698 emit_movswl((int)&readmem_dword,rt);
2699 if(type==LOADHU_STUB)
2700 emit_movzwl((int)&readmem_dword,rt);
2701 if(type==LOADW_STUB)
2702 emit_readword((int)&readmem_dword,rt);
2703 if(type==LOADD_STUB) {
2704 emit_readword((int)&readmem_dword,rt);
2705 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2706 }
57871462 2707 }
2708 emit_jmp(stubs[n][2]); // return address
2709}
2710
// Emit a handler-based load inline for an address known while generating
// code (addr). type selects the handler table, target is the guest register
// receiving the value, adj feeds the cycle count adjustment and reglist is
// passed to save_regs()/restore_regs().
// The handler is read from ftable[addr>>16] at code-generation time and loaded
// into r0 as an immediate before calling indirect_jump.
// With PCSX defined, pcsx_direct_read() is tried first and, when it returns
// non-zero, nothing else is emitted.
2711inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2712{
2713 int rs=get_reg(regmap,target);
2714 int rth=get_reg(regmap,target|64);
2715 int rt=get_reg(regmap,target);
535d208a 2716 if(rs<0) rs=get_reg(regmap,-1);
57871462 2717 assert(rs>=0);
// Pick the handler table that matches the access width
57871462 2718 int ftable=0;
2719 if(type==LOADB_STUB||type==LOADBU_STUB)
2720 ftable=(int)readmemb;
2721 if(type==LOADH_STUB||type==LOADHU_STUB)
2722 ftable=(int)readmemh;
2723 if(type==LOADW_STUB)
2724 ftable=(int)readmem;
24385cae 2725#ifndef FORCE32
57871462 2726 if(type==LOADD_STUB)
2727 ftable=(int)readmemd;
24385cae 2728#endif
2729 assert(ftable!=0);
cbbab9cd 2730#ifdef PCSX
2731 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2732 return;
2733#endif
// For target 0 the address is loaded into rs here before being stored
fd99c415 2734 if(target==0)
2735 emit_movimm(addr,rs);
57871462 2736 emit_writeword(rs,(int)&address);
2737 //emit_pusha();
2738 save_regs(reglist);
2739 //emit_shrimm(rs,16,1);
2740 int cc=get_reg(regmap,CCREG);
2741 if(cc<0) {
2742 emit_loadreg(CCREG,2);
2743 }
2744 //emit_movimm(ftable,0);
2745 emit_movimm(((u_int *)ftable)[addr>>16],0);
2746 //emit_readword((int)&last_count,12);
2747 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2748#ifndef PCSX
57871462 2749 if((signed int)addr>=(signed int)0xC0000000) {
2750 // Pagefault address
2751 int ds=regmap!=regs[i].regmap;
2752 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2753 }
f51dc36c 2754#endif
57871462 2755 //emit_add(12,2,2);
2756 //emit_writeword(2,(int)&Count);
2757 //emit_call(((u_int *)ftable)[addr>>16]);
2758 emit_call((int)&indirect_jump);
f51dc36c 2759#ifndef PCSX
57871462 2760 // We really shouldn't need to update the count here,
2761 // but not doing so causes random crashes...
2762 emit_readword((int)&Count,HOST_TEMPREG);
2763 emit_readword((int)&next_interupt,2);
2764 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2765 emit_writeword(2,(int)&last_count);
2766 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2767 if(cc<0) {
2768 emit_storereg(CCREG,HOST_TEMPREG);
2769 }
f51dc36c 2770#endif
57871462 2771 //emit_popa();
2772 restore_regs(reglist);
// Fetch the result from readmem_dword using the load variant for the stub type
fd99c415 2773 if(rt>=0) {
2774 if(type==LOADB_STUB)
2775 emit_movsbl((int)&readmem_dword,rt);
2776 if(type==LOADBU_STUB)
2777 emit_movzbl((int)&readmem_dword,rt);
2778 if(type==LOADH_STUB)
2779 emit_movswl((int)&readmem_dword,rt);
2780 if(type==LOADHU_STUB)
2781 emit_movzwl((int)&readmem_dword,rt);
2782 if(type==LOADW_STUB)
2783 emit_readword((int)&readmem_dword,rt);
2784 if(type==LOADD_STUB) {
2785 emit_readword((int)&readmem_dword,rt);
2786 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2787 }
57871462 2788 }
2789}
2790
// Emit the out-of-line slow path for a store, described by stub entry n.
// Stub fields are read the same way as in do_readstub: [0] type, [1] address
// passed to set_jump_target(), [2] return address, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [6] cycle adjustment, [7] register list for save_regs()/restore_regs().
// The emitted code stores the address to `address` and the data to
// byte/hword/word/dword according to the type, then calls
// indirect_jump_indexed with the handler table in r0.
2791do_writestub(int n)
2792{
2793 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2794 literal_pool(256);
2795 set_jump_target(stubs[n][1],(int)out);
2796 int type=stubs[n][0];
2797 int i=stubs[n][3];
2798 int rs=stubs[n][4];
2799 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2800 u_int reglist=stubs[n][7];
2801 signed char *i_regmap=i_regs->regmap;
2802 int addr=get_reg(i_regmap,AGEN1+(i&1));
2803 int rth,rt,r;
2804 int ds;
// C1LS/C2LS instructions take the value to store from FTEMP, others from
// rs2[i]; r remembers which guest register was chosen.
b9b61529 2805 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2806 rth=get_reg(i_regmap,FTEMP|64);
2807 rt=get_reg(i_regmap,r=FTEMP);
2808 }else{
2809 rth=get_reg(i_regmap,rs2[i]|64);
2810 rt=get_reg(i_regmap,r=rs2[i]);
2811 }
2812 assert(rs>=0);
2813 assert(rt>=0);
2814 if(addr<0) addr=get_reg(i_regmap,-1);
2815 assert(addr>=0);
// Pick the handler table that matches the access width
2816 int ftable=0;
2817 if(type==STOREB_STUB)
2818 ftable=(int)writememb;
2819 if(type==STOREH_STUB)
2820 ftable=(int)writememh;
2821 if(type==STOREW_STUB)
2822 ftable=(int)writemem;
24385cae 2823#ifndef FORCE32
57871462 2824 if(type==STORED_STUB)
2825 ftable=(int)writememd;
24385cae 2826#endif
2827 assert(ftable!=0);
57871462 2828 emit_writeword(rs,(int)&address);
2829 //emit_shrimm(rs,16,rs);
2830 //emit_movmem_indexedx4(ftable,rs,rs);
// Hand the data over through the global matching the access width
2831 if(type==STOREB_STUB)
2832 emit_writebyte(rt,(int)&byte);
2833 if(type==STOREH_STUB)
2834 emit_writehword(rt,(int)&hword);
2835 if(type==STOREW_STUB)
2836 emit_writeword(rt,(int)&word);
2837 if(type==STORED_STUB) {
3d624f89 2838#ifndef FORCE32
57871462 2839 emit_writeword(rt,(int)&dword);
// When r is 0 the low register is stored again as the upper word
2840 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2841#else
2842 printf("STORED_STUB\n");
2843#endif
57871462 2844 }
2845 //emit_pusha();
2846 save_regs(reglist);
// Non-PCSX builds write back dirty/constant registers before the call;
// ds is set when i_regs is not &regs[i].
97a238a6 2847#ifndef PCSX
57871462 2848 ds=i_regs!=&regs[i];
2849 int real_rs=get_reg(i_regmap,rs1[i]);
2850 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2851 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2852 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2853 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2854#endif
// Call setup: r1 gets rs shifted by 16, r0 the handler table, r2 the adjusted
// cycle count (loaded from CCREG first when it has no host register).
57871462 2855 emit_shrimm(rs,16,1);
2856 int cc=get_reg(i_regmap,CCREG);
2857 if(cc<0) {
2858 emit_loadreg(CCREG,2);
2859 }
2860 emit_movimm(ftable,0);
2861 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2862#ifndef PCSX
57871462 2863 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2864#endif
57871462 2865 //emit_readword((int)&last_count,temp);
2866 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2867 //emit_add(cc,temp,cc);
2868 //emit_writeword(cc,(int)&Count);
2869 emit_call((int)&indirect_jump_indexed);
2870 //emit_callreg(rs);
// After the call the cycle counter is rebuilt from Count and next_interupt,
// and last_count is updated (done in all builds here, unlike do_readstub).
2871 emit_readword((int)&Count,HOST_TEMPREG);
2872 emit_readword((int)&next_interupt,2);
2873 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2874 emit_writeword(2,(int)&last_count);
2875 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2876 if(cc<0) {
2877 emit_storereg(CCREG,HOST_TEMPREG);
2878 }
2879 //emit_popa();
2880 restore_regs(reglist);
2881 //if((cc=get_reg(regmap,CCREG))>=0) {
2882 // emit_loadreg(CCREG,cc);
2883 //}
2884 emit_jmp(stubs[n][2]); // return address
2885}
2886
// Emit a handler-based store inline for an address known while generating
// code (addr). type selects the handler table, target is the guest register
// holding the data, adj feeds the cycle count adjustment and reglist is passed
// to save_regs()/restore_regs().
// The handler is read from ftable[addr>>16] at code-generation time and loaded
// into r0 as an immediate before calling indirect_jump.
// With PCSX defined, pcsx_direct_write() is tried first and, when it returns
// non-zero, nothing else is emitted.
2887inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2888{
2889 int rs=get_reg(regmap,-1);
2890 int rth=get_reg(regmap,target|64);
2891 int rt=get_reg(regmap,target);
2892 assert(rs>=0);
2893 assert(rt>=0);
cbbab9cd 2894#ifdef PCSX
2895 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2896 return;
2897#endif
// Pick the handler table that matches the access width
57871462 2898 int ftable=0;
2899 if(type==STOREB_STUB)
2900 ftable=(int)writememb;
2901 if(type==STOREH_STUB)
2902 ftable=(int)writememh;
2903 if(type==STOREW_STUB)
2904 ftable=(int)writemem;
24385cae 2905#ifndef FORCE32
57871462 2906 if(type==STORED_STUB)
2907 ftable=(int)writememd;
24385cae 2908#endif
2909 assert(ftable!=0);
57871462 2910 emit_writeword(rs,(int)&address);
2911 //emit_shrimm(rs,16,rs);
2912 //emit_movmem_indexedx4(ftable,rs,rs);
// Hand the data over through the global matching the access width
2913 if(type==STOREB_STUB)
2914 emit_writebyte(rt,(int)&byte);
2915 if(type==STOREH_STUB)
2916 emit_writehword(rt,(int)&hword);
2917 if(type==STOREW_STUB)
2918 emit_writeword(rt,(int)&word);
2919 if(type==STORED_STUB) {
3d624f89 2920#ifndef FORCE32
57871462 2921 emit_writeword(rt,(int)&dword);
// For target 0 the low register is stored again as the upper word
2922 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2923#else
2924 printf("STORED_STUB\n");
2925#endif
57871462 2926 }
2927 //emit_pusha();
2928 save_regs(reglist);
2929 //emit_shrimm(rs,16,1);
2930 int cc=get_reg(regmap,CCREG);
2931 if(cc<0) {
2932 emit_loadreg(CCREG,2);
2933 }
2934 //emit_movimm(ftable,0);
2935 emit_movimm(((u_int *)ftable)[addr>>16],0);
2936 //emit_readword((int)&last_count,12);
2937 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2938#ifndef PCSX
57871462 2939 if((signed int)addr>=(signed int)0xC0000000) {
2940 // Pagefault address
2941 int ds=regmap!=regs[i].regmap;
2942 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2943 }
f51dc36c 2944#endif
57871462 2945 //emit_add(12,2,2);
2946 //emit_writeword(2,(int)&Count);
2947 //emit_call(((u_int *)ftable)[addr>>16]);
2948 emit_call((int)&indirect_jump);
// After the call the cycle counter is rebuilt from Count and next_interupt,
// and last_count is updated.
2949 emit_readword((int)&Count,HOST_TEMPREG);
2950 emit_readword((int)&next_interupt,2);
2951 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2952 emit_writeword(2,(int)&last_count);
2953 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2954 if(cc<0) {
2955 emit_storereg(CCREG,HOST_TEMPREG);
2956 }
2957 //emit_popa();
2958 restore_regs(reglist);
2959}
2960
// Emit the out-of-line slow path for an unaligned store (SWL/SWR only, see
// the opcode assert). Stub fields read here: [1] address passed to
// set_jump_target(), [2] return address, [3] instruction index,
// [4] struct regstat pointer, [5] host register holding the address,
// [6] cycle adjustment, [7] register list.
// Sequence: read the word at (addr & ~3) through the readmem table, merge the
// bytes of rt into it using a shift derived from the low address bits, then
// write the merged word back through the writemem table.
2961do_unalignedwritestub(int n)
2962{
b7918751 2963 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2964 literal_pool(256);
57871462 2965 set_jump_target(stubs[n][1],(int)out);
b7918751 2966
2967 int i=stubs[n][3];
2968 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2969 int addr=stubs[n][5];
2970 u_int reglist=stubs[n][7];
2971 signed char *i_regmap=i_regs->regmap;
2972 int temp2=get_reg(i_regmap,FTEMP);
2973 int rt;
2974 int ds, real_rs;
2975 rt=get_reg(i_regmap,rs2[i]);
2976 assert(rt>=0);
2977 assert(addr>=0);
2978 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// addr is added to the save/restore list, temp2 is removed from it
2979 reglist|=(1<<addr);
2980 reglist&=~(1<<temp2);
2981
// Word-aligned address goes to `address` for the read handler
2982 emit_andimm(addr,0xfffffffc,temp2);
2983 emit_writeword(temp2,(int)&address);
2984
2985 save_regs(reglist);
97a238a6 2986#ifndef PCSX
b7918751 2987 ds=i_regs!=&regs[i];
2988 real_rs=get_reg(i_regmap,rs1[i]);
2989 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2990 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2991 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2992 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2993#endif
// First call: read the existing word through the readmem table
b7918751 2994 emit_shrimm(addr,16,1);
2995 int cc=get_reg(i_regmap,CCREG);
2996 if(cc<0) {
2997 emit_loadreg(CCREG,2);
2998 }
2999 emit_movimm((u_int)readmem,0);
3000 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3001#ifndef PCSX
3002 // pagefault address
3003 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3004#endif
b7918751 3005 emit_call((int)&indirect_jump_indexed);
3006 restore_regs(reglist);
3007
// Merge: temp (aliasing addr) becomes (addr<<3)&24, optionally xor 24; it is
// used as the shift amount for masking the old word and inserting rt.
3008 emit_readword((int)&readmem_dword,temp2);
3009 int temp=addr; //hmh
3010 emit_shlimm(addr,3,temp);
3011 emit_andimm(temp,24,temp);
3012#ifdef BIG_ENDIAN_MIPS
3013 if (opcode[i]==0x2e) // SWR
3014#else
3015 if (opcode[i]==0x2a) // SWL
3016#endif
3017 emit_xorimm(temp,24,temp);
3018 emit_movimm(-1,HOST_TEMPREG);
55439448 3019 if (opcode[i]==0x2a) { // SWL
b7918751 3020 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3021 emit_orrshr(rt,temp,temp2);
3022 }else{
3023 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3024 emit_orrshl(rt,temp,temp2);
3025 }
// Second call: reload the aligned address and write the merged word through
// the writemem table (return address set up by hand with mov lr,pc).
3026 emit_readword((int)&address,addr);
3027 emit_writeword(temp2,(int)&word);
3028 //save_regs(reglist); // don't need to, no state changes
3029 emit_shrimm(addr,16,1);
3030 emit_movimm((u_int)writemem,0);
3031 //emit_call((int)&indirect_jump_indexed);
3032 emit_mov(15,14);
3033 emit_readword_dualindexedx4(0,1,15);
3034 emit_readword((int)&Count,HOST_TEMPREG);
3035 emit_readword((int)&next_interupt,2);
3036 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3037 emit_writeword(2,(int)&last_count);
3038 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3039 if(cc<0) {
3040 emit_storereg(CCREG,HOST_TEMPREG);
3041 }
3042 restore_regs(reglist);
57871462 3043 emit_jmp(stubs[n][2]); // return address
3044}
3045
// Debug helper: prints the eight arguments in the order a,b,c,d,ebp,esi,edi,
// followed in parentheses by the int located just before the edi parameter.
// NOTE(review): (&edi)[-1] reads outside the parameter object; what it yields
// depends on the calling convention - confirm this helper is still wanted.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int before_edi=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,before_edi);
}
3050
3051do_invstub(int n)
3052{
3053 literal_pool(20);
3054 u_int reglist=stubs[n][3];
3055 set_jump_target(stubs[n][1],(int)out);
3056 save_regs(reglist);
3057 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3058 emit_call((int)&invalidate_addr);
3059 restore_regs(reglist);
3060 emit_jmp(stubs[n][2]); // return address
3061}
3062
// Emit the verification prologue for the block entry at instruction i.
// Loads r1 = address of the code to check (source, or start when start is
// not below 0xC0000000 as a signed value; always source with PCSX),
// r2 = copy, r3 = slen*4 and r0 = start+i*4, then calls verify_code (or
// verify_code_vm), reloads the entry registers and jumps to instr_addr[i].
// Returns the address of the emitted register-loading code, or
// instr_addr[i] when load_regs_entry() emitted nothing.
3063int do_dirty_stub(int i)
3064{
3065 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3066 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3067 #ifdef PCSX
3068 addr=(u_int)source;
3069 #endif
57871462 3070 // Careful about the code output here, verify_dirty needs to parse it.
3071 #ifdef ARMv5_ONLY
ac545b3a 3072 emit_loadlp(addr,1);
57871462 3073 emit_loadlp((int)copy,2);
3074 emit_loadlp(slen*4,3);
3075 #else
ac545b3a 3076 emit_movw(addr&0x0000FFFF,1);
57871462 3077 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3078 emit_movt(addr&0xFFFF0000,1);
57871462 3079 emit_movt(((u_int)copy)&0xFFFF0000,2);
3080 emit_movw(slen*4,3);
3081 #endif
3082 emit_movimm(start+i*4,0);
3083 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
// entry marks where the register reloads begin; if none were emitted the
// instruction address itself is the entry point
3084 int entry=(int)out;
3085 load_regs_entry(i);
3086 if(entry==(int)out) entry=instr_addr[i];
3087 emit_jmp(instr_addr[i]);
3088 return entry;
3089}
3090
3091void do_dirty_stub_ds()
3092{
3093 // Careful about the code output here, verify_dirty needs to parse it.
3094 #ifdef ARMv5_ONLY
3095 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3096 emit_loadlp((int)copy,2);
3097 emit_loadlp(slen*4,3);
3098 #else
3099 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3100 emit_movw(((u_int)copy)&0x0000FFFF,2);
3101 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3102 emit_movt(((u_int)copy)&0xFFFF0000,2);
3103 emit_movw(slen*4,3);
3104 #endif
3105 emit_movimm(start+1,0);
3106 emit_call((int)&verify_code_ds);
3107}
3108
3109do_cop1stub(int n)
3110{
3111 literal_pool(256);
3112 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3113 set_jump_target(stubs[n][1],(int)out);
3114 int i=stubs[n][3];
3d624f89 3115// int rs=stubs[n][4];
57871462 3116 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3117 int ds=stubs[n][6];
3118 if(!ds) {
3119 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3120 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3121 }
3122 //else {printf("fp exception in delay slot\n");}
3123 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3124 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3125 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3126 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3127 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3128}
3129
3130/* TLB */
3131
3132int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3133{
3134 if(c) {
3135 if((signed int)addr>=(signed int)0xC0000000) {
3136 // address_generation already loaded the const
3137 emit_readword_dualindexedx4(FP,map,map);
3138 }
3139 else
3140 return -1; // No mapping
3141 }
3142 else {
3143 assert(s!=map);
3144 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3145 emit_addsr12(map,s,map);
3146 // Schedule this while we wait on the load
3147 //if(x) emit_xorimm(s,x,ar);
3148 if(shift>=0) emit_shlimm(s,3,shift);
3149 if(~a) emit_andimm(s,a,ar);
3150 emit_readword_dualindexedx4(FP,map,map);
3151 }
3152 return map;
3153}
3154int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3155{
3156 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3157 emit_test(map,map);
3158 *jaddr=(int)out;
3159 emit_js(0);
3160 }
3161 return map;
3162}
3163
3164int gen_tlb_addr_r(int ar, int map) {
3165 if(map>=0) {
3166 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3167 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3168 }
3169}
3170
3171int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3172{
3173 if(c) {
3174 if(addr<0x80800000||addr>=0xC0000000) {
3175 // address_generation already loaded the const
3176 emit_readword_dualindexedx4(FP,map,map);
3177 }
3178 else
3179 return -1; // No mapping
3180 }
3181 else {
3182 assert(s!=map);
3183 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3184 emit_addsr12(map,s,map);
3185 // Schedule this while we wait on the load
3186 //if(x) emit_xorimm(s,x,ar);
3187 emit_readword_dualindexedx4(FP,map,map);
3188 }
3189 return map;
3190}
3191int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3192{
3193 if(!c||addr<0x80800000||addr>=0xC0000000) {
3194 emit_testimm(map,0x40000000);
3195 *jaddr=(int)out;
3196 emit_jne(0);
3197 }
3198}
3199
3200int gen_tlb_addr_w(int ar, int map) {
3201 if(map>=0) {
3202 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3203 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3204 }
3205}
3206
3207// Generate the address of the memory_map entry, relative to dynarec_local
3208generate_map_const(u_int addr,int reg) {
3209 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3210 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3211}
3212
3213/* Special assem */
3214
3215void shift_assemble_arm(int i,struct regstat *i_regs)
3216{
3217 if(rt1[i]) {
3218 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3219 {
3220 signed char s,t,shift;
3221 t=get_reg(i_regs->regmap,rt1[i]);
3222 s=get_reg(i_regs->regmap,rs1[i]);
3223 shift=get_reg(i_regs->regmap,rs2[i]);
3224 if(t>=0){
3225 if(rs1[i]==0)
3226 {
3227 emit_zeroreg(t);
3228 }
3229 else if(rs2[i]==0)
3230 {
3231 assert(s>=0);
3232 if(s!=t) emit_mov(s,t);
3233 }
3234 else
3235 {
3236 emit_andimm(shift,31,HOST_TEMPREG);
3237 if(opcode2[i]==4) // SLLV
3238 {
3239 emit_shl(s,HOST_TEMPREG,t);
3240 }
3241 if(opcode2[i]==6) // SRLV
3242 {
3243 emit_shr(s,HOST_TEMPREG,t);
3244 }
3245 if(opcode2[i]==7) // SRAV
3246 {
3247 emit_sar(s,HOST_TEMPREG,t);
3248 }
3249 }
3250 }
3251 } else { // DSLLV/DSRLV/DSRAV
3252 signed char sh,sl,th,tl,shift;
3253 th=get_reg(i_regs->regmap,rt1[i]|64);
3254 tl=get_reg(i_regs->regmap,rt1[i]);
3255 sh=get_reg(i_regs->regmap,rs1[i]|64);
3256 sl=get_reg(i_regs->regmap,rs1[i]);
3257 shift=get_reg(i_regs->regmap,rs2[i]);
3258 if(tl>=0){
3259 if(rs1[i]==0)
3260 {
3261 emit_zeroreg(tl);
3262 if(th>=0) emit_zeroreg(th);
3263 }
3264 else if(rs2[i]==0)
3265 {
3266 assert(sl>=0);
3267 if(sl!=tl) emit_mov(sl,tl);
3268 if(th>=0&&sh!=th) emit_mov(sh,th);
3269 }
3270 else
3271 {
3272 // FIXME: What if shift==tl ?
3273 assert(shift!=tl);
3274 int temp=get_reg(i_regs->regmap,-1);
3275 int real_th=th;
3276 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3277 assert(sl>=0);
3278 assert(sh>=0);
3279 emit_andimm(shift,31,HOST_TEMPREG);
3280 if(opcode2[i]==0x14) // DSLLV
3281 {
3282 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3283 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3284 emit_orrshr(sl,HOST_TEMPREG,th);
3285 emit_andimm(shift,31,HOST_TEMPREG);
3286 emit_testimm(shift,32);
3287 emit_shl(sl,HOST_TEMPREG,tl);
3288 if(th>=0) emit_cmovne_reg(tl,th);
3289 emit_cmovne_imm(0,tl);
3290 }
3291 if(opcode2[i]==0x16) // DSRLV
3292 {
3293 assert(th>=0);
3294 emit_shr(sl,HOST_TEMPREG,tl);
3295 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3296 emit_orrshl(sh,HOST_TEMPREG,tl);
3297 emit_andimm(shift,31,HOST_TEMPREG);
3298 emit_testimm(shift,32);
3299 emit_shr(sh,HOST_TEMPREG,th);
3300 emit_cmovne_reg(th,tl);
3301 if(real_th>=0) emit_cmovne_imm(0,th);
3302 }
3303 if(opcode2[i]==0x17) // DSRAV
3304 {
3305 assert(th>=0);
3306 emit_shr(sl,HOST_TEMPREG,tl);
3307 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3308 if(real_th>=0) {
3309 assert(temp>=0);
3310 emit_sarimm(th,31,temp);
3311 }
3312 emit_orrshl(sh,HOST_TEMPREG,tl);
3313 emit_andimm(shift,31,HOST_TEMPREG);
3314 emit_testimm(shift,32);
3315 emit_sar(sh,HOST_TEMPREG,th);
3316 emit_cmovne_reg(th,tl);
3317 if(real_th>=0) emit_cmovne_reg(temp,th);
3318 }
3319 }
3320 }
3321 }
3322 }
3323}
3324#define shift_assemble shift_assemble_arm
3325
3326void loadlr_assemble_arm(int i,struct regstat *i_regs)
3327{
3328 int s,th,tl,temp,temp2,addr,map=-1;
3329 int offset;
3330 int jaddr=0;
af4ee1fe 3331 int memtarget=0,c=0;
57871462 3332 u_int hr,reglist=0;
3333 th=get_reg(i_regs->regmap,rt1[i]|64);
3334 tl=get_reg(i_regs->regmap,rt1[i]);
3335 s=get_reg(i_regs->regmap,rs1[i]);
3336 temp=get_reg(i_regs->regmap,-1);
3337 temp2=get_reg(i_regs->regmap,FTEMP);
3338 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3339 assert(addr<0);
3340 offset=imm[i];
3341 for(hr=0;hr<HOST_REGS;hr++) {
3342 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3343 }
3344 reglist|=1<<temp;
3345 if(offset||s<0||c) addr=temp2;
3346 else addr=s;
3347 if(s>=0) {
3348 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3349 if(c) {
3350 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3351 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3352 }
57871462 3353 }
535d208a 3354 if(!using_tlb) {
3355 if(!c) {
3356 #ifdef RAM_OFFSET
3357 map=get_reg(i_regs->regmap,ROREG);
3358 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3359 #endif
3360 emit_shlimm(addr,3,temp);
3361 if (opcode[i]==0x22||opcode[i]==0x26) {
3362 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3363 }else{
535d208a 3364 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3365 }
535d208a 3366 emit_cmpimm(addr,RAM_SIZE);
3367 jaddr=(int)out;
3368 emit_jno(0);
3369 }
3370 else {
3371 if (opcode[i]==0x22||opcode[i]==0x26) {
3372 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3373 }else{
3374 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3375 }
57871462 3376 }
535d208a 3377 }else{ // using tlb
3378 int a;
3379 if(c) {
3380 a=-1;
3381 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3382 a=0xFFFFFFFC; // LWL/LWR
3383 }else{
3384 a=0xFFFFFFF8; // LDL/LDR
3385 }
3386 map=get_reg(i_regs->regmap,TLREG);
3387 assert(map>=0);
3388 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3389 if(c) {
3390 if (opcode[i]==0x22||opcode[i]==0x26) {
3391 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3392 }else{
3393 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3394 }
535d208a 3395 }
3396 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3397 }
3398 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3399 if(!c||memtarget) {
3400 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3401 emit_readword_indexed_tlb(0,temp2,map,temp2);
3402 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3403 }
3404 else
3405 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3406 if(rt1[i]) {
3407 assert(tl>=0);
57871462 3408 emit_andimm(temp,24,temp);
2002a1db 3409#ifdef BIG_ENDIAN_MIPS
3410 if (opcode[i]==0x26) // LWR
3411#else
3412 if (opcode[i]==0x22) // LWL
3413#endif
3414 emit_xorimm(temp,24,temp);
57871462 3415 emit_movimm(-1,HOST_TEMPREG);
3416 if (opcode[i]==0x26) {
3417 emit_shr(temp2,temp,temp2);
3418 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3419 }else{
3420 emit_shl(temp2,temp,temp2);
3421 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3422 }
3423 emit_or(temp2,tl,tl);
57871462 3424 }
535d208a 3425 //emit_storereg(rt1[i],tl); // DEBUG
3426 }
3427 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3428 // FIXME: little endian
3429 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3430 if(!c||memtarget) {
3431 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3432 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3433 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3434 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3435 }
3436 else
3437 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3438 if(rt1[i]) {
3439 assert(th>=0);
3440 assert(tl>=0);
57871462 3441 emit_testimm(temp,32);
3442 emit_andimm(temp,24,temp);
3443 if (opcode[i]==0x1A) { // LDL
3444 emit_rsbimm(temp,32,HOST_TEMPREG);
3445 emit_shl(temp2h,temp,temp2h);
3446 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3447 emit_movimm(-1,HOST_TEMPREG);
3448 emit_shl(temp2,temp,temp2);
3449 emit_cmove_reg(temp2h,th);
3450 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3451 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3452 emit_orreq(temp2,tl,tl);
3453 emit_orrne(temp2,th,th);
3454 }
3455 if (opcode[i]==0x1B) { // LDR
3456 emit_xorimm(temp,24,temp);
3457 emit_rsbimm(temp,32,HOST_TEMPREG);
3458 emit_shr(temp2,temp,temp2);
3459 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3460 emit_movimm(-1,HOST_TEMPREG);
3461 emit_shr(temp2h,temp,temp2h);
3462 emit_cmovne_reg(temp2,tl);
3463 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3464 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3465 emit_orrne(temp2h,th,th);
3466 emit_orreq(temp2h,tl,tl);
3467 }
3468 }
3469 }
3470}
3471#define loadlr_assemble loadlr_assemble_arm
3472
3473void cop0_assemble(int i,struct regstat *i_regs)
3474{
3475 if(opcode2[i]==0) // MFC0
3476 {
3477 signed char t=get_reg(i_regs->regmap,rt1[i]);
3478 char copr=(source[i]>>11)&0x1f;
3479 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3480 if(t>=0&&rt1[i]!=0) {
7139f3c8 3481#ifdef MUPEN64
57871462 3482 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3483 emit_movimm((source[i]>>11)&0x1f,1);
3484 emit_writeword(0,(int)&PC);
3485 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3486 if(copr==9) {
3487 emit_readword((int)&last_count,ECX);
3488 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3489 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3490 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3491 emit_writeword(HOST_CCREG,(int)&Count);
3492 }
3493 emit_call((int)MFC0);
3494 emit_readword((int)&readmem_dword,t);
7139f3c8 3495#else
3496 emit_readword((int)&reg_cop0+copr*4,t);
3497#endif
57871462 3498 }
3499 }
3500 else if(opcode2[i]==4) // MTC0
3501 {
3502 signed char s=get_reg(i_regs->regmap,rs1[i]);
3503 char copr=(source[i]>>11)&0x1f;
3504 assert(s>=0);
3505 emit_writeword(s,(int)&readmem_dword);
3506 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3507#ifdef MUPEN64
57871462 3508 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3509 emit_movimm((source[i]>>11)&0x1f,1);
3510 emit_writeword(0,(int)&PC);
3511 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3512#endif
3513 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3514 emit_readword((int)&last_count,ECX);
3515 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3516 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3517 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3518 emit_writeword(HOST_CCREG,(int)&Count);
3519 }
3520 // What a mess. The status register (12) can enable interrupts,
3521 // so needs a special case to handle a pending interrupt.
3522 // The interrupt must be taken immediately, because a subsequent
3523 // instruction might disable interrupts again.
7139f3c8 3524 if(copr==12||copr==13) {
fca1aef2 3525#ifdef PCSX
3526 if (is_delayslot) {
3527 // burn cycles to cause cc_interrupt, which will
3528 // reschedule next_interupt. Relies on CCREG from above.
3529 assem_debug("MTC0 DS %d\n", copr);
3530 emit_writeword(HOST_CCREG,(int)&last_count);
3531 emit_movimm(0,HOST_CCREG);
3532 emit_storereg(CCREG,HOST_CCREG);
3533 emit_movimm(copr,0);
3534 emit_call((int)pcsx_mtc0_ds);
3535 return;
3536 }
3537#endif
57871462 3538 emit_movimm(start+i*4+4,0);
3539 emit_movimm(0,1);
3540 emit_writeword(0,(int)&pcaddr);
3541 emit_writeword(1,(int)&pending_exception);
3542 }
3543 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3544 //else
fca1aef2 3545#ifdef PCSX
3546 emit_movimm(copr,0);
3547 emit_call((int)pcsx_mtc0);
3548#else
57871462 3549 emit_call((int)MTC0);
fca1aef2 3550#endif
7139f3c8 3551 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3552 emit_readword((int)&Count,HOST_CCREG);
3553 emit_readword((int)&next_interupt,ECX);
3554 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3555 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3556 emit_writeword(ECX,(int)&last_count);
3557 emit_storereg(CCREG,HOST_CCREG);
3558 }
7139f3c8 3559 if(copr==12||copr==13) {
57871462 3560 assert(!is_delayslot);
3561 emit_readword((int)&pending_exception,14);
3562 }
3563 emit_loadreg(rs1[i],s);
3564 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3565 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3566 if(copr==12||copr==13) {
57871462 3567 emit_test(14,14);
3568 emit_jne((int)&do_interrupt);
3569 }
3570 cop1_usable=0;
3571 }
3572 else
3573 {
3574 assert(opcode2[i]==0x10);
3d624f89 3575#ifndef DISABLE_TLB
57871462 3576 if((source[i]&0x3f)==0x01) // TLBR
3577 emit_call((int)TLBR);
3578 if((source[i]&0x3f)==0x02) // TLBWI
3579 emit_call((int)TLBWI_new);
3580 if((source[i]&0x3f)==0x06) { // TLBWR
3581 // The TLB entry written by TLBWR is dependent on the count,
3582 // so update the cycle count
3583 emit_readword((int)&last_count,ECX);
3584 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3585 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3586 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3587 emit_writeword(HOST_CCREG,(int)&Count);
3588 emit_call((int)TLBWR_new);
3589 }
3590 if((source[i]&0x3f)==0x08) // TLBP
3591 emit_call((int)TLBP);
3d624f89 3592#endif
576bbd8f 3593#ifdef PCSX
3594 if((source[i]&0x3f)==0x10) // RFE
3595 {
3596 emit_readword((int)&Status,0);
3597 emit_andimm(0,0x3c,1);
3598 emit_andimm(0,~0xf,0);
3599 emit_orrshr_imm(1,2,0);
3600 emit_writeword(0,(int)&Status);
3601 }
3602#else
57871462 3603 if((source[i]&0x3f)==0x18) // ERET
3604 {
3605 int count=ccadj[i];
3606 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3607 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3608 emit_jmp((int)jump_eret);
3609 }
576bbd8f 3610#endif
57871462 3611 }
3612}
3613
b9b61529 3614static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3615{
3616 switch (copr) {
3617 case 1:
3618 case 3:
3619 case 5:
3620 case 8:
3621 case 9:
3622 case 10:
3623 case 11:
3624 emit_readword((int)&reg_cop2d[copr],tl);
3625 emit_signextend16(tl,tl);
3626 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3627 break;
3628 case 7:
3629 case 16:
3630 case 17:
3631 case 18:
3632 case 19:
3633 emit_readword((int)&reg_cop2d[copr],tl);
3634 emit_andimm(tl,0xffff,tl);
3635 emit_writeword(tl,(int)&reg_cop2d[copr]);
3636 break;
3637 case 15:
3638 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3639 emit_writeword(tl,(int)&reg_cop2d[copr]);
3640 break;
3641 case 28:
b9b61529 3642 case 29:
3643 emit_readword((int)&reg_cop2d[9],temp);
3644 emit_testimm(temp,0x8000); // do we need this?
3645 emit_andimm(temp,0xf80,temp);
3646 emit_andne_imm(temp,0,temp);
f70d384d 3647 emit_shrimm(temp,7,tl);
b9b61529 3648 emit_readword((int)&reg_cop2d[10],temp);
3649 emit_testimm(temp,0x8000);
3650 emit_andimm(temp,0xf80,temp);
3651 emit_andne_imm(temp,0,temp);
f70d384d 3652 emit_orrshr_imm(temp,2,tl);
b9b61529 3653 emit_readword((int)&reg_cop2d[11],temp);
3654 emit_testimm(temp,0x8000);
3655 emit_andimm(temp,0xf80,temp);
3656 emit_andne_imm(temp,0,temp);
f70d384d 3657 emit_orrshl_imm(temp,3,tl);
b9b61529 3658 emit_writeword(tl,(int)&reg_cop2d[copr]);
3659 break;
3660 default:
3661 emit_readword((int)&reg_cop2d[copr],tl);
3662 break;
3663 }
3664}
3665
3666static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3667{
3668 switch (copr) {
3669 case 15:
3670 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3671 emit_writeword(sl,(int)&reg_cop2d[copr]);
3672 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3673 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3674 emit_writeword(sl,(int)&reg_cop2d[14]);
3675 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3676 break;
3677 case 28:
3678 emit_andimm(sl,0x001f,temp);
f70d384d 3679 emit_shlimm(temp,7,temp);
b9b61529 3680 emit_writeword(temp,(int)&reg_cop2d[9]);
3681 emit_andimm(sl,0x03e0,temp);
f70d384d 3682 emit_shlimm(temp,2,temp);
b9b61529 3683 emit_writeword(temp,(int)&reg_cop2d[10]);
3684 emit_andimm(sl,0x7c00,temp);
f70d384d 3685 emit_shrimm(temp,3,temp);
b9b61529 3686 emit_writeword(temp,(int)&reg_cop2d[11]);
3687 emit_writeword(sl,(int)&reg_cop2d[28]);
3688 break;
3689 case 30:
3690 emit_movs(sl,temp);
3691 emit_mvnmi(temp,temp);
3692 emit_clz(temp,temp);
3693 emit_writeword(sl,(int)&reg_cop2d[30]);
3694 emit_writeword(temp,(int)&reg_cop2d[31]);
3695 break;
b9b61529 3696 case 31:
3697 break;
3698 default:
3699 emit_writeword(sl,(int)&reg_cop2d[copr]);
3700 break;
3701 }
3702}
3703
3704void cop2_assemble(int i,struct regstat *i_regs)
3705{
3706 u_int copr=(source[i]>>11)&0x1f;
3707 signed char temp=get_reg(i_regs->regmap,-1);
3708 if (opcode2[i]==0) { // MFC2
3709 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3710 if(tl>=0&&rt1[i]!=0)
b9b61529 3711 cop2_get_dreg(copr,tl,temp);
3712 }
3713 else if (opcode2[i]==4) { // MTC2
3714 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3715 cop2_put_dreg(copr,sl,temp);
3716 }
3717 else if (opcode2[i]==2) // CFC2
3718 {
3719 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3720 if(tl>=0&&rt1[i]!=0)
b9b61529 3721 emit_readword((int)&reg_cop2c[copr],tl);
3722 }
3723 else if (opcode2[i]==6) // CTC2
3724 {
3725 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3726 switch(copr) {
3727 case 4:
3728 case 12:
3729 case 20:
3730 case 26:
3731 case 27:
3732 case 29:
3733 case 30:
3734 emit_signextend16(sl,temp);
3735 break;
3736 case 31:
3737 //value = value & 0x7ffff000;
3738 //if (value & 0x7f87e000) value |= 0x80000000;
3739 emit_shrimm(sl,12,temp);
3740 emit_shlimm(temp,12,temp);
3741 emit_testimm(temp,0x7f000000);
3742 emit_testeqimm(temp,0x00870000);
3743 emit_testeqimm(temp,0x0000e000);
3744 emit_orrne_imm(temp,0x80000000,temp);
3745 break;
3746 default:
3747 temp=sl;
3748 break;
3749 }
3750 emit_writeword(temp,(int)&reg_cop2c[copr]);
3751 assert(sl>=0);
3752 }
3753}
3754
3755void c2op_assemble(int i,struct regstat *i_regs)
3756{
3757 signed char temp=get_reg(i_regs->regmap,-1);
3758 u_int c2op=source[i]&0x3f;
3759 u_int hr,reglist=0;
3760 for(hr=0;hr<HOST_REGS;hr++) {
3761 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3762 }
3763 if(i==0||itype[i-1]!=C2OP)
3764 save_regs(reglist);
3765
3766 if (gte_handlers[c2op]!=NULL) {
3767 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3768 emit_movimm(source[i],1); // opcode
b9b61529 3769 if (cc>=0&&gte_cycletab[c2op])
009faf24 3770 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3771 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3772 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3773 emit_call((int)gte_handlers[c2op]);
3774 }
3775
3776 if(i>=slen-1||itype[i+1]!=C2OP)
3777 restore_regs(reglist);
3778}
3779
3780void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3781{
3782 // XXX: should just just do the exception instead
3783 if(!cop1_usable) {
3784 int jaddr=(int)out;
3785 emit_jmp(0);
3786 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3787 cop1_usable=1;
3788 }
3789}
3790
57871462 3791void cop1_assemble(int i,struct regstat *i_regs)
3792{
3d624f89 3793#ifndef DISABLE_COP1
57871462 3794 // Check cop1 unusable
3795 if(!cop1_usable) {
3796 signed char rs=get_reg(i_regs->regmap,CSREG);
3797 assert(rs>=0);
3798 emit_testimm(rs,0x20000000);
3799 int jaddr=(int)out;
3800 emit_jeq(0);
3801 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3802 cop1_usable=1;
3803 }
3804 if (opcode2[i]==0) { // MFC1
3805 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3806 if(tl>=0) {
3807 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3808 emit_readword_indexed(0,tl,tl);
3809 }
3810 }
3811 else if (opcode2[i]==1) { // DMFC1
3812 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3813 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3814 if(tl>=0) {
3815 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3816 if(th>=0) emit_readword_indexed(4,tl,th);
3817 emit_readword_indexed(0,tl,tl);
3818 }
3819 }
3820 else if (opcode2[i]==4) { // MTC1
3821 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3822 signed char temp=get_reg(i_regs->regmap,-1);
3823 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3824 emit_writeword_indexed(sl,0,temp);
3825 }
3826 else if (opcode2[i]==5) { // DMTC1
3827 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3828 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3829 signed char temp=get_reg(i_regs->regmap,-1);
3830 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3831 emit_writeword_indexed(sh,4,temp);
3832 emit_writeword_indexed(sl,0,temp);
3833 }
3834 else if (opcode2[i]==2) // CFC1
3835 {
3836 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3837 if(tl>=0) {
3838 u_int copr=(source[i]>>11)&0x1f;
3839 if(copr==0) emit_readword((int)&FCR0,tl);
3840 if(copr==31) emit_readword((int)&FCR31,tl);
3841 }
3842 }
3843 else if (opcode2[i]==6) // CTC1
3844 {
3845 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3846 u_int copr=(source[i]>>11)&0x1f;
3847 assert(sl>=0);
3848 if(copr==31)
3849 {
3850 emit_writeword(sl,(int)&FCR31);
3851 // Set the rounding mode
3852 //FIXME
3853 //char temp=get_reg(i_regs->regmap,-1);
3854 //emit_andimm(sl,3,temp);
3855 //emit_fldcw_indexed((int)&rounding_modes,temp);
3856 }
3857 }
3d624f89 3858#else
3859 cop1_unusable(i, i_regs);
3860#endif
57871462 3861}
3862
3863void fconv_assemble_arm(int i,struct regstat *i_regs)
3864{
3d624f89 3865#ifndef DISABLE_COP1
57871462 3866 signed char temp=get_reg(i_regs->regmap,-1);
3867 assert(temp>=0);
3868 // Check cop1 unusable
3869 if(!cop1_usable) {
3870 signed char rs=get_reg(i_regs->regmap,CSREG);
3871 assert(rs>=0);
3872 emit_testimm(rs,0x20000000);
3873 int jaddr=(int)out;
3874 emit_jeq(0);
3875 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3876 cop1_usable=1;
3877 }
3878
3879 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3880 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3881 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3882 emit_flds(temp,15);
3883 emit_ftosizs(15,15); // float->int, truncate
3884 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3885 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3886 emit_fsts(15,temp);
3887 return;
3888 }
3889 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3890 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3891 emit_vldr(temp,7);
3892 emit_ftosizd(7,13); // double->int, truncate
3893 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3894 emit_fsts(13,temp);
3895 return;
3896 }
3897
3898 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3899 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3900 emit_flds(temp,13);
3901 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3902 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3903 emit_fsitos(13,15);
3904 emit_fsts(15,temp);
3905 return;
3906 }
3907 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3908 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3909 emit_flds(temp,13);
3910 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3911 emit_fsitod(13,7);
3912 emit_vstr(7,temp);
3913 return;
3914 }
3915
3916 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3917 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3918 emit_flds(temp,13);
3919 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3920 emit_fcvtds(13,7);
3921 emit_vstr(7,temp);
3922 return;
3923 }
3924 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3925 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3926 emit_vldr(temp,7);
3927 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3928 emit_fcvtsd(7,13);
3929 emit_fsts(13,temp);
3930 return;
3931 }
3932 #endif
3933
3934 // C emulation code
3935
3936 u_int hr,reglist=0;
3937 for(hr=0;hr<HOST_REGS;hr++) {
3938 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3939 }
3940 save_regs(reglist);
3941
3942 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3943 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3944 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3945 emit_call((int)cvt_s_w);
3946 }
3947 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3948 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3949 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3950 emit_call((int)cvt_d_w);
3951 }
3952 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3953 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3954 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3955 emit_call((int)cvt_s_l);
3956 }
3957 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3958 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3959 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3960 emit_call((int)cvt_d_l);
3961 }
3962
3963 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3964 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3965 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3966 emit_call((int)cvt_d_s);
3967 }
3968 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3969 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3970 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3971 emit_call((int)cvt_w_s);
3972 }
3973 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3974 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3975 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3976 emit_call((int)cvt_l_s);
3977 }
3978
3979 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3980 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3981 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3982 emit_call((int)cvt_s_d);
3983 }
3984 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3985 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3986 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3987 emit_call((int)cvt_w_d);
3988 }
3989 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3990 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3991 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3992 emit_call((int)cvt_l_d);
3993 }
3994
3995 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3996 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3997 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3998 emit_call((int)round_l_s);
3999 }
4000 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4001 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4002 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4003 emit_call((int)trunc_l_s);
4004 }
4005 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4006 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4007 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4008 emit_call((int)ceil_l_s);
4009 }
4010 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4011 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4012 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4013 emit_call((int)floor_l_s);
4014 }
4015 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4016 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4017 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4018 emit_call((int)round_w_s);
4019 }
4020 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4021 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4022 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4023 emit_call((int)trunc_w_s);
4024 }
4025 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4026 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4027 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4028 emit_call((int)ceil_w_s);
4029 }
4030 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4031 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4032 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4033 emit_call((int)floor_w_s);
4034 }
4035
4036 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4037 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4038 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4039 emit_call((int)round_l_d);
4040 }
4041 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4042 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4043 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4044 emit_call((int)trunc_l_d);
4045 }
4046 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4047 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4048 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4049 emit_call((int)ceil_l_d);
4050 }
4051 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4052 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4053 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4054 emit_call((int)floor_l_d);
4055 }
4056 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4057 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4058 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4059 emit_call((int)round_w_d);
4060 }
4061 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4062 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4063 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4064 emit_call((int)trunc_w_d);
4065 }
4066 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4067 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4068 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4069 emit_call((int)ceil_w_d);
4070 }
4071 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4072 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4073 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4074 emit_call((int)floor_w_d);
4075 }
4076
4077 restore_regs(reglist);
3d624f89 4078#else
4079 cop1_unusable(i, i_regs);
4080#endif
57871462 4081}
4082#define fconv_assemble fconv_assemble_arm
4083
4084void fcomp_assemble(int i,struct regstat *i_regs)
4085{
3d624f89 4086#ifndef DISABLE_COP1
57871462 4087 signed char fs=get_reg(i_regs->regmap,FSREG);
4088 signed char temp=get_reg(i_regs->regmap,-1);
4089 assert(temp>=0);
4090 // Check cop1 unusable
4091 if(!cop1_usable) {
4092 signed char cs=get_reg(i_regs->regmap,CSREG);
4093 assert(cs>=0);
4094 emit_testimm(cs,0x20000000);
4095 int jaddr=(int)out;
4096 emit_jeq(0);
4097 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4098 cop1_usable=1;
4099 }
4100
4101 if((source[i]&0x3f)==0x30) {
4102 emit_andimm(fs,~0x800000,fs);
4103 return;
4104 }
4105
4106 if((source[i]&0x3e)==0x38) {
4107 // sf/ngle - these should throw exceptions for NaNs
4108 emit_andimm(fs,~0x800000,fs);
4109 return;
4110 }
4111
4112 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4113 if(opcode2[i]==0x10) {
4114 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4115 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4116 emit_orimm(fs,0x800000,fs);
4117 emit_flds(temp,14);
4118 emit_flds(HOST_TEMPREG,15);
4119 emit_fcmps(14,15);
4120 emit_fmstat();
4121 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4122 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4123 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4124 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4125 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4126 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4127 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4128 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4129 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4130 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4131 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4132 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4133 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4134 return;
4135 }
4136 if(opcode2[i]==0x11) {
4137 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4138 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4139 emit_orimm(fs,0x800000,fs);
4140 emit_vldr(temp,6);
4141 emit_vldr(HOST_TEMPREG,7);
4142 emit_fcmpd(6,7);
4143 emit_fmstat();
4144 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4145 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4146 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4147 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4148 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4149 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4150 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4151 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4152 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4153 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4154 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4155 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4156 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4157 return;
4158 }
4159 #endif
4160
4161 // C only
4162
4163 u_int hr,reglist=0;
4164 for(hr=0;hr<HOST_REGS;hr++) {
4165 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4166 }
4167 reglist&=~(1<<fs);
4168 save_regs(reglist);
4169 if(opcode2[i]==0x10) {
4170 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4171 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4172 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4173 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4174 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4175 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4176 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4177 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4178 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4179 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4180 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4181 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4182 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4183 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4184 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4185 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4186 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4187 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4188 }
4189 if(opcode2[i]==0x11) {
4190 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4191 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4192 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4193 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4194 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4195 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4196 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4197 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4198 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4199 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4200 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4201 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4202 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4203 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4204 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4205 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4206 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4207 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4208 }
4209 restore_regs(reglist);
4210 emit_loadreg(FSREG,fs);
3d624f89 4211#else
4212 cop1_unusable(i, i_regs);
4213#endif
57871462 4214}
4215
4216void float_assemble(int i,struct regstat *i_regs)
4217{
3d624f89 4218#ifndef DISABLE_COP1
57871462 4219 signed char temp=get_reg(i_regs->regmap,-1);
4220 assert(temp>=0);
4221 // Check cop1 unusable
4222 if(!cop1_usable) {
4223 signed char cs=get_reg(i_regs->regmap,CSREG);
4224 assert(cs>=0);
4225 emit_testimm(cs,0x20000000);
4226 int jaddr=(int)out;
4227 emit_jeq(0);
4228 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4229 cop1_usable=1;
4230 }
4231
4232 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4233 if((source[i]&0x3f)==6) // mov
4234 {
4235 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4236 if(opcode2[i]==0x10) {
4237 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4238 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4239 emit_readword_indexed(0,temp,temp);
4240 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4241 }
4242 if(opcode2[i]==0x11) {
4243 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4244 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4245 emit_vldr(temp,7);
4246 emit_vstr(7,HOST_TEMPREG);
4247 }
4248 }
4249 return;
4250 }
4251
4252 if((source[i]&0x3f)>3)
4253 {
4254 if(opcode2[i]==0x10) {
4255 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4256 emit_flds(temp,15);
4257 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4258 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4259 }
4260 if((source[i]&0x3f)==4) // sqrt
4261 emit_fsqrts(15,15);
4262 if((source[i]&0x3f)==5) // abs
4263 emit_fabss(15,15);
4264 if((source[i]&0x3f)==7) // neg
4265 emit_fnegs(15,15);
4266 emit_fsts(15,temp);
4267 }
4268 if(opcode2[i]==0x11) {
4269 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4270 emit_vldr(temp,7);
4271 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4272 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4273 }
4274 if((source[i]&0x3f)==4) // sqrt
4275 emit_fsqrtd(7,7);
4276 if((source[i]&0x3f)==5) // abs
4277 emit_fabsd(7,7);
4278 if((source[i]&0x3f)==7) // neg
4279 emit_fnegd(7,7);
4280 emit_vstr(7,temp);
4281 }
4282 return;
4283 }
4284 if((source[i]&0x3f)<4)
4285 {
4286 if(opcode2[i]==0x10) {
4287 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4288 }
4289 if(opcode2[i]==0x11) {
4290 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4291 }
4292 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4293 if(opcode2[i]==0x10) {
4294 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4295 emit_flds(temp,15);
4296 emit_flds(HOST_TEMPREG,13);
4297 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4298 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4299 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4300 }
4301 }
4302 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4303 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4304 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4305 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4306 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4307 emit_fsts(15,HOST_TEMPREG);
4308 }else{
4309 emit_fsts(15,temp);
4310 }
4311 }
4312 else if(opcode2[i]==0x11) {
4313 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4314 emit_vldr(temp,7);
4315 emit_vldr(HOST_TEMPREG,6);
4316 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4317 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4318 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4319 }
4320 }
4321 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4322 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4323 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4324 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4325 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4326 emit_vstr(7,HOST_TEMPREG);
4327 }else{
4328 emit_vstr(7,temp);
4329 }
4330 }
4331 }
4332 else {
4333 if(opcode2[i]==0x10) {
4334 emit_flds(temp,15);
4335 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4336 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4337 }
4338 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4339 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4340 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4341 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4342 emit_fsts(15,temp);
4343 }
4344 else if(opcode2[i]==0x11) {
4345 emit_vldr(temp,7);
4346 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4347 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4348 }
4349 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4350 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4351 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4352 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4353 emit_vstr(7,temp);
4354 }
4355 }
4356 return;
4357 }
4358 #endif
4359
4360 u_int hr,reglist=0;
4361 for(hr=0;hr<HOST_REGS;hr++) {
4362 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4363 }
4364 if(opcode2[i]==0x10) { // Single precision
4365 save_regs(reglist);
4366 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4367 if((source[i]&0x3f)<4) {
4368 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4369 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4370 }else{
4371 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4372 }
4373 switch(source[i]&0x3f)
4374 {
4375 case 0x00: emit_call((int)add_s);break;
4376 case 0x01: emit_call((int)sub_s);break;
4377 case 0x02: emit_call((int)mul_s);break;
4378 case 0x03: emit_call((int)div_s);break;
4379 case 0x04: emit_call((int)sqrt_s);break;
4380 case 0x05: emit_call((int)abs_s);break;
4381 case 0x06: emit_call((int)mov_s);break;
4382 case 0x07: emit_call((int)neg_s);break;
4383 }
4384 restore_regs(reglist);
4385 }
4386 if(opcode2[i]==0x11) { // Double precision
4387 save_regs(reglist);
4388 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4389 if((source[i]&0x3f)<4) {
4390 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4391 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4392 }else{
4393 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4394 }
4395 switch(source[i]&0x3f)
4396 {
4397 case 0x00: emit_call((int)add_d);break;
4398 case 0x01: emit_call((int)sub_d);break;
4399 case 0x02: emit_call((int)mul_d);break;
4400 case 0x03: emit_call((int)div_d);break;
4401 case 0x04: emit_call((int)sqrt_d);break;
4402 case 0x05: emit_call((int)abs_d);break;
4403 case 0x06: emit_call((int)mov_d);break;
4404 case 0x07: emit_call((int)neg_d);break;
4405 }
4406 restore_regs(reglist);
4407 }
3d624f89 4408#else
4409 cop1_unusable(i, i_regs);
4410#endif
57871462 4411}
4412
4413void multdiv_assemble_arm(int i,struct regstat *i_regs)
4414{
4415 // case 0x18: MULT
4416 // case 0x19: MULTU
4417 // case 0x1A: DIV
4418 // case 0x1B: DIVU
4419 // case 0x1C: DMULT
4420 // case 0x1D: DMULTU
4421 // case 0x1E: DDIV
4422 // case 0x1F: DDIVU
4423 if(rs1[i]&&rs2[i])
4424 {
4425 if((opcode2[i]&4)==0) // 32-bit
4426 {
4427 if(opcode2[i]==0x18) // MULT
4428 {
4429 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4430 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4431 signed char hi=get_reg(i_regs->regmap,HIREG);
4432 signed char lo=get_reg(i_regs->regmap,LOREG);
4433 assert(m1>=0);
4434 assert(m2>=0);
4435 assert(hi>=0);
4436 assert(lo>=0);
4437 emit_smull(m1,m2,hi,lo);
4438 }
4439 if(opcode2[i]==0x19) // MULTU
4440 {
4441 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4442 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4443 signed char hi=get_reg(i_regs->regmap,HIREG);
4444 signed char lo=get_reg(i_regs->regmap,LOREG);
4445 assert(m1>=0);
4446 assert(m2>=0);
4447 assert(hi>=0);
4448 assert(lo>=0);
4449 emit_umull(m1,m2,hi,lo);
4450 }
4451 if(opcode2[i]==0x1A) // DIV
4452 {
4453 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4454 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4455 assert(d1>=0);
4456 assert(d2>=0);
4457 signed char quotient=get_reg(i_regs->regmap,LOREG);
4458 signed char remainder=get_reg(i_regs->regmap,HIREG);
4459 assert(quotient>=0);
4460 assert(remainder>=0);
4461 emit_movs(d1,remainder);
4462 emit_negmi(remainder,remainder);
4463 emit_movs(d2,HOST_TEMPREG);
4464 emit_jeq((int)out+52); // Division by zero
4465 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4466 emit_clz(HOST_TEMPREG,quotient);
4467 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4468 emit_orimm(quotient,1<<31,quotient);
4469 emit_shr(quotient,quotient,quotient);
4470 emit_cmp(remainder,HOST_TEMPREG);
4471 emit_subcs(remainder,HOST_TEMPREG,remainder);
4472 emit_adcs(quotient,quotient,quotient);
4473 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4474 emit_jcc((int)out-16); // -4
4475 emit_teq(d1,d2);
4476 emit_negmi(quotient,quotient);
4477 emit_test(d1,d1);
4478 emit_negmi(remainder,remainder);
4479 }
4480 if(opcode2[i]==0x1B) // DIVU
4481 {
4482 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4483 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4484 assert(d1>=0);
4485 assert(d2>=0);
4486 signed char quotient=get_reg(i_regs->regmap,LOREG);
4487 signed char remainder=get_reg(i_regs->regmap,HIREG);
4488 assert(quotient>=0);
4489 assert(remainder>=0);
4490 emit_test(d2,d2);
4491 emit_jeq((int)out+44); // Division by zero
4492 emit_clz(d2,HOST_TEMPREG);
4493 emit_movimm(1<<31,quotient);
4494 emit_shl(d2,HOST_TEMPREG,d2);
4495 emit_mov(d1,remainder);
4496 emit_shr(quotient,HOST_TEMPREG,quotient);
4497 emit_cmp(remainder,d2);
4498 emit_subcs(remainder,d2,remainder);
4499 emit_adcs(quotient,quotient,quotient);
4500 emit_shrcc_imm(d2,1,d2);
4501 emit_jcc((int)out-16); // -4
4502 }
4503 }
4504 else // 64-bit
4505 {
4506 if(opcode2[i]==0x1C) // DMULT
4507 {
4508 assert(opcode2[i]!=0x1C);
4509 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4510 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4511 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4512 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4513 assert(m1h>=0);
4514 assert(m2h>=0);
4515 assert(m1l>=0);
4516 assert(m2l>=0);
4517 emit_pushreg(m2h);
4518 emit_pushreg(m2l);
4519 emit_pushreg(m1h);
4520 emit_pushreg(m1l);
4521 emit_call((int)&mult64);
4522 emit_popreg(m1l);
4523 emit_popreg(m1h);
4524 emit_popreg(m2l);
4525 emit_popreg(m2h);
4526 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4527 signed char hil=get_reg(i_regs->regmap,HIREG);
4528 if(hih>=0) emit_loadreg(HIREG|64,hih);
4529 if(hil>=0) emit_loadreg(HIREG,hil);
4530 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4531 signed char lol=get_reg(i_regs->regmap,LOREG);
4532 if(loh>=0) emit_loadreg(LOREG|64,loh);
4533 if(lol>=0) emit_loadreg(LOREG,lol);
4534 }
4535 if(opcode2[i]==0x1D) // DMULTU
4536 {
4537 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4538 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4539 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4540 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4541 assert(m1h>=0);
4542 assert(m2h>=0);
4543 assert(m1l>=0);
4544 assert(m2l>=0);
4545 save_regs(0x100f);
4546 if(m1l!=0) emit_mov(m1l,0);
4547 if(m1h==0) emit_readword((int)&dynarec_local,1);
4548 else if(m1h>1) emit_mov(m1h,1);
4549 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4550 else if(m2l>2) emit_mov(m2l,2);
4551 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4552 else if(m2h>3) emit_mov(m2h,3);
4553 emit_call((int)&multu64);
4554 restore_regs(0x100f);
4555 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4556 signed char hil=get_reg(i_regs->regmap,HIREG);
4557 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4558 signed char lol=get_reg(i_regs->regmap,LOREG);
4559 /*signed char temp=get_reg(i_regs->regmap,-1);
4560 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4561 signed char rl=get_reg(i_regs->regmap,HIREG);
4562 assert(m1h>=0);
4563 assert(m2h>=0);
4564 assert(m1l>=0);
4565 assert(m2l>=0);
4566 assert(temp>=0);
4567 //emit_mov(m1l,EAX);
4568 //emit_mul(m2l);
4569 emit_umull(rl,rh,m1l,m2l);
4570 emit_storereg(LOREG,rl);
4571 emit_mov(rh,temp);
4572 //emit_mov(m1h,EAX);
4573 //emit_mul(m2l);
4574 emit_umull(rl,rh,m1h,m2l);
4575 emit_adds(rl,temp,temp);
4576 emit_adcimm(rh,0,rh);
4577 emit_storereg(HIREG,rh);
4578 //emit_mov(m2h,EAX);
4579 //emit_mul(m1l);
4580 emit_umull(rl,rh,m1l,m2h);
4581 emit_adds(rl,temp,temp);
4582 emit_adcimm(rh,0,rh);
4583 emit_storereg(LOREG|64,temp);
4584 emit_mov(rh,temp);
4585 //emit_mov(m2h,EAX);
4586 //emit_mul(m1h);
4587 emit_umull(rl,rh,m1h,m2h);
4588 emit_adds(rl,temp,rl);
4589 emit_loadreg(HIREG,temp);
4590 emit_adcimm(rh,0,rh);
4591 emit_adds(rl,temp,rl);
4592 emit_adcimm(rh,0,rh);
4593 // DEBUG
4594 /*
4595 emit_pushreg(m2h);
4596 emit_pushreg(m2l);
4597 emit_pushreg(m1h);
4598 emit_pushreg(m1l);
4599 emit_call((int)&multu64);
4600 emit_popreg(m1l);
4601 emit_popreg(m1h);
4602 emit_popreg(m2l);
4603 emit_popreg(m2h);
4604 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4605 signed char hil=get_reg(i_regs->regmap,HIREG);
4606 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4607 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4608 */
4609 // Shouldn't be necessary
4610 //char loh=get_reg(i_regs->regmap,LOREG|64);
4611 //char lol=get_reg(i_regs->regmap,LOREG);
4612 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4613 //if(lol>=0) emit_loadreg(LOREG,lol);
4614 }
4615 if(opcode2[i]==0x1E) // DDIV
4616 {
4617 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4618 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4619 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4620 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4621 assert(d1h>=0);
4622 assert(d2h>=0);
4623 assert(d1l>=0);
4624 assert(d2l>=0);
4625 save_regs(0x100f);
4626 if(d1l!=0) emit_mov(d1l,0);
4627 if(d1h==0) emit_readword((int)&dynarec_local,1);
4628 else if(d1h>1) emit_mov(d1h,1);
4629 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4630 else if(d2l>2) emit_mov(d2l,2);
4631 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4632 else if(d2h>3) emit_mov(d2h,3);
4633 emit_call((int)&div64);
4634 restore_regs(0x100f);
4635 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4636 signed char hil=get_reg(i_regs->regmap,HIREG);
4637 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4638 signed char lol=get_reg(i_regs->regmap,LOREG);
4639 if(hih>=0) emit_loadreg(HIREG|64,hih);
4640 if(hil>=0) emit_loadreg(HIREG,hil);
4641 if(loh>=0) emit_loadreg(LOREG|64,loh);
4642 if(lol>=0) emit_loadreg(LOREG,lol);
4643 }
4644 if(opcode2[i]==0x1F) // DDIVU
4645 {
4646 //u_int hr,reglist=0;
4647 //for(hr=0;hr<HOST_REGS;hr++) {
4648 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4649 //}
4650 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4651 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4652 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4653 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4654 assert(d1h>=0);
4655 assert(d2h>=0);
4656 assert(d1l>=0);
4657 assert(d2l>=0);
4658 save_regs(0x100f);
4659 if(d1l!=0) emit_mov(d1l,0);
4660 if(d1h==0) emit_readword((int)&dynarec_local,1);
4661 else if(d1h>1) emit_mov(d1h,1);
4662 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4663 else if(d2l>2) emit_mov(d2l,2);
4664 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4665 else if(d2h>3) emit_mov(d2h,3);
4666 emit_call((int)&divu64);
4667 restore_regs(0x100f);
4668 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4669 signed char hil=get_reg(i_regs->regmap,HIREG);
4670 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4671 signed char lol=get_reg(i_regs->regmap,LOREG);
4672 if(hih>=0) emit_loadreg(HIREG|64,hih);
4673 if(hil>=0) emit_loadreg(HIREG,hil);
4674 if(loh>=0) emit_loadreg(LOREG|64,loh);
4675 if(lol>=0) emit_loadreg(LOREG,lol);
4676 }
4677 }
4678 }
4679 else
4680 {
4681 // Multiply by zero is zero.
4682 // MIPS does not have a divide by zero exception.
4683 // The result is undefined, we return zero.
4684 signed char hr=get_reg(i_regs->regmap,HIREG);
4685 signed char lr=get_reg(i_regs->regmap,LOREG);
4686 if(hr>=0) emit_zeroreg(hr);
4687 if(lr>=0) emit_zeroreg(lr);
4688 }
4689}
4690#define multdiv_assemble multdiv_assemble_arm
4691
// Intentionally emits nothing on ARM.
// On x86 this step puts the value 0xf8 into the register; on ARM the hash
// is computed with a single instruction (see do_rhash), so no preload is
// required.
//   r - register that would receive the preloaded value (unused here)
void do_preload_rhash(int r) {
  (void)r;
}
4696
4697void do_preload_rhtbl(int ht) {
4698 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4699}
4700
// Compute the mini hash table offset for the address in rs by masking it
// with 0xf8, leaving the result in rh.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4704
4705void do_miniht_load(int ht,int rh) {
4706 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4707 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4708}
4709
4710void do_miniht_jump(int rs,int rh,int ht) {
4711 emit_cmp(rh,rs);
4712 emit_ldreq_indexed(ht,4,15);
4713 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4714 emit_mov(rs,7);
4715 emit_jmp(jump_vaddr_reg[7]);
4716 #else
4717 emit_jmp(jump_vaddr_reg[rs]);
4718 #endif
4719}
4720
4721void do_miniht_insert(u_int return_address,int rt,int temp) {
4722 #ifdef ARMv5_ONLY
4723 emit_movimm(return_address,rt); // PC into link register
4724 add_to_linker((int)out,return_address,1);
4725 emit_pcreladdr(temp);
4726 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4727 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4728 #else
4729 emit_movw(return_address&0x0000FFFF,rt);
4730 add_to_linker((int)out,return_address,1);
4731 emit_pcreladdr(temp);
4732 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4733 emit_movt(return_address&0xFFFF0000,rt);
4734 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4735 #endif
4736}
4737
4738// Sign-extend to 64 bits and write out upper half of a register
4739// This is useful where we have a 32-bit value in a register, and want to
4740// keep it in a 32-bit register, but can't guarantee that it won't be read
4741// as a 64-bit value later.
4742void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4743{
24385cae 4744#ifndef FORCE32
57871462 4745 if(is32_pre==is32) return;
4746 int hr,reg;
4747 for(hr=0;hr<HOST_REGS;hr++) {
4748 if(hr!=EXCLUDE_REG) {
4749 //if(pre[hr]==entry[hr]) {
4750 if((reg=pre[hr])>=0) {
4751 if((dirty>>hr)&1) {
4752 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4753 emit_sarimm(hr,31,HOST_TEMPREG);
4754 emit_storereg(reg|64,HOST_TEMPREG);
4755 }
4756 }
4757 }
4758 //}
4759 }
4760 }
24385cae 4761#endif
57871462 4762}
4763
4764void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4765{
4766 //if(dirty_pre==dirty) return;
4767 int hr,reg,new_hr;
4768 for(hr=0;hr<HOST_REGS;hr++) {
4769 if(hr!=EXCLUDE_REG) {
4770 reg=pre[hr];
4771 if(((~u)>>(reg&63))&1) {
4772 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4773 if(((dirty_pre&~dirty)>>hr)&1) {
4774 if(reg>0&&reg<34) {
4775 emit_storereg(reg,hr);
4776 if( ((is32_pre&~uu)>>reg)&1 ) {
4777 emit_sarimm(hr,31,HOST_TEMPREG);
4778 emit_storereg(reg|64,HOST_TEMPREG);
4779 }
4780 }
4781 else if(reg>=64) {
4782 emit_storereg(reg,hr);
4783 }
4784 }
4785 }
4786 else // Check if register moved to a different register
4787 if((new_hr=get_reg(entry,reg))>=0) {
4788 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4789 if(reg>0&&reg<34) {
4790 emit_storereg(reg,hr);
4791 if( ((is32_pre&~uu)>>reg)&1 ) {
4792 emit_sarimm(hr,31,HOST_TEMPREG);
4793 emit_storereg(reg|64,HOST_TEMPREG);
4794 }
4795 }
4796 else if(reg>=64) {
4797 emit_storereg(reg,hr);
4798 }
4799 }
4800 }
4801 }
4802 }
4803 }
4804}
4805
4806
4807/* using strd could possibly help but you'd have to allocate registers in pairs
4808void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4809{
4810 int hr;
4811 int wrote=-1;
4812 for(hr=HOST_REGS-1;hr>=0;hr--) {
4813 if(hr!=EXCLUDE_REG) {
4814 if(pre[hr]!=entry[hr]) {
4815 if(pre[hr]>=0) {
4816 if((dirty>>hr)&1) {
4817 if(get_reg(entry,pre[hr])<0) {
4818 if(pre[hr]<64) {
4819 if(!((u>>pre[hr])&1)) {
4820 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4821 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4822 emit_sarimm(hr,31,hr+1);
4823 emit_strdreg(pre[hr],hr);
4824 }
4825 else
4826 emit_storereg(pre[hr],hr);
4827 }else{
4828 emit_storereg(pre[hr],hr);
4829 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4830 emit_sarimm(hr,31,hr);
4831 emit_storereg(pre[hr]|64,hr);
4832 }
4833 }
4834 }
4835 }else{
4836 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4837 emit_storereg(pre[hr],hr);
4838 }
4839 }
4840 wrote=hr;
4841 }
4842 }
4843 }
4844 }
4845 }
4846 }
4847 for(hr=0;hr<HOST_REGS;hr++) {
4848 if(hr!=EXCLUDE_REG) {
4849 if(pre[hr]!=entry[hr]) {
4850 if(pre[hr]>=0) {
4851 int nr;
4852 if((nr=get_reg(entry,pre[hr]))>=0) {
4853 emit_mov(hr,nr);
4854 }
4855 }
4856 }
4857 }
4858 }
4859}
4860#define wb_invalidate wb_invalidate_arm
4861*/
4862
dd3a91a1 4863// Clearing the cache is rather slow on ARM Linux, so mark the areas
4864// that need to be cleared, and then only clear these areas once.
4865void do_clear_cache()
4866{
4867 int i,j;
4868 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4869 {
4870 u_int bitmap=needs_clear_cache[i];
4871 if(bitmap) {
4872 u_int start,end;
4873 for(j=0;j<32;j++)
4874 {
4875 if(bitmap&(1<<j)) {
4876 start=BASE_ADDR+i*131072+j*4096;
4877 end=start+4095;
4878 j++;
4879 while(j<32) {
4880 if(bitmap&(1<<j)) {
4881 end+=4096;
4882 j++;
4883 }else{
4884 __clear_cache((void *)start,(void *)end);
4885 break;
4886 }
4887 }
4888 }
4889 }
4890 needs_clear_cache[i]=0;
4891 }
4892 }
4893}
4894
57871462 4895// CPU-architecture-specific initialization
4896void arch_init() {
3d624f89 4897#ifndef DISABLE_COP1
57871462 4898 rounding_modes[0]=0x0<<22; // round
4899 rounding_modes[1]=0x3<<22; // trunc
4900 rounding_modes[2]=0x1<<22; // ceil
4901 rounding_modes[3]=0x2<<22; // floor
3d624f89 4902#endif
57871462 4903}
b9b61529 4904
4905// vim:shiftwidth=2:expandtab