drc: merge Ari64's patch: 13_dummy_loads
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
/* Globals defined outside this file that the assembler code below refers to. */
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
/* fake_pc is only declared when building for MUPEN64. */
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
/* emit_loadreg/emit_storereg express register-file slots as byte offsets
   from &dynarec_local. */
57871462 30extern void *dynarec_local;
/* Indexed with (address>>12) by verify_dirty/get_bounds, i.e. one entry per
   4 KiB page; values >=0x80000000 are treated there as "not mapped". */
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
/* Entry points implemented outside this file (no definitions appear here). */
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
/* Address of the jump_vaddr_rN routine for each ARM register number N.
   Slots 11, 13, 14 and 15 (fp, sp, lr, pc in regname[]) have no routine and
   hold 0. */
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
/* Per-register invalidate_addr_rN entry points, implemented outside this file. */
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
/* Address of the invalidate_addr_rN routine for each ARM register number N.
   Same layout as jump_vaddr_reg: slots 11, 13, 14 and 15 hold 0. */
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
/* Array of 2^(TARGET_SIZE_2-17) words; not referenced elsewhere in this part
   of the file.
   NOTE(review): presumably a bitmap with one bit per 4 KiB of a
   2^TARGET_SIZE_2-byte translation cache (32 bits * 4 KiB = 2^17 bytes per
   word) -- confirm against the code that sets and clears it. */
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
/*
 * Give guest register reg a host register slot in cur->regmap for the
 * instruction at index i. Steps, in order:
 *   1. return at once if reg is flagged in cur->u or is already mapped;
 *   2. try the preferred host register (reg&7, HOST_CCREG for CCREG, 12 for
 *      PTEMP/FTEMP, then whatever loop_reg() returns) if it is empty or
 *      holds a value flagged in cur->u / cur->uu;
 *   3. drop one mapping whose value is flagged in cur->u / cur->uu;
 *   4. take any empty slot other than EXCLUDE_REG, first trying slots whose
 *      previous contents were not rs1/rs2/rt1/rt2 of instruction i-1;
 *   5. otherwise evict, scanning the hsn[] table filled in by lsn() from the
 *      value 10 downwards.
 * Whichever slot is chosen gets its dirty and isconst bits cleared.
 * Terminates the process if no slot can be found.
 */
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
334 r=cur->regmap[preferred_reg];
/* Map values below 64 are checked against cur->u; values of 64 and above
   are checked against cur->uu using (r&63). */
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
/* Only the first unneeded mapping found (lowest host register) is dropped. */
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
/* hsn[] starts out as 10 for every guest register and is then updated by
   lsn(); candidates are tried from the largest value down.
   NOTE(review): presumably hsn[r] is the distance to the next use of r --
   confirm against lsn(). */
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
/* First pass: only values 10..3, skipping registers used by instruction
   i-1, and HOST_CCREG unless j is below hsn[CCREG]. */
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
/* A slot holding r+64 is taken before a slot holding r itself. */
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
/* Second pass: no exclusions, values 10..0. */
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
/*
 * Allocate host registers for both halves of guest register reg at
 * instruction index i. The lower half is handled by alloc_reg(); the upper
 * half is recorded in cur->regmap as reg|64 and is skipped when reg is
 * flagged in cur->uu or the upper half is already mapped.
 * The search for the upper half mirrors alloc_reg(): preferred slot
 * (8+(reg&1), then loop_reg()), drop one unneeded mapping, any empty slot,
 * and finally eviction driven by the hsn[] table from lsn().
 * Terminates the process if no slot can be found.
 */
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
500 r=cur->regmap[preferred_reg];
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
/* Unlike alloc_reg(), this scan runs from the highest host register down,
   so the unneeded mapping that gets dropped is the highest-numbered one. */
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
/* hsn[] defaults to 10 per guest register before lsn() updates it;
   candidates are tried from the largest value down. */
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
/* First pass: values 10..3 only, skipping registers used by instruction
   i-1, and HOST_CCREG unless j is below hsn[CCREG]. */
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
/* Second pass: no exclusions, values 10..0. */
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
/*
 * Search order used below: an existing mapping of reg (nothing to do), any
 * empty slot scanning from the highest host register down, a slot whose
 * value is flagged unneeded both in cur and for instruction i-1, and
 * finally eviction driven by the hsn[] table from lsn(). EXCLUDE_REG is
 * skipped by the first two searches. No preferred host register is used
 * (preferred_reg stays -1 unless lsn() changes it).
 * Terminates the process if no slot can be found.
 */
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
/* The value must be flagged in cur->u / cur->uu and, unless i is 0, also in
   unneeded_reg[i-1] / unneeded_reg_upper[i-1]. */
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
/* First pass: values 10..3 only, skipping registers used by instruction
   i-1. Here HOST_CCREG is protected whenever hsn[CCREG] is 2 or less
   (alloc_reg/alloc_reg64 compare against j instead). */
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
/* Second pass: no exclusions, values 10..0. */
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
765
766 // see if it's already allocated (and dealloc it)
767 for(n=0;n<HOST_REGS;n++)
768 {
769 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
770 }
771
772 cur->regmap[hr]=reg;
773 cur->dirty&=~(1<<hr);
774 cur->isconst&=~(1<<hr);
775}
776
777// Alloc cycle count into dedicated register
778alloc_cc(struct regstat *cur,int i)
779{
780 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
781}
782
783/* Special alloc */
784
785
786/* Assembler */
787
// Printable name of each ARM register number, used by the assem_debug
// traces. Register 11 is shown as fp, 13 as sp, 14 as lr and 15 as pc.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
805
806void output_byte(u_char byte)
807{
808 *(out++)=byte;
809}
810void output_modrm(u_char mod,u_char rm,u_char ext)
811{
812 assert(mod<4);
813 assert(rm<8);
814 assert(ext<8);
815 u_char byte=(mod<<6)|(ext<<3)|rm;
816 *(out++)=byte;
817}
818void output_sib(u_char scale,u_char index,u_char base)
819{
820 assert(scale<4);
821 assert(index<8);
822 assert(base<8);
823 u_char byte=(scale<<6)|(index<<3)|base;
824 *(out++)=byte;
825}
826void output_w32(u_int word)
827{
828 *((u_int *)out)=word;
829 out+=4;
830}
// Build the register fields of an ARM instruction word:
// rn in bits 19:16, rd in bits 15:12, rm in bits 3:0.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
// Build the register and immediate fields for an operand equal to
// imm<<shift: rn in bits 19:16, rd in bits 15:12, the 8-bit immediate in
// bits 7:0 and ((32-shift)&30) placed at bit 7 as the rotate amount.
// imm must be below 256 and shift must be even.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success the operand field (rotate amount at bit 7, 8-bit value in
// bits 7:0) is stored in *encoded and 1 is returned. Otherwise 0 is
// returned and *encoded is left at 0. Zero always succeeds with encoding 0.
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);   // rotate right by two bits and retry
  }
  return 0;
}
cfbd3c6e 861void genimm_checked(u_int imm,u_int *encoded)
862{
863 u_int ret=genimm(imm,encoded);
864 assert(ret);
865}
57871462 866u_int genjmp(u_int addr)
867{
868 int offset=addr-(int)out-8;
e80343e2 869 if(offset<-33554432||offset>=33554432) {
870 if (addr>2) {
871 printf("genjmp: out of range: %08x\n", offset);
872 exit(1);
873 }
874 return 0;
875 }
57871462 876 return ((u_int)offset>>2)&0xffffff;
877}
878
879void emit_mov(int rs,int rt)
880{
881 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
883}
884
885void emit_movs(int rs,int rt)
886{
887 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
888 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
889}
890
891void emit_add(int rs1,int rs2,int rt)
892{
893 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
895}
896
897void emit_adds(int rs1,int rs2,int rt)
898{
899 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
901}
902
903void emit_adcs(int rs1,int rs2,int rt)
904{
905 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
906 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
907}
908
909void emit_sbc(int rs1,int rs2,int rt)
910{
911 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
913}
914
915void emit_sbcs(int rs1,int rs2,int rt)
916{
917 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
919}
920
921void emit_neg(int rs, int rt)
922{
923 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
924 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
925}
926
927void emit_negs(int rs, int rt)
928{
929 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
930 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
931}
932
933void emit_sub(int rs1,int rs2,int rt)
934{
935 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
937}
938
939void emit_subs(int rs1,int rs2,int rt)
940{
941 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
942 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
943}
944
945void emit_zeroreg(int rt)
946{
947 assem_debug("mov %s,#0\n",regname[rt]);
948 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
949}
950
790ee18e 951void emit_loadlp(u_int imm,u_int rt)
952{
953 add_literal((int)out,imm);
954 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
955 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
956}
957void emit_movw(u_int imm,u_int rt)
958{
959 assert(imm<65536);
960 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
961 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
962}
963void emit_movt(u_int imm,u_int rt)
964{
965 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
966 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
967}
968void emit_movimm(u_int imm,u_int rt)
969{
970 u_int armval;
971 if(genimm(imm,&armval)) {
972 assem_debug("mov %s,#%d\n",regname[rt],imm);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
974 }else if(genimm(~imm,&armval)) {
975 assem_debug("mvn %s,#%d\n",regname[rt],imm);
976 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
977 }else if(imm<65536) {
978 #ifdef ARMv5_ONLY
979 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
980 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
981 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
982 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
983 #else
984 emit_movw(imm,rt);
985 #endif
986 }else{
987 #ifdef ARMv5_ONLY
988 emit_loadlp(imm,rt);
989 #else
990 emit_movw(imm&0x0000FFFF,rt);
991 emit_movt(imm&0xFFFF0000,rt);
992 #endif
993 }
994}
995void emit_pcreladdr(u_int rt)
996{
997 assem_debug("add %s,pc,#?\n",regname[rt]);
998 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
999}
1000
57871462 1001void emit_loadreg(int r, int hr)
1002{
3d624f89 1003#ifdef FORCE32
1004 if(r&64) {
1005 printf("64bit load in 32bit mode!\n");
7f2607ea 1006 assert(0);
1007 return;
3d624f89 1008 }
1009#endif
57871462 1010 if((r&63)==0)
1011 emit_zeroreg(hr);
1012 else {
3d624f89 1013 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1014 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1015 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1016 if(r==CCREG) addr=(int)&cycle_count;
1017 if(r==CSREG) addr=(int)&Status;
1018 if(r==FSREG) addr=(int)&FCR31;
1019 if(r==INVCP) addr=(int)&invc_ptr;
1020 u_int offset = addr-(u_int)&dynarec_local;
1021 assert(offset<4096);
1022 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1023 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1024 }
1025}
1026void emit_storereg(int r, int hr)
1027{
3d624f89 1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit store in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
1034#endif
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1044}
1045
1046void emit_test(int rs, int rt)
1047{
1048 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1049 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1050}
1051
1052void emit_testimm(int rs,int imm)
1053{
1054 u_int armval;
1055 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1056 genimm_checked(imm,&armval);
57871462 1057 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1058}
1059
b9b61529 1060void emit_testeqimm(int rs,int imm)
1061{
1062 u_int armval;
1063 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1064 genimm_checked(imm,&armval);
b9b61529 1065 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1066}
1067
57871462 1068void emit_not(int rs,int rt)
1069{
1070 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1071 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1072}
1073
b9b61529 1074void emit_mvnmi(int rs,int rt)
1075{
1076 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1077 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1078}
1079
57871462 1080void emit_and(u_int rs1,u_int rs2,u_int rt)
1081{
1082 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1083 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1084}
1085
1086void emit_or(u_int rs1,u_int rs2,u_int rt)
1087{
1088 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1089 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1090}
1091void emit_or_and_set_flags(int rs1,int rs2,int rt)
1092{
1093 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
f70d384d 1097void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1098{
1099 assert(rs<16);
1100 assert(rt<16);
1101 assert(imm<32);
1102 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1103 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1104}
1105
576bbd8f 1106void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
57871462 1115void emit_xor(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1119}
1120
57871462 1121void emit_addimm(u_int rs,int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 if(imm!=0) {
1126 assert(imm>-65536&&imm<65536);
1127 u_int armval;
1128 if(genimm(imm,&armval)) {
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(genimm(-imm,&armval)) {
1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1133 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1134 }else if(imm<0) {
1135 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1136 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1137 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1138 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1139 }else{
1140 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1141 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1142 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1143 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1144 }
1145 }
1146 else if(rs!=rt) emit_mov(rs,rt);
1147}
1148
1149void emit_addimm_and_set_flags(int imm,int rt)
1150{
1151 assert(imm>-65536&&imm<65536);
1152 u_int armval;
1153 if(genimm(imm,&armval)) {
1154 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1155 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1156 }else if(genimm(-imm,&armval)) {
1157 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1158 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1159 }else if(imm<0) {
1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1161 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1163 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
1165 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1166 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1167 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1168 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1169 }
1170}
1171void emit_addimm_no_flags(u_int imm,u_int rt)
1172{
1173 emit_addimm(rt,imm,rt);
1174}
1175
1176void emit_addnop(u_int r)
1177{
1178 assert(r<16);
1179 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1180 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1181}
1182
1183void emit_adcimm(u_int rs,int imm,u_int rt)
1184{
1185 u_int armval;
cfbd3c6e 1186 genimm_checked(imm,&armval);
57871462 1187 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1188 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1189}
1190/*void emit_sbcimm(int imm,u_int rt)
1191{
1192 u_int armval;
cfbd3c6e 1193 genimm_checked(imm,&armval);
57871462 1194 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1195 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1196}*/
1197void emit_sbbimm(int imm,u_int rt)
1198{
1199 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1200 assert(rt<8);
1201 if(imm<128&&imm>=-128) {
1202 output_byte(0x83);
1203 output_modrm(3,rt,3);
1204 output_byte(imm);
1205 }
1206 else
1207 {
1208 output_byte(0x81);
1209 output_modrm(3,rt,3);
1210 output_w32(imm);
1211 }
1212}
1213void emit_rscimm(int rs,int imm,u_int rt)
1214{
1215 assert(0);
1216 u_int armval;
cfbd3c6e 1217 genimm_checked(imm,&armval);
57871462 1218 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1219 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1220}
1221
1222void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1223{
1224 // TODO: if(genimm(imm,&armval)) ...
1225 // else
1226 emit_movimm(imm,HOST_TEMPREG);
1227 emit_adds(HOST_TEMPREG,rsl,rtl);
1228 emit_adcimm(rsh,0,rth);
1229}
1230
1231void emit_sbb(int rs1,int rs2)
1232{
1233 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1234 output_byte(0x19);
1235 output_modrm(3,rs1,rs2);
1236}
1237
1238void emit_andimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
790ee18e 1241 if(imm==0) {
1242 emit_zeroreg(rt);
1243 }else if(genimm(imm,&armval)) {
57871462 1244 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1246 }else if(genimm(~imm,&armval)) {
1247 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1248 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1249 }else if(imm==65535) {
1250 #ifdef ARMv5_ONLY
1251 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1252 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1253 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1254 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1255 #else
1256 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1257 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1258 #endif
1259 }else{
1260 assert(imm>0&&imm<65535);
1261 #ifdef ARMv5_ONLY
1262 assem_debug("mov r14,#%d\n",imm&0xFF00);
1263 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1264 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1265 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1266 #else
1267 emit_movw(imm,HOST_TEMPREG);
1268 #endif
1269 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1270 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1271 }
1272}
1273
1274void emit_orimm(int rs,int imm,int rt)
1275{
1276 u_int armval;
790ee18e 1277 if(imm==0) {
1278 if(rs!=rt) emit_mov(rs,rt);
1279 }else if(genimm(imm,&armval)) {
57871462 1280 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1281 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1282 }else{
1283 assert(imm>0&&imm<65536);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1287 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1288 }
1289}
1290
1291void emit_xorimm(int rs,int imm,int rt)
1292{
57871462 1293 u_int armval;
790ee18e 1294 if(imm==0) {
1295 if(rs!=rt) emit_mov(rs,rt);
1296 }else if(genimm(imm,&armval)) {
57871462 1297 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1298 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1299 }else{
514ed0d9 1300 assert(imm>0&&imm<65536);
57871462 1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1304 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1305 }
1306}
1307
1308void emit_shlimm(int rs,u_int imm,int rt)
1309{
1310 assert(imm>0);
1311 assert(imm<32);
1312 //if(imm==1) ...
1313 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1315}
1316
1317void emit_shrimm(int rs,u_int imm,int rt)
1318{
1319 assert(imm>0);
1320 assert(imm<32);
1321 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1323}
1324
1325void emit_sarimm(int rs,u_int imm,int rt)
1326{
1327 assert(imm>0);
1328 assert(imm<32);
1329 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1331}
1332
1333void emit_rorimm(int rs,u_int imm,int rt)
1334{
1335 assert(imm>0);
1336 assert(imm<32);
1337 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1338 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1339}
1340
1341void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1342{
1343 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1344 assert(imm>0);
1345 assert(imm<32);
1346 //if(imm==1) ...
1347 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1350 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1351}
1352
1353void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1354{
1355 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1356 assert(imm>0);
1357 assert(imm<32);
1358 //if(imm==1) ...
1359 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1361 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1362 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1363}
1364
b9b61529 1365void emit_signextend16(int rs,int rt)
1366{
1367 #ifdef ARMv5_ONLY
1368 emit_shlimm(rs,16,rt);
1369 emit_sarimm(rt,16,rt);
1370 #else
1371 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1372 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1373 #endif
1374}
1375
57871462 1376void emit_shl(u_int rs,u_int shift,u_int rt)
1377{
1378 assert(rs<16);
1379 assert(rt<16);
1380 assert(shift<16);
1381 //if(imm==1) ...
1382 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1383 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1384}
1385void emit_shr(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1391 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1392}
1393void emit_sar(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1400}
1401void emit_shlcl(int r)
1402{
1403 assem_debug("shl %%%s,%%cl\n",regname[r]);
1404 assert(0);
1405}
1406void emit_shrcl(int r)
1407{
1408 assem_debug("shr %%%s,%%cl\n",regname[r]);
1409 assert(0);
1410}
1411void emit_sarcl(int r)
1412{
1413 assem_debug("sar %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416
1417void emit_shldcl(int r1,int r2)
1418{
1419 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1420 assert(0);
1421}
1422void emit_shrdcl(int r1,int r2)
1423{
1424 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1425 assert(0);
1426}
1427void emit_orrshl(u_int rs,u_int shift,u_int rt)
1428{
1429 assert(rs<16);
1430 assert(rt<16);
1431 assert(shift<16);
1432 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1434}
1435void emit_orrshr(u_int rs,u_int shift,u_int rt)
1436{
1437 assert(rs<16);
1438 assert(rt<16);
1439 assert(shift<16);
1440 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1441 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1442}
1443
1444void emit_cmpimm(int rs,int imm)
1445{
1446 u_int armval;
1447 if(genimm(imm,&armval)) {
1448 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1449 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1450 }else if(genimm(-imm,&armval)) {
1451 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1452 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1453 }else if(imm>0) {
1454 assert(imm<65536);
1455 #ifdef ARMv5_ONLY
1456 emit_movimm(imm,HOST_TEMPREG);
1457 #else
1458 emit_movw(imm,HOST_TEMPREG);
1459 #endif
1460 assem_debug("cmp %s,r14\n",regname[rs]);
1461 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1462 }else{
1463 assert(imm>-65536);
1464 #ifdef ARMv5_ONLY
1465 emit_movimm(-imm,HOST_TEMPREG);
1466 #else
1467 emit_movw(-imm,HOST_TEMPREG);
1468 #endif
1469 assem_debug("cmn %s,r14\n",regname[rs]);
1470 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1471 }
1472}
1473
1474void emit_cmovne(u_int *addr,int rt)
1475{
1476 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1477 assert(0);
1478}
1479void emit_cmovl(u_int *addr,int rt)
1480{
1481 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1482 assert(0);
1483}
1484void emit_cmovs(u_int *addr,int rt)
1485{
1486 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovne_imm(int imm,int rt)
1490{
1491 assem_debug("movne %s,#%d\n",regname[rt],imm);
1492 u_int armval;
cfbd3c6e 1493 genimm_checked(imm,&armval);
57871462 1494 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1495}
1496void emit_cmovl_imm(int imm,int rt)
1497{
1498 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1499 u_int armval;
cfbd3c6e 1500 genimm_checked(imm,&armval);
57871462 1501 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1502}
1503void emit_cmovb_imm(int imm,int rt)
1504{
1505 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1506 u_int armval;
cfbd3c6e 1507 genimm_checked(imm,&armval);
57871462 1508 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1509}
1510void emit_cmovs_imm(int imm,int rt)
1511{
1512 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1513 u_int armval;
cfbd3c6e 1514 genimm_checked(imm,&armval);
57871462 1515 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1516}
1517void emit_cmove_reg(int rs,int rt)
1518{
1519 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1520 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1521}
1522void emit_cmovne_reg(int rs,int rt)
1523{
1524 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1526}
1527void emit_cmovl_reg(int rs,int rt)
1528{
1529 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovs_reg(int rs,int rt)
1533{
1534 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1536}
1537
// Set rt to 1 if rs < imm (movlt after the compare), else 0.
// When rt is the same register as rs, the zeroing has to wait until after
// the compare has read rs.
void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if the compare of rs against imm leaves the "cc" condition
// (movcc), else 0.  When rt is the same register as rs, the zeroing has to
// wait until after the compare has read rs.
void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// 64-bit "set if less than immediate" built from 32-bit pieces.
// The low word result is computed first with emit_slti32, then patched
// according to the high word rsh: for imm>=0 the high word is tested
// against itself, for imm<0 it is compared with -1.  rsh must not be rt.
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// 64-bit unsigned "set if less than immediate" built from 32-bit pieces.
// The low word result comes from emit_sltiu32 and is then overridden when
// the high word rsh differs from the expected upper half (0 for imm>=0,
// -1 for imm<0).  rsh must not be rt.
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1584
1585void emit_cmp(int rs,int rt)
1586{
1587 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1588 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1589}
// Set rt to 1 when rs is greater than zero, else 0: compare rs with 1,
// load 1, then replace it with 0 under the "lt" condition.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Set rt to 1 when rs is non-zero: rs is copied to rt with flags set (or
// just tested when rs already is rt), then rt is overwritten with 1 under
// the "ne" condition.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// 64-bit "greater than zero" built from 32-bit pieces: start from the low
// word result of emit_set_gz32, then test the high word and patch rt
// (1 under "ne", then 0 under "mi").
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// 64-bit "non-zero" test: OR both halves into rt with flags set, then
// overwrite rt with 1 under the "ne" condition.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (movlt after cmp), else 0.  If rt is one of the
// operands, it is only cleared after the compare has been emitted.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);
  if (!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if "cmp rs1,rs2" leaves the "cc" condition (movcc), else 0.
// If rt is one of the operands, it is only cleared after the compare has
// been emitted.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand = (rs1 == rt || rs2 == rt);
  if (!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1634void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1635{
1636 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1637 assert(u1!=rt);
1638 assert(u2!=rt);
1639 emit_cmp(l1,l2);
1640 emit_movimm(0,rt);
1641 emit_sbcs(u1,u2,HOST_TEMPREG);
1642 emit_cmovl_imm(1,rt);
1643}
1644void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1645{
1646 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1647 assert(u1!=rt);
1648 assert(u2!=rt);
1649 emit_cmp(l1,l2);
1650 emit_movimm(0,rt);
1651 emit_sbcs(u1,u2,HOST_TEMPREG);
1652 emit_cmovb_imm(1,rt);
1653}
1654
1655void emit_call(int a)
1656{
1657 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1658 u_int offset=genjmp(a);
1659 output_w32(0xeb000000|offset);
1660}
1661void emit_jmp(int a)
1662{
1663 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1664 u_int offset=genjmp(a);
1665 output_w32(0xea000000|offset);
1666}
// Emit "bne a".
void emit_jne(int a)
{
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
// Emit "beq a".
void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
// Emit "bmi a".
void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
// Emit "bpl a".
void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
// Emit "blt a".
void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
// Emit "bge a".
void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
// Emit "bvc a".
void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
// Emit "bcs a".
void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
// Emit "bcc a".
void emit_jcc(int a)
{
  assem_debug("bcc %x\n", a);
  output_w32(0x3a000000 | genjmp(a));
}
1721
// Unimplemented here: logs the requested "push $imm" and fails assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Unimplemented here: logs "pusha" and fails assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented here: logs "popa" and fails assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1737void emit_pushreg(u_int r)
1738{
1739 assem_debug("push %%%s\n",regname[r]);
1740 assert(0);
1741}
1742void emit_popreg(u_int r)
1743{
1744 assem_debug("pop %%%s\n",regname[r]);
1745 assert(0);
1746}
1747void emit_callreg(u_int r)
1748{
1749 assem_debug("call *%%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_jmpreg(u_int r)
1753{
1754 assem_debug("mov pc,%s\n",regname[r]);
1755 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1756}
1757
1758void emit_readword_indexed(int offset, int rs, int rt)
1759{
1760 assert(offset>-4096&&offset<4096);
1761 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1762 if(offset>=0) {
1763 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1764 }else{
1765 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1766 }
1767}
1768void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1769{
1770 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1771 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1772}
// Word load with an optional map register.  A negative map means "no map":
// plain base+offset load.  Otherwise addr must be 0 and the load is indexed
// by map scaled by four.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Double-word load into rh (first word, skipped when rh is negative) and
// rl (second word).  Without a map register the words are read at addr and
// addr+4.  With one, map is incremented by 1 between the two loads -- the
// index is scaled by four, so that advances by one word -- and rh must not
// be rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }
  assert(rh != rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1793void emit_movsbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-256&&offset<256);
1796 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1799 }else{
1800 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1801 }
1802}
1803void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1804{
1805 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1806 else {
1807 if(addr==0) {
1808 emit_shlimm(map,2,map);
1809 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1810 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1811 }else{
1812 assert(addr>-256&&addr<256);
1813 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1814 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1815 emit_movsbl_indexed(addr, rt, rt);
1816 }
1817 }
1818}
1819void emit_movswl_indexed(int offset, int rs, int rt)
1820{
1821 assert(offset>-256&&offset<256);
1822 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1823 if(offset>=0) {
1824 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1825 }else{
1826 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1827 }
1828}
1829void emit_movzbl_indexed(int offset, int rs, int rt)
1830{
1831 assert(offset>-4096&&offset<4096);
1832 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1833 if(offset>=0) {
1834 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1835 }else{
1836 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1837 }
1838}
1839void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1843}
// Unsigned byte load with an optional map register.  Without a map, a
// plain base+offset load.  With one, a non-zero addr is first added to rs
// (result in rt) and the load is then indexed by map scaled by four.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1856void emit_movzwl_indexed(int offset, int rs, int rt)
1857{
1858 assert(offset>-256&&offset<256);
1859 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1860 if(offset>=0) {
1861 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1862 }else{
1863 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1864 }
1865}
1866void emit_readword(int addr, int rt)
1867{
1868 u_int offset = addr-(u_int)&dynarec_local;
1869 assert(offset<4096);
1870 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1871 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1872}
1873void emit_movsbl(int addr, int rt)
1874{
1875 u_int offset = addr-(u_int)&dynarec_local;
1876 assert(offset<256);
1877 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1878 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1879}
1880void emit_movswl(int addr, int rt)
1881{
1882 u_int offset = addr-(u_int)&dynarec_local;
1883 assert(offset<256);
1884 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1885 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1886}
1887void emit_movzbl(int addr, int rt)
1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<4096);
1891 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1892 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1893}
1894void emit_movzwl(int addr, int rt)
1895{
1896 u_int offset = addr-(u_int)&dynarec_local;
1897 assert(offset<256);
1898 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1899 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1900}
1901void emit_movzwl_reg(int rs, int rt)
1902{
1903 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1904 assert(0);
1905}
1906
1907void emit_xchg(int rs, int rt)
1908{
1909 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1910 assert(0);
1911}
1912void emit_writeword_indexed(int rt, int offset, int rs)
1913{
1914 assert(offset>-4096&&offset<4096);
1915 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1916 if(offset>=0) {
1917 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1918 }else{
1919 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1920 }
1921}
1922void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1923{
1924 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1926}
// Word store with an optional map register.  A negative map means "no
// map": plain base+offset store.  Otherwise addr must be 0 and the store is
// indexed by map scaled by four.  The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Double-word store of rh (first word) and rl (second word).
// Without a map register, the words go to addr and addr+4 (rh is skipped
// when negative).  With a map register rh must be valid, and the second
// word is reached either through temp = map + 1 (index scaled by four) or,
// when temp is the same register as rs, by advancing rs itself by 4.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh >= 0);
  if (temp == rs) {
    // no free temporary: bump the base register between the two stores
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  } else {
    emit_addimm(map, 1, temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
1951void emit_writehword_indexed(int rt, int offset, int rs)
1952{
1953 assert(offset>-256&&offset<256);
1954 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1955 if(offset>=0) {
1956 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1957 }else{
1958 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1959 }
1960}
1961void emit_writebyte_indexed(int rt, int offset, int rs)
1962{
1963 assert(offset>-4096&&offset<4096);
1964 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1967 }else{
1968 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1969 }
1970}
1971void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1972{
1973 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1974 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1975}
// Byte store with an optional map register.  Without a map, a plain
// base+offset store.  With one, a non-zero addr is first added to rs
// (result in temp) and the store is then indexed by map scaled by four.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1988void emit_writeword(int rt, int addr)
1989{
1990 u_int offset = addr-(u_int)&dynarec_local;
1991 assert(offset<4096);
1992 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1993 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1994}
1995void emit_writehword(int rt, int addr)
1996{
1997 u_int offset = addr-(u_int)&dynarec_local;
1998 assert(offset<256);
1999 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2000 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2001}
2002void emit_writebyte(int rt, int addr)
2003{
2004 u_int offset = addr-(u_int)&dynarec_local;
2005 assert(offset<4096);
74426039 2006 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2007 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2008}
// Unimplemented here: logs the requested "movl $imm,addr" and fails
// assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Unimplemented here: logs the requested "movb $imm,addr" and fails
// assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2019
2020void emit_mul(int rs)
2021{
2022 assem_debug("mul %%%s\n",regname[rs]);
2023 assert(0);
2024}
2025void emit_imul(int rs)
2026{
2027 assem_debug("imul %%%s\n",regname[rs]);
2028 assert(0);
2029}
2030void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2031{
2032 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2033 assert(rs1<16);
2034 assert(rs2<16);
2035 assert(hi<16);
2036 assert(lo<16);
2037 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2038}
2039void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2040{
2041 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2042 assert(rs1<16);
2043 assert(rs2<16);
2044 assert(hi<16);
2045 assert(lo<16);
2046 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2047}
2048
2049void emit_div(int rs)
2050{
2051 assem_debug("div %%%s\n",regname[rs]);
2052 assert(0);
2053}
2054void emit_idiv(int rs)
2055{
2056 assem_debug("idiv %%%s\n",regname[rs]);
2057 assert(0);
2058}
// Unimplemented here: logs "cdq" and fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2064
2065void emit_clz(int rs,int rt)
2066{
2067 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2068 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2069}
2070
2071void emit_subcs(int rs1,int rs2,int rt)
2072{
2073 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2074 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2075}
2076
2077void emit_shrcc_imm(int rs,u_int imm,int rt)
2078{
2079 assert(imm>0);
2080 assert(imm<32);
2081 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2082 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2083}
2084
2085void emit_negmi(int rs, int rt)
2086{
2087 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2088 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2089}
2090
2091void emit_negsmi(int rs, int rt)
2092{
2093 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2094 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2095}
2096
2097void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2098{
2099 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2100 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2101}
2102
2103void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2104{
2105 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2106 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2107}
2108
2109void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2110{
2111 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2112 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2113}
2114
2115void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2116{
2117 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2118 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2119}
2120
2121void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2122{
2123 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2124 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2125}
2126
2127void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2128{
2129 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2130 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2131}
2132
2133void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2134{
2135 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2136 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2137}
2138
2139void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2140{
2141 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2142 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2143}
2144
2145void emit_teq(int rs, int rt)
2146{
2147 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2148 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2149}
2150
2151void emit_rsbimm(int rs, int imm, int rt)
2152{
2153 u_int armval;
cfbd3c6e 2154 genimm_checked(imm,&armval);
57871462 2155 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2156 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2157}
2158
2159// Load 2 immediates optimizing for small code size
2160void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2161{
2162 emit_movimm(imm1,rt1);
2163 u_int armval;
2164 if(genimm(imm2-imm1,&armval)) {
2165 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2166 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2167 }else if(genimm(imm1-imm2,&armval)) {
2168 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2169 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2170 }
2171 else emit_movimm(imm2,rt2);
2172}
2173
2174// Conditionally select one of two immediates, optimizing for small code size
2175// This will only be called if HAVE_CMOV_IMM is defined
2176void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2177{
2178 u_int armval;
2179 if(genimm(imm2-imm1,&armval)) {
2180 emit_movimm(imm1,rt);
2181 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2182 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2183 }else if(genimm(imm1-imm2,&armval)) {
2184 emit_movimm(imm1,rt);
2185 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2186 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2187 }
2188 else {
2189 #ifdef ARMv5_ONLY
2190 emit_movimm(imm1,rt);
2191 add_literal((int)out,imm2);
2192 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2193 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2194 #else
2195 emit_movw(imm1&0x0000FFFF,rt);
2196 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2197 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2198 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2199 }
2200 emit_movt(imm1&0xFFFF0000,rt);
2201 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2202 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2203 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2204 }
2205 #endif
2206 }
2207}
2208
// special case for checking invalid_code
// This variant is not implemented here: calling it trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2214
2215// special case for checking invalid_code
2216void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2217{
2218 assert(imm<128&&imm>=0);
2219 assert(r>=0&&r<16);
2220 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2221 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2222 emit_cmpimm(HOST_TEMPREG,imm);
2223}
2224
2225// special case for tlb mapping
2226void emit_addsr12(int rs1,int rs2,int rt)
2227{
2228 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2229 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2230}
2231
0bbd1454 2232void emit_callne(int a)
2233{
2234 assem_debug("blne %x\n",a);
2235 u_int offset=genjmp(a);
2236 output_w32(0x1b000000|offset);
2237}
2238
// Used to preload hash table entries
// NOTE(review): the bytes written here (0x0F 0x18, a modrm byte, then a
// 32-bit absolute address) look like an x86 prefetch encoding rather than an
// ARM instruction -- confirm whether this is ever called in the ARM backend.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2248void emit_prefetchreg(int r)
2249{
2250 assem_debug("pld %s\n",regname[r]);
2251 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2252}
2253
2254// Special case for mini_ht
2255void emit_ldreq_indexed(int rs, u_int offset, int rt)
2256{
2257 assert(offset<4096);
2258 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2259 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2260}
2261
2262void emit_flds(int r,int sr)
2263{
2264 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2265 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2266}
2267
2268void emit_vldr(int r,int vr)
2269{
2270 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2271 output_w32(0xed900b00|(vr<<12)|(r<<16));
2272}
2273
2274void emit_fsts(int sr,int r)
2275{
2276 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2277 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2278}
2279
2280void emit_vstr(int vr,int r)
2281{
2282 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2283 output_w32(0xed800b00|(vr<<12)|(r<<16));
2284}
2285
2286void emit_ftosizs(int s,int d)
2287{
2288 assem_debug("ftosizs s%d,s%d\n",d,s);
2289 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2290}
2291
2292void emit_ftosizd(int s,int d)
2293{
2294 assem_debug("ftosizd s%d,d%d\n",d,s);
2295 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2296}
2297
2298void emit_fsitos(int s,int d)
2299{
2300 assem_debug("fsitos s%d,s%d\n",d,s);
2301 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2302}
2303
2304void emit_fsitod(int s,int d)
2305{
2306 assem_debug("fsitod d%d,s%d\n",d,s);
2307 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2308}
2309
2310void emit_fcvtds(int s,int d)
2311{
2312 assem_debug("fcvtds d%d,s%d\n",d,s);
2313 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2314}
2315
2316void emit_fcvtsd(int s,int d)
2317{
2318 assem_debug("fcvtsd s%d,d%d\n",d,s);
2319 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2320}
2321
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands are encoded as single registers, so the debug trace names
// both with an 's' prefix (it previously printed the destination as d<d>).
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2327
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands are encoded as double registers, so the debug trace names
// both with a 'd' prefix (it previously printed the destination as s<d>).
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2333
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands are encoded as single registers, so the debug trace names
// both with an 's' prefix (it previously printed the destination as d<d>).
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2339
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands are encoded as double registers, so the debug trace names
// both with a 'd' prefix (it previously printed the destination as s<d>).
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2345
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands are encoded as single registers, so the debug trace names
// both with an 's' prefix (it previously printed the destination as d<d>).
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2351
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands are encoded as double registers, so the debug trace names
// both with a 'd' prefix (it previously printed the destination as s<d>).
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2357
2358void emit_fadds(int s1,int s2,int d)
2359{
2360 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2361 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2362}
2363
2364void emit_faddd(int s1,int s2,int d)
2365{
2366 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2367 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2368}
2369
2370void emit_fsubs(int s1,int s2,int d)
2371{
2372 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2373 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2374}
2375
2376void emit_fsubd(int s1,int s2,int d)
2377{
2378 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2379 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2380}
2381
2382void emit_fmuls(int s1,int s2,int d)
2383{
2384 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2385 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2386}
2387
2388void emit_fmuld(int s1,int s2,int d)
2389{
2390 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2391 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2392}
2393
2394void emit_fdivs(int s1,int s2,int d)
2395{
2396 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2397 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2398}
2399
2400void emit_fdivd(int s1,int s2,int d)
2401{
2402 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2403 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2404}
2405
2406void emit_fcmps(int x,int y)
2407{
2408 assem_debug("fcmps s14, s15\n");
2409 output_w32(0xeeb47a67);
2410}
2411
2412void emit_fcmpd(int x,int y)
2413{
2414 assem_debug("fcmpd d6, d7\n");
2415 output_w32(0xeeb46b47);
2416}
2417
2418void emit_fmstat()
2419{
2420 assem_debug("fmstat\n");
2421 output_w32(0xeef1fa10);
2422}
2423
2424void emit_bicne_imm(int rs,int imm,int rt)
2425{
2426 u_int armval;
cfbd3c6e 2427 genimm_checked(imm,&armval);
57871462 2428 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2429 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2430}
2431
2432void emit_biccs_imm(int rs,int imm,int rt)
2433{
2434 u_int armval;
cfbd3c6e 2435 genimm_checked(imm,&armval);
57871462 2436 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2437 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2438}
2439
2440void emit_bicvc_imm(int rs,int imm,int rt)
2441{
2442 u_int armval;
cfbd3c6e 2443 genimm_checked(imm,&armval);
57871462 2444 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2445 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2446}
2447
2448void emit_bichi_imm(int rs,int imm,int rt)
2449{
2450 u_int armval;
cfbd3c6e 2451 genimm_checked(imm,&armval);
57871462 2452 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2453 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2454}
2455
2456void emit_orrvs_imm(int rs,int imm,int rt)
2457{
2458 u_int armval;
cfbd3c6e 2459 genimm_checked(imm,&armval);
57871462 2460 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2461 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2462}
2463
b9b61529 2464void emit_orrne_imm(int rs,int imm,int rt)
2465{
2466 u_int armval;
cfbd3c6e 2467 genimm_checked(imm,&armval);
b9b61529 2468 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2469 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2470}
2471
2472void emit_andne_imm(int rs,int imm,int rt)
2473{
2474 u_int armval;
cfbd3c6e 2475 genimm_checked(imm,&armval);
b9b61529 2476 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2477 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2478}
2479
57871462 2480void emit_jno_unlikely(int a)
2481{
2482 //emit_jno(a);
2483 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2484 output_w32(0x72800000|rd_rn_rm(15,15,0));
2485}
2486
2487// Save registers before function call
2488void save_regs(u_int reglist)
2489{
2490 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2491 if(!reglist) return;
2492 assem_debug("stmia fp,{");
2493 if(reglist&1) assem_debug("r0, ");
2494 if(reglist&2) assem_debug("r1, ");
2495 if(reglist&4) assem_debug("r2, ");
2496 if(reglist&8) assem_debug("r3, ");
2497 if(reglist&0x1000) assem_debug("r12");
2498 assem_debug("}\n");
2499 output_w32(0xe88b0000|reglist);
2500}
2501// Restore registers after function call
2502void restore_regs(u_int reglist)
2503{
2504 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2505 if(!reglist) return;
2506 assem_debug("ldmia fp,{");
2507 if(reglist&1) assem_debug("r0, ");
2508 if(reglist&2) assem_debug("r1, ");
2509 if(reglist&4) assem_debug("r2, ");
2510 if(reglist&8) assem_debug("r3, ");
2511 if(reglist&0x1000) assem_debug("r12");
2512 assem_debug("}\n");
2513 output_w32(0xe89b0000|reglist);
2514}
2515
2516// Write back consts using r14 so we don't disturb the other registers
2517void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2518{
2519 int hr;
2520 for(hr=0;hr<HOST_REGS;hr++) {
2521 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2522 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2523 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2524 int value=constmap[i][hr];
2525 if(value==0) {
2526 emit_zeroreg(HOST_TEMPREG);
2527 }
2528 else {
2529 emit_movimm(value,HOST_TEMPREG);
2530 }
2531 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2532#ifndef FORCE32
57871462 2533 if((i_is32>>i_regmap[hr])&1) {
2534 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2535 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2536 }
24385cae 2537#endif
57871462 2538 }
2539 }
2540 }
2541 }
2542}
2543
2544/* Stubs/epilogue */
2545
2546void literal_pool(int n)
2547{
2548 if(!literalcount) return;
2549 if(n) {
2550 if((int)out-literals[0][0]<4096-n) return;
2551 }
2552 u_int *ptr;
2553 int i;
2554 for(i=0;i<literalcount;i++)
2555 {
2556 ptr=(u_int *)literals[i][0];
2557 u_int offset=(u_int)out-(u_int)ptr-8;
2558 assert(offset<4096);
2559 assert(!(offset&3));
2560 *ptr|=offset;
2561 output_w32(literals[i][1]);
2562 }
2563 literalcount=0;
2564}
2565
2566void literal_pool_jumpover(int n)
2567{
2568 if(!literalcount) return;
2569 if(n) {
2570 if((int)out-literals[0][0]<4096-n) return;
2571 }
2572 int jaddr=(int)out;
2573 emit_jmp(0);
2574 literal_pool(0);
2575 set_jump_target(jaddr,(int)out);
2576}
2577
2578emit_extjump2(int addr, int target, int linker)
2579{
2580 u_char *ptr=(u_char *)addr;
2581 assert((ptr[3]&0x0e)==0xa);
2582 emit_loadlp(target,0);
2583 emit_loadlp(addr,1);
24385cae 2584 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2585 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2586//DEBUG >
2587#ifdef DEBUG_CYCLE_COUNT
2588 emit_readword((int)&last_count,ECX);
2589 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2590 emit_readword((int)&next_interupt,ECX);
2591 emit_writeword(HOST_CCREG,(int)&Count);
2592 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2593 emit_writeword(ECX,(int)&last_count);
2594#endif
2595//DEBUG <
2596 emit_jmp(linker);
2597}
2598
2599emit_extjump(int addr, int target)
2600{
2601 emit_extjump2(addr, target, (int)dyna_linker);
2602}
2603emit_extjump_ds(int addr, int target)
2604{
2605 emit_extjump2(addr, target, (int)dyna_linker_ds);
2606}
2607
cbbab9cd 2608#ifdef PCSX
2609#include "pcsxmem_inline.c"
2610#endif
2611
// Emit the out-of-line slow path for load stub n.
// Fields read from stubs[n]: [0] stub type (LOAD*_STUB), [1] location of the
// branch that is retargeted to this stub, [2] return address, [3] instruction
// index, [4] host register holding the address, [5] struct regstat pointer,
// [6] cycle adjustment, [7] host registers to save/restore around the call.
// The generated code stores the address in `address`, calls
// indirect_jump_indexed with the matching readmem* table in register 0 and
// address>>16 in register 1, then copies the result from readmem_dword into
// the destination register and jumps back.
57871462 2612do_readstub(int n)
2613{
2614 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2615 literal_pool(256);
2616 set_jump_target(stubs[n][1],(int)out);
2617 int type=stubs[n][0];
2618 int i=stubs[n][3];
2619 int rs=stubs[n][4];
2620 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2621 u_int reglist=stubs[n][7];
2622 signed char *i_regmap=i_regs->regmap;
2623 int addr=get_reg(i_regmap,AGEN1+(i&1));
2624 int rth,rt;
2625 int ds;
// Coprocessor and LWL/LWR-type loads land in FTEMP; others in rt1[i]
b9b61529 2626 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2627 rth=get_reg(i_regmap,FTEMP|64);
2628 rt=get_reg(i_regmap,FTEMP);
2629 }else{
2630 rth=get_reg(i_regmap,rt1[i]|64);
2631 rt=get_reg(i_regmap,rt1[i]);
2632 }
2633 assert(rs>=0);
// Fall back to the destination register, then to a temporary, when no
// address-generation register is allocated
57871462 2634 if(addr<0) addr=rt;
535d208a 2635 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2636 assert(addr>=0);
// Pick the handler table matching the access width
2637 int ftable=0;
2638 if(type==LOADB_STUB||type==LOADBU_STUB)
2639 ftable=(int)readmemb;
2640 if(type==LOADH_STUB||type==LOADHU_STUB)
2641 ftable=(int)readmemh;
2642 if(type==LOADW_STUB)
2643 ftable=(int)readmem;
24385cae 2644#ifndef FORCE32
57871462 2645 if(type==LOADD_STUB)
2646 ftable=(int)readmemd;
24385cae 2647#endif
2648 assert(ftable!=0);
57871462 2649 emit_writeword(rs,(int)&address);
2650 //emit_pusha();
2651 save_regs(reglist);
97a238a6 2652#ifndef PCSX
// ds: the stub belongs to a delay slot (regstat is not regs[i])
57871462 2653 ds=i_regs!=&regs[i];
2654 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2655 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2656 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2657 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2658 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2659#endif
// Register 1 = address>>16, register 0 = table, register 2 = cycle count
57871462 2660 emit_shrimm(rs,16,1);
2661 int cc=get_reg(i_regmap,CCREG);
2662 if(cc<0) {
2663 emit_loadreg(CCREG,2);
2664 }
2665 emit_movimm(ftable,0);
2666 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2667#ifndef PCSX
57871462 2668 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2669#endif
57871462 2670 //emit_readword((int)&last_count,temp);
2671 //emit_add(cc,temp,cc);
2672 //emit_writeword(cc,(int)&Count);
2673 //emit_mov(15,14);
2674 emit_call((int)&indirect_jump_indexed);
2675 //emit_callreg(rs);
2676 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2677#ifndef PCSX
57871462 2678 // We really shouldn't need to update the count here,
2679 // but not doing so causes random crashes...
2680 emit_readword((int)&Count,HOST_TEMPREG);
2681 emit_readword((int)&next_interupt,2);
2682 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2683 emit_writeword(2,(int)&last_count);
2684 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2685 if(cc<0) {
2686 emit_storereg(CCREG,HOST_TEMPREG);
2687 }
f51dc36c 2688#endif
57871462 2689 //emit_popa();
2690 restore_regs(reglist);
2691 //if((cc=get_reg(regmap,CCREG))>=0) {
2692 // emit_loadreg(CCREG,cc);
2693 //}
// Fetch the loaded value unless the destination is unallocated or is guest
// register 0 (coprocessor loads always fetch)
f18c0f46 2694 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2695 assert(rt>=0);
2696 if(type==LOADB_STUB)
2697 emit_movsbl((int)&readmem_dword,rt);
2698 if(type==LOADBU_STUB)
2699 emit_movzbl((int)&readmem_dword,rt);
2700 if(type==LOADH_STUB)
2701 emit_movswl((int)&readmem_dword,rt);
2702 if(type==LOADHU_STUB)
2703 emit_movzwl((int)&readmem_dword,rt);
2704 if(type==LOADW_STUB)
2705 emit_readword((int)&readmem_dword,rt);
2706 if(type==LOADD_STUB) {
2707 emit_readword((int)&readmem_dword,rt);
2708 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2709 }
57871462 2710 }
2711 emit_jmp(stubs[n][2]); // return address
2712}
2713
// Emit, inline, a handler-based read from the compile-time constant address
// addr.  The handler is looked up at assembly time
// (((u_int *)ftable)[addr>>16]) and called through indirect_jump; the result
// is then copied from readmem_dword into the host register mapped to target.
//   type:    LOAD*_STUB access kind
//   i:       instruction index
//   regmap:  host register mapping in effect
//   target:  guest register receiving the value (0 means no destination)
//   adj:     cycle adjustment, applied as CLOCK_DIVIDER*(adj+1)
//   reglist: host registers to save/restore around the call
2714inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2715{
2716 int rs=get_reg(regmap,target);
2717 int rth=get_reg(regmap,target|64);
2718 int rt=get_reg(regmap,target);
535d208a 2719 if(rs<0) rs=get_reg(regmap,-1);
57871462 2720 assert(rs>=0);
57871462 2721 int ftable=0;
2722 if(type==LOADB_STUB||type==LOADBU_STUB)
2723 ftable=(int)readmemb;
2724 if(type==LOADH_STUB||type==LOADHU_STUB)
2725 ftable=(int)readmemh;
2726 if(type==LOADW_STUB)
2727 ftable=(int)readmem;
24385cae 2728#ifndef FORCE32
57871462 2729 if(type==LOADD_STUB)
2730 ftable=(int)readmemd;
24385cae 2731#endif
2732 assert(ftable!=0);
cbbab9cd 2733#ifdef PCSX
// Nothing more to emit when pcsx_direct_read() reports it handled the access
2734 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2735 return;
2736#endif
// With target 0 the address has not been loaded into rs yet
fd99c415 2737 if(target==0)
2738 emit_movimm(addr,rs);
57871462 2739 emit_writeword(rs,(int)&address);
2740 //emit_pusha();
2741 save_regs(reglist);
0c1fe38b 2742#ifndef PCSX
2743 if((signed int)addr>=(signed int)0xC0000000) {
2744 // Theoretically we can have a pagefault here, if the TLB has never
2745 // been enabled and the address is outside the range 80000000..BFFFFFFF
2746 // Write out the registers so the pagefault can be handled. This is
2747 // a very rare case and likely represents a bug.
2748 int ds=regmap!=regs[i].regmap;
2749 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2750 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2751 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2752 }
2753#endif
57871462 2754 //emit_shrimm(rs,16,1);
2755 int cc=get_reg(regmap,CCREG);
2756 if(cc<0) {
2757 emit_loadreg(CCREG,2);
2758 }
2759 //emit_movimm(ftable,0);
// Register 0 = handler address, register 2 = adjusted cycle count
2760 emit_movimm(((u_int *)ftable)[addr>>16],0);
2761 //emit_readword((int)&last_count,12);
2762 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2763#ifndef PCSX
57871462 2764 if((signed int)addr>=(signed int)0xC0000000) {
2765 // Pagefault address
2766 int ds=regmap!=regs[i].regmap;
2767 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2768 }
f51dc36c 2769#endif
57871462 2770 //emit_add(12,2,2);
2771 //emit_writeword(2,(int)&Count);
2772 //emit_call(((u_int *)ftable)[addr>>16]);
2773 emit_call((int)&indirect_jump);
f51dc36c 2774#ifndef PCSX
57871462 2775 // We really shouldn't need to update the count here,
2776 // but not doing so causes random crashes...
2777 emit_readword((int)&Count,HOST_TEMPREG);
2778 emit_readword((int)&next_interupt,2);
2779 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2780 emit_writeword(2,(int)&last_count);
2781 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2782 if(cc<0) {
2783 emit_storereg(CCREG,HOST_TEMPREG);
2784 }
f51dc36c 2785#endif
57871462 2786 //emit_popa();
2787 restore_regs(reglist);
// Copy the result out only when the destination has a host register
fd99c415 2788 if(rt>=0) {
2789 if(type==LOADB_STUB)
2790 emit_movsbl((int)&readmem_dword,rt);
2791 if(type==LOADBU_STUB)
2792 emit_movzbl((int)&readmem_dword,rt);
2793 if(type==LOADH_STUB)
2794 emit_movswl((int)&readmem_dword,rt);
2795 if(type==LOADHU_STUB)
2796 emit_movzwl((int)&readmem_dword,rt);
2797 if(type==LOADW_STUB)
2798 emit_readword((int)&readmem_dword,rt);
2799 if(type==LOADD_STUB) {
2800 emit_readword((int)&readmem_dword,rt);
2801 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2802 }
57871462 2803 }
2804}
2805
// Emit the out-of-line slow path for store stub n.
// Fields read from stubs[n]: [0] stub type (STORE*_STUB), [1] location of the
// branch that is retargeted to this stub, [2] return address, [3] instruction
// index, [4] host register holding the address, [5] struct regstat pointer,
// [6] cycle adjustment, [7] host registers to save/restore around the call.
// The generated code stores the address in `address` and the value in
// byte/hword/word/dword, calls indirect_jump_indexed with the matching
// writemem* table, then recomputes the cycle count from Count and
// next_interupt before returning.
2806do_writestub(int n)
2807{
2808 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2809 literal_pool(256);
2810 set_jump_target(stubs[n][1],(int)out);
2811 int type=stubs[n][0];
2812 int i=stubs[n][3];
2813 int rs=stubs[n][4];
2814 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2815 u_int reglist=stubs[n][7];
2816 signed char *i_regmap=i_regs->regmap;
2817 int addr=get_reg(i_regmap,AGEN1+(i&1));
2818 int rth,rt,r;
2819 int ds;
// Coprocessor stores take their value from FTEMP; others from rs2[i].
// r remembers which guest register supplied the value.
b9b61529 2820 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2821 rth=get_reg(i_regmap,FTEMP|64);
2822 rt=get_reg(i_regmap,r=FTEMP);
2823 }else{
2824 rth=get_reg(i_regmap,rs2[i]|64);
2825 rt=get_reg(i_regmap,r=rs2[i]);
2826 }
2827 assert(rs>=0);
2828 assert(rt>=0);
2829 if(addr<0) addr=get_reg(i_regmap,-1);
2830 assert(addr>=0);
// Pick the handler table matching the access width
2831 int ftable=0;
2832 if(type==STOREB_STUB)
2833 ftable=(int)writememb;
2834 if(type==STOREH_STUB)
2835 ftable=(int)writememh;
2836 if(type==STOREW_STUB)
2837 ftable=(int)writemem;
24385cae 2838#ifndef FORCE32
57871462 2839 if(type==STORED_STUB)
2840 ftable=(int)writememd;
24385cae 2841#endif
2842 assert(ftable!=0);
57871462 2843 emit_writeword(rs,(int)&address);
2844 //emit_shrimm(rs,16,rs);
2845 //emit_movmem_indexedx4(ftable,rs,rs);
2846 if(type==STOREB_STUB)
2847 emit_writebyte(rt,(int)&byte);
2848 if(type==STOREH_STUB)
2849 emit_writehword(rt,(int)&hword);
2850 if(type==STOREW_STUB)
2851 emit_writeword(rt,(int)&word);
2852 if(type==STORED_STUB) {
3d624f89 2853#ifndef FORCE32
57871462 2854 emit_writeword(rt,(int)&dword);
// When the source is guest register 0, rt is stored for the upper half too
2855 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2856#else
2857 printf("STORED_STUB\n");
2858#endif
57871462 2859 }
2860 //emit_pusha();
2861 save_regs(reglist);
97a238a6 2862#ifndef PCSX
// ds: the stub belongs to a delay slot (regstat is not regs[i])
57871462 2863 ds=i_regs!=&regs[i];
2864 int real_rs=get_reg(i_regmap,rs1[i]);
2865 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2866 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2867 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2868 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2869#endif
// Register 1 = address>>16, register 0 = table, register 2 = cycle count
57871462 2870 emit_shrimm(rs,16,1);
2871 int cc=get_reg(i_regmap,CCREG);
2872 if(cc<0) {
2873 emit_loadreg(CCREG,2);
2874 }
2875 emit_movimm(ftable,0);
2876 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2877#ifndef PCSX
57871462 2878 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2879#endif
57871462 2880 //emit_readword((int)&last_count,temp);
2881 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2882 //emit_add(cc,temp,cc);
2883 //emit_writeword(cc,(int)&Count);
2884 emit_call((int)&indirect_jump_indexed);
2885 //emit_callreg(rs);
// Unlike the read stub, the cycle count is re-derived here in every build
2886 emit_readword((int)&Count,HOST_TEMPREG);
2887 emit_readword((int)&next_interupt,2);
2888 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2889 emit_writeword(2,(int)&last_count);
2890 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2891 if(cc<0) {
2892 emit_storereg(CCREG,HOST_TEMPREG);
2893 }
2894 //emit_popa();
2895 restore_regs(reglist);
2896 //if((cc=get_reg(regmap,CCREG))>=0) {
2897 // emit_loadreg(CCREG,cc);
2898 //}
2899 emit_jmp(stubs[n][2]); // return address
2900}
2901
// Emit, inline, a handler-based write to the compile-time constant address
// addr.  The handler is looked up at assembly time
// (((u_int *)ftable)[addr>>16]) and called through indirect_jump, after
// which the cycle count is re-derived from Count and next_interupt.
//   type:    STORE*_STUB access kind
//   i:       instruction index
//   regmap:  host register mapping in effect
//   target:  guest register supplying the value
//   adj:     cycle adjustment, applied as CLOCK_DIVIDER*(adj+1)
//   reglist: host registers to save/restore around the call
2902inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2903{
2904 int rs=get_reg(regmap,-1);
2905 int rth=get_reg(regmap,target|64);
2906 int rt=get_reg(regmap,target);
2907 assert(rs>=0);
2908 assert(rt>=0);
cbbab9cd 2909#ifdef PCSX
// Nothing more to emit when pcsx_direct_write() reports it handled the access
2910 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2911 return;
2912#endif
57871462 2913 int ftable=0;
2914 if(type==STOREB_STUB)
2915 ftable=(int)writememb;
2916 if(type==STOREH_STUB)
2917 ftable=(int)writememh;
2918 if(type==STOREW_STUB)
2919 ftable=(int)writemem;
24385cae 2920#ifndef FORCE32
57871462 2921 if(type==STORED_STUB)
2922 ftable=(int)writememd;
24385cae 2923#endif
2924 assert(ftable!=0);
57871462 2925 emit_writeword(rs,(int)&address);
2926 //emit_shrimm(rs,16,rs);
2927 //emit_movmem_indexedx4(ftable,rs,rs);
2928 if(type==STOREB_STUB)
2929 emit_writebyte(rt,(int)&byte);
2930 if(type==STOREH_STUB)
2931 emit_writehword(rt,(int)&hword);
2932 if(type==STOREW_STUB)
2933 emit_writeword(rt,(int)&word);
2934 if(type==STORED_STUB) {
3d624f89 2935#ifndef FORCE32
57871462 2936 emit_writeword(rt,(int)&dword);
// When target is guest register 0, rt is stored for the upper half too
2937 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2938#else
2939 printf("STORED_STUB\n");
2940#endif
57871462 2941 }
2942 //emit_pusha();
2943 save_regs(reglist);
0c1fe38b 2944#ifndef PCSX
2945 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2946 if((signed int)addr>=(signed int)0xC0000000) {
2947 // Theoretically we can have a pagefault here, if the TLB has never
2948 // been enabled and the address is outside the range 80000000..BFFFFFFF
2949 // Write out the registers so the pagefault can be handled. This is
2950 // a very rare case and likely represents a bug.
2951 int ds=regmap!=regs[i].regmap;
2952 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2953 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2954 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2955 }
2956#endif
57871462 2957 //emit_shrimm(rs,16,1);
2958 int cc=get_reg(regmap,CCREG);
2959 if(cc<0) {
2960 emit_loadreg(CCREG,2);
2961 }
2962 //emit_movimm(ftable,0);
// Register 0 = handler address, register 2 = adjusted cycle count
2963 emit_movimm(((u_int *)ftable)[addr>>16],0);
2964 //emit_readword((int)&last_count,12);
2965 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2966#ifndef PCSX
57871462 2967 if((signed int)addr>=(signed int)0xC0000000) {
2968 // Pagefault address
2969 int ds=regmap!=regs[i].regmap;
2970 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2971 }
f51dc36c 2972#endif
57871462 2973 //emit_add(12,2,2);
2974 //emit_writeword(2,(int)&Count);
2975 //emit_call(((u_int *)ftable)[addr>>16]);
2976 emit_call((int)&indirect_jump);
2977 emit_readword((int)&Count,HOST_TEMPREG);
2978 emit_readword((int)&next_interupt,2);
2979 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2980 emit_writeword(2,(int)&last_count);
2981 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2982 if(cc<0) {
2983 emit_storereg(CCREG,HOST_TEMPREG);
2984 }
2985 //emit_popa();
2986 restore_regs(reglist);
2987}
2988
2989do_unalignedwritestub(int n)
2990{
b7918751 2991 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2992 literal_pool(256);
57871462 2993 set_jump_target(stubs[n][1],(int)out);
b7918751 2994
2995 int i=stubs[n][3];
2996 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2997 int addr=stubs[n][5];
2998 u_int reglist=stubs[n][7];
2999 signed char *i_regmap=i_regs->regmap;
3000 int temp2=get_reg(i_regmap,FTEMP);
3001 int rt;
3002 int ds, real_rs;
3003 rt=get_reg(i_regmap,rs2[i]);
3004 assert(rt>=0);
3005 assert(addr>=0);
3006 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3007 reglist|=(1<<addr);
3008 reglist&=~(1<<temp2);
3009
3010 emit_andimm(addr,0xfffffffc,temp2);
3011 emit_writeword(temp2,(int)&address);
3012
3013 save_regs(reglist);
97a238a6 3014#ifndef PCSX
b7918751 3015 ds=i_regs!=&regs[i];
3016 real_rs=get_reg(i_regmap,rs1[i]);
3017 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3018 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3019 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3020 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3021#endif
b7918751 3022 emit_shrimm(addr,16,1);
3023 int cc=get_reg(i_regmap,CCREG);
3024 if(cc<0) {
3025 emit_loadreg(CCREG,2);
3026 }
3027 emit_movimm((u_int)readmem,0);
3028 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3029#ifndef PCSX
3030 // pagefault address
3031 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3032#endif
b7918751 3033 emit_call((int)&indirect_jump_indexed);
3034 restore_regs(reglist);
3035
3036 emit_readword((int)&readmem_dword,temp2);
3037 int temp=addr; //hmh
3038 emit_shlimm(addr,3,temp);
3039 emit_andimm(temp,24,temp);
3040#ifdef BIG_ENDIAN_MIPS
3041 if (opcode[i]==0x2e) // SWR
3042#else
3043 if (opcode[i]==0x2a) // SWL
3044#endif
3045 emit_xorimm(temp,24,temp);
3046 emit_movimm(-1,HOST_TEMPREG);
55439448 3047 if (opcode[i]==0x2a) { // SWL
b7918751 3048 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3049 emit_orrshr(rt,temp,temp2);
3050 }else{
3051 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3052 emit_orrshl(rt,temp,temp2);
3053 }
3054 emit_readword((int)&address,addr);
3055 emit_writeword(temp2,(int)&word);
3056 //save_regs(reglist); // don't need to, no state changes
3057 emit_shrimm(addr,16,1);
3058 emit_movimm((u_int)writemem,0);
3059 //emit_call((int)&indirect_jump_indexed);
3060 emit_mov(15,14);
3061 emit_readword_dualindexedx4(0,1,15);
3062 emit_readword((int)&Count,HOST_TEMPREG);
3063 emit_readword((int)&next_interupt,2);
3064 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3065 emit_writeword(2,(int)&last_count);
3066 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3067 if(cc<0) {
3068 emit_storereg(CCREG,HOST_TEMPREG);
3069 }
3070 restore_regs(reglist);
57871462 3071 emit_jmp(stubs[n][2]); // return address
3072}
3073
// Debug helper: prints its eight arguments in the order a,b,c,d,ebp,esi,edi,
// followed by the word stored just below the first parameter.
// NOTE(review): (&edi)[-1] reads outside the parameter object; the x86
// register names suggest this was written for another backend -- confirm it
// is meaningful (or used at all) on ARM.
3074void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
3075{
3076 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
3077}
3078
3079do_invstub(int n)
3080{
3081 literal_pool(20);
3082 u_int reglist=stubs[n][3];
3083 set_jump_target(stubs[n][1],(int)out);
3084 save_regs(reglist);
3085 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3086 emit_call((int)&invalidate_addr);
3087 restore_regs(reglist);
3088 emit_jmp(stubs[n][2]); // return address
3089}
3090
3091int do_dirty_stub(int i)
3092{
3093 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3094 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3095 #ifdef PCSX
3096 addr=(u_int)source;
3097 #endif
57871462 3098 // Careful about the code output here, verify_dirty needs to parse it.
3099 #ifdef ARMv5_ONLY
ac545b3a 3100 emit_loadlp(addr,1);
57871462 3101 emit_loadlp((int)copy,2);
3102 emit_loadlp(slen*4,3);
3103 #else
ac545b3a 3104 emit_movw(addr&0x0000FFFF,1);
57871462 3105 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3106 emit_movt(addr&0xFFFF0000,1);
57871462 3107 emit_movt(((u_int)copy)&0xFFFF0000,2);
3108 emit_movw(slen*4,3);
3109 #endif
3110 emit_movimm(start+i*4,0);
3111 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3112 int entry=(int)out;
3113 load_regs_entry(i);
3114 if(entry==(int)out) entry=instr_addr[i];
3115 emit_jmp(instr_addr[i]);
3116 return entry;
3117}
3118
3119void do_dirty_stub_ds()
3120{
3121 // Careful about the code output here, verify_dirty needs to parse it.
3122 #ifdef ARMv5_ONLY
3123 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3124 emit_loadlp((int)copy,2);
3125 emit_loadlp(slen*4,3);
3126 #else
3127 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3128 emit_movw(((u_int)copy)&0x0000FFFF,2);
3129 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3130 emit_movt(((u_int)copy)&0xFFFF0000,2);
3131 emit_movw(slen*4,3);
3132 #endif
3133 emit_movimm(start+1,0);
3134 emit_call((int)&verify_code_ds);
3135}
3136
3137do_cop1stub(int n)
3138{
3139 literal_pool(256);
3140 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3141 set_jump_target(stubs[n][1],(int)out);
3142 int i=stubs[n][3];
3d624f89 3143// int rs=stubs[n][4];
57871462 3144 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3145 int ds=stubs[n][6];
3146 if(!ds) {
3147 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3148 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3149 }
3150 //else {printf("fp exception in delay slot\n");}
3151 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3152 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3153 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3154 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3155 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3156}
3157
3158/* TLB */
3159
3160int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3161{
3162 if(c) {
3163 if((signed int)addr>=(signed int)0xC0000000) {
3164 // address_generation already loaded the const
3165 emit_readword_dualindexedx4(FP,map,map);
3166 }
3167 else
3168 return -1; // No mapping
3169 }
3170 else {
3171 assert(s!=map);
3172 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3173 emit_addsr12(map,s,map);
3174 // Schedule this while we wait on the load
3175 //if(x) emit_xorimm(s,x,ar);
3176 if(shift>=0) emit_shlimm(s,3,shift);
3177 if(~a) emit_andimm(s,a,ar);
3178 emit_readword_dualindexedx4(FP,map,map);
3179 }
3180 return map;
3181}
3182int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3183{
3184 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3185 emit_test(map,map);
3186 *jaddr=(int)out;
3187 emit_js(0);
3188 }
3189 return map;
3190}
3191
3192int gen_tlb_addr_r(int ar, int map) {
3193 if(map>=0) {
3194 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3195 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3196 }
3197}
3198
3199int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3200{
3201 if(c) {
3202 if(addr<0x80800000||addr>=0xC0000000) {
3203 // address_generation already loaded the const
3204 emit_readword_dualindexedx4(FP,map,map);
3205 }
3206 else
3207 return -1; // No mapping
3208 }
3209 else {
3210 assert(s!=map);
3211 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3212 emit_addsr12(map,s,map);
3213 // Schedule this while we wait on the load
3214 //if(x) emit_xorimm(s,x,ar);
3215 emit_readword_dualindexedx4(FP,map,map);
3216 }
3217 return map;
3218}
3219int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3220{
3221 if(!c||addr<0x80800000||addr>=0xC0000000) {
3222 emit_testimm(map,0x40000000);
3223 *jaddr=(int)out;
3224 emit_jne(0);
3225 }
3226}
3227
3228int gen_tlb_addr_w(int ar, int map) {
3229 if(map>=0) {
3230 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3231 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3232 }
3233}
3234
3235// Generate the address of the memory_map entry, relative to dynarec_local
3236generate_map_const(u_int addr,int reg) {
3237 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3238 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3239}
3240
3241/* Special assem */
3242
3243void shift_assemble_arm(int i,struct regstat *i_regs)
3244{
3245 if(rt1[i]) {
3246 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3247 {
3248 signed char s,t,shift;
3249 t=get_reg(i_regs->regmap,rt1[i]);
3250 s=get_reg(i_regs->regmap,rs1[i]);
3251 shift=get_reg(i_regs->regmap,rs2[i]);
3252 if(t>=0){
3253 if(rs1[i]==0)
3254 {
3255 emit_zeroreg(t);
3256 }
3257 else if(rs2[i]==0)
3258 {
3259 assert(s>=0);
3260 if(s!=t) emit_mov(s,t);
3261 }
3262 else
3263 {
3264 emit_andimm(shift,31,HOST_TEMPREG);
3265 if(opcode2[i]==4) // SLLV
3266 {
3267 emit_shl(s,HOST_TEMPREG,t);
3268 }
3269 if(opcode2[i]==6) // SRLV
3270 {
3271 emit_shr(s,HOST_TEMPREG,t);
3272 }
3273 if(opcode2[i]==7) // SRAV
3274 {
3275 emit_sar(s,HOST_TEMPREG,t);
3276 }
3277 }
3278 }
3279 } else { // DSLLV/DSRLV/DSRAV
3280 signed char sh,sl,th,tl,shift;
3281 th=get_reg(i_regs->regmap,rt1[i]|64);
3282 tl=get_reg(i_regs->regmap,rt1[i]);
3283 sh=get_reg(i_regs->regmap,rs1[i]|64);
3284 sl=get_reg(i_regs->regmap,rs1[i]);
3285 shift=get_reg(i_regs->regmap,rs2[i]);
3286 if(tl>=0){
3287 if(rs1[i]==0)
3288 {
3289 emit_zeroreg(tl);
3290 if(th>=0) emit_zeroreg(th);
3291 }
3292 else if(rs2[i]==0)
3293 {
3294 assert(sl>=0);
3295 if(sl!=tl) emit_mov(sl,tl);
3296 if(th>=0&&sh!=th) emit_mov(sh,th);
3297 }
3298 else
3299 {
3300 // FIXME: What if shift==tl ?
3301 assert(shift!=tl);
3302 int temp=get_reg(i_regs->regmap,-1);
3303 int real_th=th;
3304 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3305 assert(sl>=0);
3306 assert(sh>=0);
3307 emit_andimm(shift,31,HOST_TEMPREG);
3308 if(opcode2[i]==0x14) // DSLLV
3309 {
3310 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3311 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3312 emit_orrshr(sl,HOST_TEMPREG,th);
3313 emit_andimm(shift,31,HOST_TEMPREG);
3314 emit_testimm(shift,32);
3315 emit_shl(sl,HOST_TEMPREG,tl);
3316 if(th>=0) emit_cmovne_reg(tl,th);
3317 emit_cmovne_imm(0,tl);
3318 }
3319 if(opcode2[i]==0x16) // DSRLV
3320 {
3321 assert(th>=0);
3322 emit_shr(sl,HOST_TEMPREG,tl);
3323 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3324 emit_orrshl(sh,HOST_TEMPREG,tl);
3325 emit_andimm(shift,31,HOST_TEMPREG);
3326 emit_testimm(shift,32);
3327 emit_shr(sh,HOST_TEMPREG,th);
3328 emit_cmovne_reg(th,tl);
3329 if(real_th>=0) emit_cmovne_imm(0,th);
3330 }
3331 if(opcode2[i]==0x17) // DSRAV
3332 {
3333 assert(th>=0);
3334 emit_shr(sl,HOST_TEMPREG,tl);
3335 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3336 if(real_th>=0) {
3337 assert(temp>=0);
3338 emit_sarimm(th,31,temp);
3339 }
3340 emit_orrshl(sh,HOST_TEMPREG,tl);
3341 emit_andimm(shift,31,HOST_TEMPREG);
3342 emit_testimm(shift,32);
3343 emit_sar(sh,HOST_TEMPREG,th);
3344 emit_cmovne_reg(th,tl);
3345 if(real_th>=0) emit_cmovne_reg(temp,th);
3346 }
3347 }
3348 }
3349 }
3350 }
3351}
3352#define shift_assemble shift_assemble_arm
3353
3354void loadlr_assemble_arm(int i,struct regstat *i_regs)
3355{
3356 int s,th,tl,temp,temp2,addr,map=-1;
3357 int offset;
3358 int jaddr=0;
af4ee1fe 3359 int memtarget=0,c=0;
57871462 3360 u_int hr,reglist=0;
3361 th=get_reg(i_regs->regmap,rt1[i]|64);
3362 tl=get_reg(i_regs->regmap,rt1[i]);
3363 s=get_reg(i_regs->regmap,rs1[i]);
3364 temp=get_reg(i_regs->regmap,-1);
3365 temp2=get_reg(i_regs->regmap,FTEMP);
3366 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3367 assert(addr<0);
3368 offset=imm[i];
3369 for(hr=0;hr<HOST_REGS;hr++) {
3370 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3371 }
3372 reglist|=1<<temp;
3373 if(offset||s<0||c) addr=temp2;
3374 else addr=s;
3375 if(s>=0) {
3376 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3377 if(c) {
3378 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3379 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3380 }
57871462 3381 }
535d208a 3382 if(!using_tlb) {
3383 if(!c) {
3384 #ifdef RAM_OFFSET
3385 map=get_reg(i_regs->regmap,ROREG);
3386 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3387 #endif
3388 emit_shlimm(addr,3,temp);
3389 if (opcode[i]==0x22||opcode[i]==0x26) {
3390 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3391 }else{
535d208a 3392 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3393 }
535d208a 3394 emit_cmpimm(addr,RAM_SIZE);
3395 jaddr=(int)out;
3396 emit_jno(0);
3397 }
3398 else {
3399 if (opcode[i]==0x22||opcode[i]==0x26) {
3400 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3401 }else{
3402 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3403 }
57871462 3404 }
535d208a 3405 }else{ // using tlb
3406 int a;
3407 if(c) {
3408 a=-1;
3409 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3410 a=0xFFFFFFFC; // LWL/LWR
3411 }else{
3412 a=0xFFFFFFF8; // LDL/LDR
3413 }
3414 map=get_reg(i_regs->regmap,TLREG);
3415 assert(map>=0);
3416 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3417 if(c) {
3418 if (opcode[i]==0x22||opcode[i]==0x26) {
3419 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3420 }else{
3421 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3422 }
535d208a 3423 }
3424 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3425 }
3426 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3427 if(!c||memtarget) {
3428 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3429 emit_readword_indexed_tlb(0,temp2,map,temp2);
3430 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3431 }
3432 else
3433 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3434 if(rt1[i]) {
3435 assert(tl>=0);
57871462 3436 emit_andimm(temp,24,temp);
2002a1db 3437#ifdef BIG_ENDIAN_MIPS
3438 if (opcode[i]==0x26) // LWR
3439#else
3440 if (opcode[i]==0x22) // LWL
3441#endif
3442 emit_xorimm(temp,24,temp);
57871462 3443 emit_movimm(-1,HOST_TEMPREG);
3444 if (opcode[i]==0x26) {
3445 emit_shr(temp2,temp,temp2);
3446 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3447 }else{
3448 emit_shl(temp2,temp,temp2);
3449 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3450 }
3451 emit_or(temp2,tl,tl);
57871462 3452 }
535d208a 3453 //emit_storereg(rt1[i],tl); // DEBUG
3454 }
3455 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3456 // FIXME: little endian
3457 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3458 if(!c||memtarget) {
3459 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3460 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3461 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3462 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3463 }
3464 else
3465 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3466 if(rt1[i]) {
3467 assert(th>=0);
3468 assert(tl>=0);
57871462 3469 emit_testimm(temp,32);
3470 emit_andimm(temp,24,temp);
3471 if (opcode[i]==0x1A) { // LDL
3472 emit_rsbimm(temp,32,HOST_TEMPREG);
3473 emit_shl(temp2h,temp,temp2h);
3474 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3475 emit_movimm(-1,HOST_TEMPREG);
3476 emit_shl(temp2,temp,temp2);
3477 emit_cmove_reg(temp2h,th);
3478 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3479 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3480 emit_orreq(temp2,tl,tl);
3481 emit_orrne(temp2,th,th);
3482 }
3483 if (opcode[i]==0x1B) { // LDR
3484 emit_xorimm(temp,24,temp);
3485 emit_rsbimm(temp,32,HOST_TEMPREG);
3486 emit_shr(temp2,temp,temp2);
3487 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3488 emit_movimm(-1,HOST_TEMPREG);
3489 emit_shr(temp2h,temp,temp2h);
3490 emit_cmovne_reg(temp2,tl);
3491 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3492 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3493 emit_orrne(temp2h,th,th);
3494 emit_orreq(temp2h,tl,tl);
3495 }
3496 }
3497 }
3498}
3499#define loadlr_assemble loadlr_assemble_arm
3500
3501void cop0_assemble(int i,struct regstat *i_regs)
3502{
3503 if(opcode2[i]==0) // MFC0
3504 {
3505 signed char t=get_reg(i_regs->regmap,rt1[i]);
3506 char copr=(source[i]>>11)&0x1f;
3507 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3508 if(t>=0&&rt1[i]!=0) {
7139f3c8 3509#ifdef MUPEN64
57871462 3510 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3511 emit_movimm((source[i]>>11)&0x1f,1);
3512 emit_writeword(0,(int)&PC);
3513 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3514 if(copr==9) {
3515 emit_readword((int)&last_count,ECX);
3516 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3517 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3518 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3519 emit_writeword(HOST_CCREG,(int)&Count);
3520 }
3521 emit_call((int)MFC0);
3522 emit_readword((int)&readmem_dword,t);
7139f3c8 3523#else
3524 emit_readword((int)&reg_cop0+copr*4,t);
3525#endif
57871462 3526 }
3527 }
3528 else if(opcode2[i]==4) // MTC0
3529 {
3530 signed char s=get_reg(i_regs->regmap,rs1[i]);
3531 char copr=(source[i]>>11)&0x1f;
3532 assert(s>=0);
3533 emit_writeword(s,(int)&readmem_dword);
3534 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3535#ifdef MUPEN64
57871462 3536 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3537 emit_movimm((source[i]>>11)&0x1f,1);
3538 emit_writeword(0,(int)&PC);
3539 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3540#endif
3541 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3542 emit_readword((int)&last_count,ECX);
3543 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3544 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3545 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3546 emit_writeword(HOST_CCREG,(int)&Count);
3547 }
3548 // What a mess. The status register (12) can enable interrupts,
3549 // so needs a special case to handle a pending interrupt.
3550 // The interrupt must be taken immediately, because a subsequent
3551 // instruction might disable interrupts again.
7139f3c8 3552 if(copr==12||copr==13) {
fca1aef2 3553#ifdef PCSX
3554 if (is_delayslot) {
3555 // burn cycles to cause cc_interrupt, which will
3556 // reschedule next_interupt. Relies on CCREG from above.
3557 assem_debug("MTC0 DS %d\n", copr);
3558 emit_writeword(HOST_CCREG,(int)&last_count);
3559 emit_movimm(0,HOST_CCREG);
3560 emit_storereg(CCREG,HOST_CCREG);
3561 emit_movimm(copr,0);
3562 emit_call((int)pcsx_mtc0_ds);
3563 return;
3564 }
3565#endif
57871462 3566 emit_movimm(start+i*4+4,0);
3567 emit_movimm(0,1);
3568 emit_writeword(0,(int)&pcaddr);
3569 emit_writeword(1,(int)&pending_exception);
3570 }
3571 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3572 //else
fca1aef2 3573#ifdef PCSX
3574 emit_movimm(copr,0);
3575 emit_call((int)pcsx_mtc0);
3576#else
57871462 3577 emit_call((int)MTC0);
fca1aef2 3578#endif
7139f3c8 3579 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3580 emit_readword((int)&Count,HOST_CCREG);
3581 emit_readword((int)&next_interupt,ECX);
3582 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3583 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3584 emit_writeword(ECX,(int)&last_count);
3585 emit_storereg(CCREG,HOST_CCREG);
3586 }
7139f3c8 3587 if(copr==12||copr==13) {
57871462 3588 assert(!is_delayslot);
3589 emit_readword((int)&pending_exception,14);
3590 }
3591 emit_loadreg(rs1[i],s);
3592 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3593 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3594 if(copr==12||copr==13) {
57871462 3595 emit_test(14,14);
3596 emit_jne((int)&do_interrupt);
3597 }
3598 cop1_usable=0;
3599 }
3600 else
3601 {
3602 assert(opcode2[i]==0x10);
3d624f89 3603#ifndef DISABLE_TLB
57871462 3604 if((source[i]&0x3f)==0x01) // TLBR
3605 emit_call((int)TLBR);
3606 if((source[i]&0x3f)==0x02) // TLBWI
3607 emit_call((int)TLBWI_new);
3608 if((source[i]&0x3f)==0x06) { // TLBWR
3609 // The TLB entry written by TLBWR is dependent on the count,
3610 // so update the cycle count
3611 emit_readword((int)&last_count,ECX);
3612 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3613 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3614 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3615 emit_writeword(HOST_CCREG,(int)&Count);
3616 emit_call((int)TLBWR_new);
3617 }
3618 if((source[i]&0x3f)==0x08) // TLBP
3619 emit_call((int)TLBP);
3d624f89 3620#endif
576bbd8f 3621#ifdef PCSX
3622 if((source[i]&0x3f)==0x10) // RFE
3623 {
3624 emit_readword((int)&Status,0);
3625 emit_andimm(0,0x3c,1);
3626 emit_andimm(0,~0xf,0);
3627 emit_orrshr_imm(1,2,0);
3628 emit_writeword(0,(int)&Status);
3629 }
3630#else
57871462 3631 if((source[i]&0x3f)==0x18) // ERET
3632 {
3633 int count=ccadj[i];
3634 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3635 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3636 emit_jmp((int)jump_eret);
3637 }
576bbd8f 3638#endif
57871462 3639 }
3640}
3641
b9b61529 3642static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3643{
3644 switch (copr) {
3645 case 1:
3646 case 3:
3647 case 5:
3648 case 8:
3649 case 9:
3650 case 10:
3651 case 11:
3652 emit_readword((int)&reg_cop2d[copr],tl);
3653 emit_signextend16(tl,tl);
3654 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3655 break;
3656 case 7:
3657 case 16:
3658 case 17:
3659 case 18:
3660 case 19:
3661 emit_readword((int)&reg_cop2d[copr],tl);
3662 emit_andimm(tl,0xffff,tl);
3663 emit_writeword(tl,(int)&reg_cop2d[copr]);
3664 break;
3665 case 15:
3666 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3667 emit_writeword(tl,(int)&reg_cop2d[copr]);
3668 break;
3669 case 28:
b9b61529 3670 case 29:
3671 emit_readword((int)&reg_cop2d[9],temp);
3672 emit_testimm(temp,0x8000); // do we need this?
3673 emit_andimm(temp,0xf80,temp);
3674 emit_andne_imm(temp,0,temp);
f70d384d 3675 emit_shrimm(temp,7,tl);
b9b61529 3676 emit_readword((int)&reg_cop2d[10],temp);
3677 emit_testimm(temp,0x8000);
3678 emit_andimm(temp,0xf80,temp);
3679 emit_andne_imm(temp,0,temp);
f70d384d 3680 emit_orrshr_imm(temp,2,tl);
b9b61529 3681 emit_readword((int)&reg_cop2d[11],temp);
3682 emit_testimm(temp,0x8000);
3683 emit_andimm(temp,0xf80,temp);
3684 emit_andne_imm(temp,0,temp);
f70d384d 3685 emit_orrshl_imm(temp,3,tl);
b9b61529 3686 emit_writeword(tl,(int)&reg_cop2d[copr]);
3687 break;
3688 default:
3689 emit_readword((int)&reg_cop2d[copr],tl);
3690 break;
3691 }
3692}
3693
3694static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3695{
3696 switch (copr) {
3697 case 15:
3698 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3699 emit_writeword(sl,(int)&reg_cop2d[copr]);
3700 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3701 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3702 emit_writeword(sl,(int)&reg_cop2d[14]);
3703 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3704 break;
3705 case 28:
3706 emit_andimm(sl,0x001f,temp);
f70d384d 3707 emit_shlimm(temp,7,temp);
b9b61529 3708 emit_writeword(temp,(int)&reg_cop2d[9]);
3709 emit_andimm(sl,0x03e0,temp);
f70d384d 3710 emit_shlimm(temp,2,temp);
b9b61529 3711 emit_writeword(temp,(int)&reg_cop2d[10]);
3712 emit_andimm(sl,0x7c00,temp);
f70d384d 3713 emit_shrimm(temp,3,temp);
b9b61529 3714 emit_writeword(temp,(int)&reg_cop2d[11]);
3715 emit_writeword(sl,(int)&reg_cop2d[28]);
3716 break;
3717 case 30:
3718 emit_movs(sl,temp);
3719 emit_mvnmi(temp,temp);
3720 emit_clz(temp,temp);
3721 emit_writeword(sl,(int)&reg_cop2d[30]);
3722 emit_writeword(temp,(int)&reg_cop2d[31]);
3723 break;
b9b61529 3724 case 31:
3725 break;
3726 default:
3727 emit_writeword(sl,(int)&reg_cop2d[copr]);
3728 break;
3729 }
3730}
3731
3732void cop2_assemble(int i,struct regstat *i_regs)
3733{
3734 u_int copr=(source[i]>>11)&0x1f;
3735 signed char temp=get_reg(i_regs->regmap,-1);
3736 if (opcode2[i]==0) { // MFC2
3737 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3738 if(tl>=0&&rt1[i]!=0)
b9b61529 3739 cop2_get_dreg(copr,tl,temp);
3740 }
3741 else if (opcode2[i]==4) { // MTC2
3742 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3743 cop2_put_dreg(copr,sl,temp);
3744 }
3745 else if (opcode2[i]==2) // CFC2
3746 {
3747 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3748 if(tl>=0&&rt1[i]!=0)
b9b61529 3749 emit_readword((int)&reg_cop2c[copr],tl);
3750 }
3751 else if (opcode2[i]==6) // CTC2
3752 {
3753 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3754 switch(copr) {
3755 case 4:
3756 case 12:
3757 case 20:
3758 case 26:
3759 case 27:
3760 case 29:
3761 case 30:
3762 emit_signextend16(sl,temp);
3763 break;
3764 case 31:
3765 //value = value & 0x7ffff000;
3766 //if (value & 0x7f87e000) value |= 0x80000000;
3767 emit_shrimm(sl,12,temp);
3768 emit_shlimm(temp,12,temp);
3769 emit_testimm(temp,0x7f000000);
3770 emit_testeqimm(temp,0x00870000);
3771 emit_testeqimm(temp,0x0000e000);
3772 emit_orrne_imm(temp,0x80000000,temp);
3773 break;
3774 default:
3775 temp=sl;
3776 break;
3777 }
3778 emit_writeword(temp,(int)&reg_cop2c[copr]);
3779 assert(sl>=0);
3780 }
3781}
3782
3783void c2op_assemble(int i,struct regstat *i_regs)
3784{
3785 signed char temp=get_reg(i_regs->regmap,-1);
3786 u_int c2op=source[i]&0x3f;
3787 u_int hr,reglist=0;
3788 for(hr=0;hr<HOST_REGS;hr++) {
3789 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3790 }
3791 if(i==0||itype[i-1]!=C2OP)
3792 save_regs(reglist);
3793
3794 if (gte_handlers[c2op]!=NULL) {
3795 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3796 emit_movimm(source[i],1); // opcode
b9b61529 3797 if (cc>=0&&gte_cycletab[c2op])
009faf24 3798 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3799 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3800 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3801 emit_call((int)gte_handlers[c2op]);
3802 }
3803
3804 if(i>=slen-1||itype[i+1]!=C2OP)
3805 restore_regs(reglist);
3806}
3807
3808void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3809{
3810 // XXX: should just just do the exception instead
3811 if(!cop1_usable) {
3812 int jaddr=(int)out;
3813 emit_jmp(0);
3814 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3815 cop1_usable=1;
3816 }
3817}
3818
57871462 3819void cop1_assemble(int i,struct regstat *i_regs)
3820{
3d624f89 3821#ifndef DISABLE_COP1
57871462 3822 // Check cop1 unusable
3823 if(!cop1_usable) {
3824 signed char rs=get_reg(i_regs->regmap,CSREG);
3825 assert(rs>=0);
3826 emit_testimm(rs,0x20000000);
3827 int jaddr=(int)out;
3828 emit_jeq(0);
3829 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3830 cop1_usable=1;
3831 }
3832 if (opcode2[i]==0) { // MFC1
3833 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3834 if(tl>=0) {
3835 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3836 emit_readword_indexed(0,tl,tl);
3837 }
3838 }
3839 else if (opcode2[i]==1) { // DMFC1
3840 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3841 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3842 if(tl>=0) {
3843 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3844 if(th>=0) emit_readword_indexed(4,tl,th);
3845 emit_readword_indexed(0,tl,tl);
3846 }
3847 }
3848 else if (opcode2[i]==4) { // MTC1
3849 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3850 signed char temp=get_reg(i_regs->regmap,-1);
3851 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3852 emit_writeword_indexed(sl,0,temp);
3853 }
3854 else if (opcode2[i]==5) { // DMTC1
3855 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3856 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3857 signed char temp=get_reg(i_regs->regmap,-1);
3858 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3859 emit_writeword_indexed(sh,4,temp);
3860 emit_writeword_indexed(sl,0,temp);
3861 }
3862 else if (opcode2[i]==2) // CFC1
3863 {
3864 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3865 if(tl>=0) {
3866 u_int copr=(source[i]>>11)&0x1f;
3867 if(copr==0) emit_readword((int)&FCR0,tl);
3868 if(copr==31) emit_readword((int)&FCR31,tl);
3869 }
3870 }
3871 else if (opcode2[i]==6) // CTC1
3872 {
3873 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3874 u_int copr=(source[i]>>11)&0x1f;
3875 assert(sl>=0);
3876 if(copr==31)
3877 {
3878 emit_writeword(sl,(int)&FCR31);
3879 // Set the rounding mode
3880 //FIXME
3881 //char temp=get_reg(i_regs->regmap,-1);
3882 //emit_andimm(sl,3,temp);
3883 //emit_fldcw_indexed((int)&rounding_modes,temp);
3884 }
3885 }
3d624f89 3886#else
3887 cop1_unusable(i, i_regs);
3888#endif
57871462 3889}
3890
3891void fconv_assemble_arm(int i,struct regstat *i_regs)
3892{
3d624f89 3893#ifndef DISABLE_COP1
57871462 3894 signed char temp=get_reg(i_regs->regmap,-1);
3895 assert(temp>=0);
3896 // Check cop1 unusable
3897 if(!cop1_usable) {
3898 signed char rs=get_reg(i_regs->regmap,CSREG);
3899 assert(rs>=0);
3900 emit_testimm(rs,0x20000000);
3901 int jaddr=(int)out;
3902 emit_jeq(0);
3903 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3904 cop1_usable=1;
3905 }
3906
3907 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3908 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3909 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3910 emit_flds(temp,15);
3911 emit_ftosizs(15,15); // float->int, truncate
3912 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3913 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3914 emit_fsts(15,temp);
3915 return;
3916 }
3917 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3918 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3919 emit_vldr(temp,7);
3920 emit_ftosizd(7,13); // double->int, truncate
3921 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3922 emit_fsts(13,temp);
3923 return;
3924 }
3925
3926 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3927 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3928 emit_flds(temp,13);
3929 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3930 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3931 emit_fsitos(13,15);
3932 emit_fsts(15,temp);
3933 return;
3934 }
3935 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3936 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3937 emit_flds(temp,13);
3938 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3939 emit_fsitod(13,7);
3940 emit_vstr(7,temp);
3941 return;
3942 }
3943
3944 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3945 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3946 emit_flds(temp,13);
3947 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3948 emit_fcvtds(13,7);
3949 emit_vstr(7,temp);
3950 return;
3951 }
3952 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3953 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3954 emit_vldr(temp,7);
3955 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3956 emit_fcvtsd(7,13);
3957 emit_fsts(13,temp);
3958 return;
3959 }
3960 #endif
3961
3962 // C emulation code
3963
3964 u_int hr,reglist=0;
3965 for(hr=0;hr<HOST_REGS;hr++) {
3966 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3967 }
3968 save_regs(reglist);
3969
3970 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3971 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3972 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3973 emit_call((int)cvt_s_w);
3974 }
3975 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3976 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3977 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3978 emit_call((int)cvt_d_w);
3979 }
3980 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3981 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3982 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3983 emit_call((int)cvt_s_l);
3984 }
3985 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3986 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3987 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3988 emit_call((int)cvt_d_l);
3989 }
3990
3991 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3992 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3993 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3994 emit_call((int)cvt_d_s);
3995 }
3996 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3997 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3998 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3999 emit_call((int)cvt_w_s);
4000 }
4001 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4002 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4003 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4004 emit_call((int)cvt_l_s);
4005 }
4006
4007 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4008 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4009 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4010 emit_call((int)cvt_s_d);
4011 }
4012 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4013 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4014 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4015 emit_call((int)cvt_w_d);
4016 }
4017 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4018 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4019 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4020 emit_call((int)cvt_l_d);
4021 }
4022
4023 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4024 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4025 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4026 emit_call((int)round_l_s);
4027 }
4028 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4029 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4030 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4031 emit_call((int)trunc_l_s);
4032 }
4033 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4034 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4035 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4036 emit_call((int)ceil_l_s);
4037 }
4038 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4039 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4040 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4041 emit_call((int)floor_l_s);
4042 }
4043 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4044 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4045 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4046 emit_call((int)round_w_s);
4047 }
4048 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4049 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4050 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4051 emit_call((int)trunc_w_s);
4052 }
4053 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4054 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4055 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4056 emit_call((int)ceil_w_s);
4057 }
4058 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4059 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4060 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4061 emit_call((int)floor_w_s);
4062 }
4063
4064 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4065 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4066 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4067 emit_call((int)round_l_d);
4068 }
4069 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4070 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4071 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4072 emit_call((int)trunc_l_d);
4073 }
4074 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4075 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4076 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4077 emit_call((int)ceil_l_d);
4078 }
4079 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4080 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4081 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4082 emit_call((int)floor_l_d);
4083 }
4084 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4085 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4086 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4087 emit_call((int)round_w_d);
4088 }
4089 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4090 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4091 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4092 emit_call((int)trunc_w_d);
4093 }
4094 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4095 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4096 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4097 emit_call((int)ceil_w_d);
4098 }
4099 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4100 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4101 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4102 emit_call((int)floor_w_d);
4103 }
4104
4105 restore_regs(reglist);
3d624f89 4106#else
4107 cop1_unusable(i, i_regs);
4108#endif
57871462 4109}
// Alias the generic name fconv_assemble to fconv_assemble_arm.
#define fconv_assemble fconv_assemble_arm
4111
4112void fcomp_assemble(int i,struct regstat *i_regs)
4113{
3d624f89 4114#ifndef DISABLE_COP1
57871462 4115 signed char fs=get_reg(i_regs->regmap,FSREG);
4116 signed char temp=get_reg(i_regs->regmap,-1);
4117 assert(temp>=0);
4118 // Check cop1 unusable
4119 if(!cop1_usable) {
4120 signed char cs=get_reg(i_regs->regmap,CSREG);
4121 assert(cs>=0);
4122 emit_testimm(cs,0x20000000);
4123 int jaddr=(int)out;
4124 emit_jeq(0);
4125 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4126 cop1_usable=1;
4127 }
4128
4129 if((source[i]&0x3f)==0x30) {
4130 emit_andimm(fs,~0x800000,fs);
4131 return;
4132 }
4133
4134 if((source[i]&0x3e)==0x38) {
4135 // sf/ngle - these should throw exceptions for NaNs
4136 emit_andimm(fs,~0x800000,fs);
4137 return;
4138 }
4139
4140 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4141 if(opcode2[i]==0x10) {
4142 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4143 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4144 emit_orimm(fs,0x800000,fs);
4145 emit_flds(temp,14);
4146 emit_flds(HOST_TEMPREG,15);
4147 emit_fcmps(14,15);
4148 emit_fmstat();
4149 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4150 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4151 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4152 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4153 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4154 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4155 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4156 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4157 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4158 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4159 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4160 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4161 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4162 return;
4163 }
4164 if(opcode2[i]==0x11) {
4165 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4166 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4167 emit_orimm(fs,0x800000,fs);
4168 emit_vldr(temp,6);
4169 emit_vldr(HOST_TEMPREG,7);
4170 emit_fcmpd(6,7);
4171 emit_fmstat();
4172 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4173 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4174 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4175 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4176 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4177 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4178 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4179 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4180 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4181 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4182 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4183 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4184 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4185 return;
4186 }
4187 #endif
4188
4189 // C only
4190
4191 u_int hr,reglist=0;
4192 for(hr=0;hr<HOST_REGS;hr++) {
4193 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4194 }
4195 reglist&=~(1<<fs);
4196 save_regs(reglist);
4197 if(opcode2[i]==0x10) {
4198 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4199 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4200 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4201 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4202 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4203 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4204 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4205 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4206 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4207 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4208 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4209 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4210 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4211 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4212 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4213 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4214 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4215 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4216 }
4217 if(opcode2[i]==0x11) {
4218 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4219 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4220 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4221 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4222 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4223 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4224 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4225 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4226 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4227 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4228 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4229 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4230 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4231 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4232 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4233 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4234 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4235 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4236 }
4237 restore_regs(reglist);
4238 emit_loadreg(FSREG,fs);
3d624f89 4239#else
4240 cop1_unusable(i, i_regs);
4241#endif
57871462 4242}
4243
4244void float_assemble(int i,struct regstat *i_regs)
4245{
3d624f89 4246#ifndef DISABLE_COP1
57871462 4247 signed char temp=get_reg(i_regs->regmap,-1);
4248 assert(temp>=0);
4249 // Check cop1 unusable
4250 if(!cop1_usable) {
4251 signed char cs=get_reg(i_regs->regmap,CSREG);
4252 assert(cs>=0);
4253 emit_testimm(cs,0x20000000);
4254 int jaddr=(int)out;
4255 emit_jeq(0);
4256 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4257 cop1_usable=1;
4258 }
4259
4260 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4261 if((source[i]&0x3f)==6) // mov
4262 {
4263 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4264 if(opcode2[i]==0x10) {
4265 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4266 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4267 emit_readword_indexed(0,temp,temp);
4268 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4269 }
4270 if(opcode2[i]==0x11) {
4271 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4272 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4273 emit_vldr(temp,7);
4274 emit_vstr(7,HOST_TEMPREG);
4275 }
4276 }
4277 return;
4278 }
4279
4280 if((source[i]&0x3f)>3)
4281 {
4282 if(opcode2[i]==0x10) {
4283 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4284 emit_flds(temp,15);
4285 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4286 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4287 }
4288 if((source[i]&0x3f)==4) // sqrt
4289 emit_fsqrts(15,15);
4290 if((source[i]&0x3f)==5) // abs
4291 emit_fabss(15,15);
4292 if((source[i]&0x3f)==7) // neg
4293 emit_fnegs(15,15);
4294 emit_fsts(15,temp);
4295 }
4296 if(opcode2[i]==0x11) {
4297 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4298 emit_vldr(temp,7);
4299 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4300 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4301 }
4302 if((source[i]&0x3f)==4) // sqrt
4303 emit_fsqrtd(7,7);
4304 if((source[i]&0x3f)==5) // abs
4305 emit_fabsd(7,7);
4306 if((source[i]&0x3f)==7) // neg
4307 emit_fnegd(7,7);
4308 emit_vstr(7,temp);
4309 }
4310 return;
4311 }
4312 if((source[i]&0x3f)<4)
4313 {
4314 if(opcode2[i]==0x10) {
4315 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4316 }
4317 if(opcode2[i]==0x11) {
4318 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4319 }
4320 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4321 if(opcode2[i]==0x10) {
4322 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4323 emit_flds(temp,15);
4324 emit_flds(HOST_TEMPREG,13);
4325 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4326 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4327 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4328 }
4329 }
4330 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4331 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4332 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4333 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4334 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4335 emit_fsts(15,HOST_TEMPREG);
4336 }else{
4337 emit_fsts(15,temp);
4338 }
4339 }
4340 else if(opcode2[i]==0x11) {
4341 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4342 emit_vldr(temp,7);
4343 emit_vldr(HOST_TEMPREG,6);
4344 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4345 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4346 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4347 }
4348 }
4349 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4350 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4351 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4352 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4353 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4354 emit_vstr(7,HOST_TEMPREG);
4355 }else{
4356 emit_vstr(7,temp);
4357 }
4358 }
4359 }
4360 else {
4361 if(opcode2[i]==0x10) {
4362 emit_flds(temp,15);
4363 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4364 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4365 }
4366 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4367 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4368 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4369 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4370 emit_fsts(15,temp);
4371 }
4372 else if(opcode2[i]==0x11) {
4373 emit_vldr(temp,7);
4374 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4375 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4376 }
4377 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4378 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4379 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4380 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4381 emit_vstr(7,temp);
4382 }
4383 }
4384 return;
4385 }
4386 #endif
4387
4388 u_int hr,reglist=0;
4389 for(hr=0;hr<HOST_REGS;hr++) {
4390 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4391 }
4392 if(opcode2[i]==0x10) { // Single precision
4393 save_regs(reglist);
4394 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4395 if((source[i]&0x3f)<4) {
4396 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4397 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4398 }else{
4399 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4400 }
4401 switch(source[i]&0x3f)
4402 {
4403 case 0x00: emit_call((int)add_s);break;
4404 case 0x01: emit_call((int)sub_s);break;
4405 case 0x02: emit_call((int)mul_s);break;
4406 case 0x03: emit_call((int)div_s);break;
4407 case 0x04: emit_call((int)sqrt_s);break;
4408 case 0x05: emit_call((int)abs_s);break;
4409 case 0x06: emit_call((int)mov_s);break;
4410 case 0x07: emit_call((int)neg_s);break;
4411 }
4412 restore_regs(reglist);
4413 }
4414 if(opcode2[i]==0x11) { // Double precision
4415 save_regs(reglist);
4416 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4417 if((source[i]&0x3f)<4) {
4418 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4419 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4420 }else{
4421 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4422 }
4423 switch(source[i]&0x3f)
4424 {
4425 case 0x00: emit_call((int)add_d);break;
4426 case 0x01: emit_call((int)sub_d);break;
4427 case 0x02: emit_call((int)mul_d);break;
4428 case 0x03: emit_call((int)div_d);break;
4429 case 0x04: emit_call((int)sqrt_d);break;
4430 case 0x05: emit_call((int)abs_d);break;
4431 case 0x06: emit_call((int)mov_d);break;
4432 case 0x07: emit_call((int)neg_d);break;
4433 }
4434 restore_regs(reglist);
4435 }
3d624f89 4436#else
4437 cop1_unusable(i, i_regs);
4438#endif
57871462 4439}
4440
4441void multdiv_assemble_arm(int i,struct regstat *i_regs)
4442{
4443 // case 0x18: MULT
4444 // case 0x19: MULTU
4445 // case 0x1A: DIV
4446 // case 0x1B: DIVU
4447 // case 0x1C: DMULT
4448 // case 0x1D: DMULTU
4449 // case 0x1E: DDIV
4450 // case 0x1F: DDIVU
4451 if(rs1[i]&&rs2[i])
4452 {
4453 if((opcode2[i]&4)==0) // 32-bit
4454 {
4455 if(opcode2[i]==0x18) // MULT
4456 {
4457 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4458 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4459 signed char hi=get_reg(i_regs->regmap,HIREG);
4460 signed char lo=get_reg(i_regs->regmap,LOREG);
4461 assert(m1>=0);
4462 assert(m2>=0);
4463 assert(hi>=0);
4464 assert(lo>=0);
4465 emit_smull(m1,m2,hi,lo);
4466 }
4467 if(opcode2[i]==0x19) // MULTU
4468 {
4469 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4470 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4471 signed char hi=get_reg(i_regs->regmap,HIREG);
4472 signed char lo=get_reg(i_regs->regmap,LOREG);
4473 assert(m1>=0);
4474 assert(m2>=0);
4475 assert(hi>=0);
4476 assert(lo>=0);
4477 emit_umull(m1,m2,hi,lo);
4478 }
4479 if(opcode2[i]==0x1A) // DIV
4480 {
4481 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4482 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4483 assert(d1>=0);
4484 assert(d2>=0);
4485 signed char quotient=get_reg(i_regs->regmap,LOREG);
4486 signed char remainder=get_reg(i_regs->regmap,HIREG);
4487 assert(quotient>=0);
4488 assert(remainder>=0);
4489 emit_movs(d1,remainder);
4490 emit_negmi(remainder,remainder);
4491 emit_movs(d2,HOST_TEMPREG);
4492 emit_jeq((int)out+52); // Division by zero
4493 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4494 emit_clz(HOST_TEMPREG,quotient);
4495 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4496 emit_orimm(quotient,1<<31,quotient);
4497 emit_shr(quotient,quotient,quotient);
4498 emit_cmp(remainder,HOST_TEMPREG);
4499 emit_subcs(remainder,HOST_TEMPREG,remainder);
4500 emit_adcs(quotient,quotient,quotient);
4501 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4502 emit_jcc((int)out-16); // -4
4503 emit_teq(d1,d2);
4504 emit_negmi(quotient,quotient);
4505 emit_test(d1,d1);
4506 emit_negmi(remainder,remainder);
4507 }
4508 if(opcode2[i]==0x1B) // DIVU
4509 {
4510 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4511 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4512 assert(d1>=0);
4513 assert(d2>=0);
4514 signed char quotient=get_reg(i_regs->regmap,LOREG);
4515 signed char remainder=get_reg(i_regs->regmap,HIREG);
4516 assert(quotient>=0);
4517 assert(remainder>=0);
4518 emit_test(d2,d2);
4519 emit_jeq((int)out+44); // Division by zero
4520 emit_clz(d2,HOST_TEMPREG);
4521 emit_movimm(1<<31,quotient);
4522 emit_shl(d2,HOST_TEMPREG,d2);
4523 emit_mov(d1,remainder);
4524 emit_shr(quotient,HOST_TEMPREG,quotient);
4525 emit_cmp(remainder,d2);
4526 emit_subcs(remainder,d2,remainder);
4527 emit_adcs(quotient,quotient,quotient);
4528 emit_shrcc_imm(d2,1,d2);
4529 emit_jcc((int)out-16); // -4
4530 }
4531 }
4532 else // 64-bit
4533 {
4534 if(opcode2[i]==0x1C) // DMULT
4535 {
4536 assert(opcode2[i]!=0x1C);
4537 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4538 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4539 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4540 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4541 assert(m1h>=0);
4542 assert(m2h>=0);
4543 assert(m1l>=0);
4544 assert(m2l>=0);
4545 emit_pushreg(m2h);
4546 emit_pushreg(m2l);
4547 emit_pushreg(m1h);
4548 emit_pushreg(m1l);
4549 emit_call((int)&mult64);
4550 emit_popreg(m1l);
4551 emit_popreg(m1h);
4552 emit_popreg(m2l);
4553 emit_popreg(m2h);
4554 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4555 signed char hil=get_reg(i_regs->regmap,HIREG);
4556 if(hih>=0) emit_loadreg(HIREG|64,hih);
4557 if(hil>=0) emit_loadreg(HIREG,hil);
4558 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4559 signed char lol=get_reg(i_regs->regmap,LOREG);
4560 if(loh>=0) emit_loadreg(LOREG|64,loh);
4561 if(lol>=0) emit_loadreg(LOREG,lol);
4562 }
4563 if(opcode2[i]==0x1D) // DMULTU
4564 {
4565 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4566 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4567 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4568 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4569 assert(m1h>=0);
4570 assert(m2h>=0);
4571 assert(m1l>=0);
4572 assert(m2l>=0);
4573 save_regs(0x100f);
4574 if(m1l!=0) emit_mov(m1l,0);
4575 if(m1h==0) emit_readword((int)&dynarec_local,1);
4576 else if(m1h>1) emit_mov(m1h,1);
4577 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4578 else if(m2l>2) emit_mov(m2l,2);
4579 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4580 else if(m2h>3) emit_mov(m2h,3);
4581 emit_call((int)&multu64);
4582 restore_regs(0x100f);
4583 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4584 signed char hil=get_reg(i_regs->regmap,HIREG);
4585 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4586 signed char lol=get_reg(i_regs->regmap,LOREG);
4587 /*signed char temp=get_reg(i_regs->regmap,-1);
4588 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4589 signed char rl=get_reg(i_regs->regmap,HIREG);
4590 assert(m1h>=0);
4591 assert(m2h>=0);
4592 assert(m1l>=0);
4593 assert(m2l>=0);
4594 assert(temp>=0);
4595 //emit_mov(m1l,EAX);
4596 //emit_mul(m2l);
4597 emit_umull(rl,rh,m1l,m2l);
4598 emit_storereg(LOREG,rl);
4599 emit_mov(rh,temp);
4600 //emit_mov(m1h,EAX);
4601 //emit_mul(m2l);
4602 emit_umull(rl,rh,m1h,m2l);
4603 emit_adds(rl,temp,temp);
4604 emit_adcimm(rh,0,rh);
4605 emit_storereg(HIREG,rh);
4606 //emit_mov(m2h,EAX);
4607 //emit_mul(m1l);
4608 emit_umull(rl,rh,m1l,m2h);
4609 emit_adds(rl,temp,temp);
4610 emit_adcimm(rh,0,rh);
4611 emit_storereg(LOREG|64,temp);
4612 emit_mov(rh,temp);
4613 //emit_mov(m2h,EAX);
4614 //emit_mul(m1h);
4615 emit_umull(rl,rh,m1h,m2h);
4616 emit_adds(rl,temp,rl);
4617 emit_loadreg(HIREG,temp);
4618 emit_adcimm(rh,0,rh);
4619 emit_adds(rl,temp,rl);
4620 emit_adcimm(rh,0,rh);
4621 // DEBUG
4622 /*
4623 emit_pushreg(m2h);
4624 emit_pushreg(m2l);
4625 emit_pushreg(m1h);
4626 emit_pushreg(m1l);
4627 emit_call((int)&multu64);
4628 emit_popreg(m1l);
4629 emit_popreg(m1h);
4630 emit_popreg(m2l);
4631 emit_popreg(m2h);
4632 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4633 signed char hil=get_reg(i_regs->regmap,HIREG);
4634 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4635 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4636 */
4637 // Shouldn't be necessary
4638 //char loh=get_reg(i_regs->regmap,LOREG|64);
4639 //char lol=get_reg(i_regs->regmap,LOREG);
4640 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4641 //if(lol>=0) emit_loadreg(LOREG,lol);
4642 }
4643 if(opcode2[i]==0x1E) // DDIV
4644 {
4645 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4646 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4647 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4648 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4649 assert(d1h>=0);
4650 assert(d2h>=0);
4651 assert(d1l>=0);
4652 assert(d2l>=0);
4653 save_regs(0x100f);
4654 if(d1l!=0) emit_mov(d1l,0);
4655 if(d1h==0) emit_readword((int)&dynarec_local,1);
4656 else if(d1h>1) emit_mov(d1h,1);
4657 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4658 else if(d2l>2) emit_mov(d2l,2);
4659 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4660 else if(d2h>3) emit_mov(d2h,3);
4661 emit_call((int)&div64);
4662 restore_regs(0x100f);
4663 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4664 signed char hil=get_reg(i_regs->regmap,HIREG);
4665 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4666 signed char lol=get_reg(i_regs->regmap,LOREG);
4667 if(hih>=0) emit_loadreg(HIREG|64,hih);
4668 if(hil>=0) emit_loadreg(HIREG,hil);
4669 if(loh>=0) emit_loadreg(LOREG|64,loh);
4670 if(lol>=0) emit_loadreg(LOREG,lol);
4671 }
4672 if(opcode2[i]==0x1F) // DDIVU
4673 {
4674 //u_int hr,reglist=0;
4675 //for(hr=0;hr<HOST_REGS;hr++) {
4676 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4677 //}
4678 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4679 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4680 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4681 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4682 assert(d1h>=0);
4683 assert(d2h>=0);
4684 assert(d1l>=0);
4685 assert(d2l>=0);
4686 save_regs(0x100f);
4687 if(d1l!=0) emit_mov(d1l,0);
4688 if(d1h==0) emit_readword((int)&dynarec_local,1);
4689 else if(d1h>1) emit_mov(d1h,1);
4690 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4691 else if(d2l>2) emit_mov(d2l,2);
4692 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4693 else if(d2h>3) emit_mov(d2h,3);
4694 emit_call((int)&divu64);
4695 restore_regs(0x100f);
4696 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4697 signed char hil=get_reg(i_regs->regmap,HIREG);
4698 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4699 signed char lol=get_reg(i_regs->regmap,LOREG);
4700 if(hih>=0) emit_loadreg(HIREG|64,hih);
4701 if(hil>=0) emit_loadreg(HIREG,hil);
4702 if(loh>=0) emit_loadreg(LOREG|64,loh);
4703 if(lol>=0) emit_loadreg(LOREG,lol);
4704 }
4705 }
4706 }
4707 else
4708 {
4709 // Multiply by zero is zero.
4710 // MIPS does not have a divide by zero exception.
4711 // The result is undefined, we return zero.
4712 signed char hr=get_reg(i_regs->regmap,HIREG);
4713 signed char lr=get_reg(i_regs->regmap,LOREG);
4714 if(hr>=0) emit_zeroreg(hr);
4715 if(lr>=0) emit_zeroreg(lr);
4716 }
4717}
4718#define multdiv_assemble multdiv_assemble_arm
4719
// Intentionally empty; r is unused.
void do_preload_rhash(int r) {
  // Don't need this for ARM.  On x86, this puts the value 0xf8 into the
  // register.  On ARM the hash can be done with a single instruction (below)
}
4724
4725void do_preload_rhtbl(int ht) {
4726 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4727}
4728
// Emit the mini hash table hash: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  emit_andimm(rs,0xf8,rh);
}
4732
4733void do_miniht_load(int ht,int rh) {
4734 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4735 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4736}
4737
4738void do_miniht_jump(int rs,int rh,int ht) {
4739 emit_cmp(rh,rs);
4740 emit_ldreq_indexed(ht,4,15);
4741 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4742 emit_mov(rs,7);
4743 emit_jmp(jump_vaddr_reg[7]);
4744 #else
4745 emit_jmp(jump_vaddr_reg[rs]);
4746 #endif
4747}
4748
4749void do_miniht_insert(u_int return_address,int rt,int temp) {
4750 #ifdef ARMv5_ONLY
4751 emit_movimm(return_address,rt); // PC into link register
4752 add_to_linker((int)out,return_address,1);
4753 emit_pcreladdr(temp);
4754 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4755 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4756 #else
4757 emit_movw(return_address&0x0000FFFF,rt);
4758 add_to_linker((int)out,return_address,1);
4759 emit_pcreladdr(temp);
4760 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4761 emit_movt(return_address&0xFFFF0000,rt);
4762 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4763 #endif
4764}
4765
4766// Sign-extend to 64 bits and write out upper half of a register
4767// This is useful where we have a 32-bit value in a register, and want to
4768// keep it in a 32-bit register, but can't guarantee that it won't be read
4769// as a 64-bit value later.
4770void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4771{
24385cae 4772#ifndef FORCE32
57871462 4773 if(is32_pre==is32) return;
4774 int hr,reg;
4775 for(hr=0;hr<HOST_REGS;hr++) {
4776 if(hr!=EXCLUDE_REG) {
4777 //if(pre[hr]==entry[hr]) {
4778 if((reg=pre[hr])>=0) {
4779 if((dirty>>hr)&1) {
4780 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4781 emit_sarimm(hr,31,HOST_TEMPREG);
4782 emit_storereg(reg|64,HOST_TEMPREG);
4783 }
4784 }
4785 }
4786 //}
4787 }
4788 }
24385cae 4789#endif
57871462 4790}
4791
4792void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4793{
4794 //if(dirty_pre==dirty) return;
4795 int hr,reg,new_hr;
4796 for(hr=0;hr<HOST_REGS;hr++) {
4797 if(hr!=EXCLUDE_REG) {
4798 reg=pre[hr];
4799 if(((~u)>>(reg&63))&1) {
4800 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4801 if(((dirty_pre&~dirty)>>hr)&1) {
4802 if(reg>0&&reg<34) {
4803 emit_storereg(reg,hr);
4804 if( ((is32_pre&~uu)>>reg)&1 ) {
4805 emit_sarimm(hr,31,HOST_TEMPREG);
4806 emit_storereg(reg|64,HOST_TEMPREG);
4807 }
4808 }
4809 else if(reg>=64) {
4810 emit_storereg(reg,hr);
4811 }
4812 }
4813 }
4814 else // Check if register moved to a different register
4815 if((new_hr=get_reg(entry,reg))>=0) {
4816 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4817 if(reg>0&&reg<34) {
4818 emit_storereg(reg,hr);
4819 if( ((is32_pre&~uu)>>reg)&1 ) {
4820 emit_sarimm(hr,31,HOST_TEMPREG);
4821 emit_storereg(reg|64,HOST_TEMPREG);
4822 }
4823 }
4824 else if(reg>=64) {
4825 emit_storereg(reg,hr);
4826 }
4827 }
4828 }
4829 }
4830 }
4831 }
4832}
4833
4834
/* using strd could possibly help but you'd have to allocate registers in pairs
void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
{
  int hr;
  int wrote=-1;
  for(hr=HOST_REGS-1;hr>=0;hr--) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(pre[hr]>=0) {
          if((dirty>>hr)&1) {
            if(get_reg(entry,pre[hr])<0) {
              if(pre[hr]<64) {
                if(!((u>>pre[hr])&1)) {
                  if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
                    if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
                      emit_sarimm(hr,31,hr+1);
                      emit_strdreg(pre[hr],hr);
                    }
                    else
                      emit_storereg(pre[hr],hr);
                  }else{
                    emit_storereg(pre[hr],hr);
                    if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
                      emit_sarimm(hr,31,hr);
                      emit_storereg(pre[hr]|64,hr);
                    }
                  }
                }
              }else{
                if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
                  emit_storereg(pre[hr],hr);
                }
              }
              wrote=hr;
            }
          }
        }
      }
    }
  }
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(pre[hr]>=0) {
          int nr;
          if((nr=get_reg(entry,pre[hr]))>=0) {
            emit_mov(hr,nr);
          }
        }
      }
    }
  }
}
#define wb_invalidate wb_invalidate_arm
*/
4890
dd3a91a1 4891// Clearing the cache is rather slow on ARM Linux, so mark the areas
4892// that need to be cleared, and then only clear these areas once.
4893void do_clear_cache()
4894{
4895 int i,j;
4896 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4897 {
4898 u_int bitmap=needs_clear_cache[i];
4899 if(bitmap) {
4900 u_int start,end;
4901 for(j=0;j<32;j++)
4902 {
4903 if(bitmap&(1<<j)) {
4904 start=BASE_ADDR+i*131072+j*4096;
4905 end=start+4095;
4906 j++;
4907 while(j<32) {
4908 if(bitmap&(1<<j)) {
4909 end+=4096;
4910 j++;
4911 }else{
4912 __clear_cache((void *)start,(void *)end);
4913 break;
4914 }
4915 }
4916 }
4917 }
4918 needs_clear_cache[i]=0;
4919 }
4920 }
4921}
4922
57871462 4923// CPU-architecture-specific initialization
4924void arch_init() {
3d624f89 4925#ifndef DISABLE_COP1
57871462 4926 rounding_modes[0]=0x0<<22; // round
4927 rounding_modes[1]=0x3<<22; // trunc
4928 rounding_modes[2]=0x1<<22; // ceil
4929 rounding_modes[3]=0x2<<22; // floor
3d624f89 4930#endif
57871462 4931}

// vim:shiftwidth=2:expandtab