cdrom: remove play pregap hack
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
// Give guest register `reg` a host register in cur->regmap[] for the
// instruction at index i.  Returns without doing anything when the register
// is flagged in cur->u or is already mapped.  Candidates are tried in this
// order:
//   1. the preferred host register (reg&7, HOST_CCREG for CCREG, 12 for
//      PTEMP/FTEMP, possibly overridden by loop_reg()) if it is free or holds
//      a register flagged in cur->u / cur->uu
//   2. after dropping one mapping that is flagged in cur->u / cur->uu:
//      a free host register that did not hold an operand of instruction i-1,
//      then any free host register other than EXCLUDE_REG
//   3. eviction, guided by the hsn[] values filled in by lsn(): registers
//      with larger hsn values are evicted first
// Whenever a host register is taken, its bits in cur->dirty and cur->isconst
// are cleared.  If nothing can be evicted the process exits.
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
334 r=cur->regmap[preferred_reg];
// Values below 64 are checked against cur->u, values of 64 and above
// against cur->uu (bit r&63).
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
// Only the first such mapping (lowest host register index) is cleared.
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register; lsn() then fills it in (and may
// change preferred_reg, which is passed by address).
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: hsn values 10 down to 3 only, never evicting an operand of
// instruction i-1, and taking HOST_CCREG only when j<hsn[CCREG].
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// A host register holding r+64 is taken before one holding r.
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
// Second pass: every hsn value from 10 down to 0, with no exclusions.
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
// Allocate both halves of guest register `reg` for the instruction at index
// i: the lower half via alloc_reg(), then a host register for the upper half,
// which is recorded in cur->regmap[] as reg|64.  The upper half is skipped
// when the register is flagged in cur->uu or is already mapped.  The search
// order mirrors alloc_reg(), with these visible differences: the preferred
// host register is 8+(reg&1) (possibly overridden by loop_reg()), and the
// "clear one unneeded mapping" scan runs from the highest host register down.
// Whenever a host register is taken, its bits in cur->dirty and cur->isconst
// are cleared.  If nothing can be evicted the process exits.
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
500 r=cur->regmap[preferred_reg];
// Values below 64 are checked against cur->u, values of 64 and above
// against cur->uu (bit r&63).
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
// Only the first such mapping found (highest host register index) is cleared.
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register; lsn() then fills it in (and may
// change preferred_reg, which is passed by address).
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: hsn values 10 down to 3 only, never evicting an operand of
// instruction i-1, and taking HOST_CCREG only when j<hsn[CCREG].
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// A host register holding r+64 is taken before one holding r.
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
// Second pass: every hsn value from 10 down to 0, with no exclusions.
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
// Search order used below: an existing mapping of `reg`; a free host register
// (scanned from the highest index down, skipping EXCLUDE_REG); a host register
// whose current content is flagged in cur->u / cur->uu and, unless i==0, also
// in unneeded_reg[i-1] / unneeded_reg_upper[i-1]; finally eviction guided by
// the hsn[] values filled in by lsn().  Whenever a host register is taken,
// its bits in cur->dirty and cur->isconst are cleared.  If nothing can be
// evicted the process exits.
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every register; lsn() then fills it in.
// preferred_reg is still -1 here and is only passed to lsn().
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: hsn values 10 down to 3 only, never evicting an operand of
// instruction i-1, and taking HOST_CCREG only when hsn[CCREG]>2.
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
// Second pass: every hsn value from 10 down to 0, with no exclusions.
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
765
766 // see if it's already allocated (and dealloc it)
767 for(n=0;n<HOST_REGS;n++)
768 {
769 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
770 }
771
772 cur->regmap[hr]=reg;
773 cur->dirty&=~(1<<hr);
774 cur->isconst&=~(1<<hr);
775}
776
777// Alloc cycle count into dedicated register
778alloc_cc(struct regstat *cur,int i)
779{
780 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
781}
782
783/* Special alloc */
784
785
786/* Assembler */
787
/* Printable name of each ARM register number, used in assem_debug() output.
 * r11, r13, r14 and r15 are shown as fp, sp, lr and pc. */
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
805
806void output_byte(u_char byte)
807{
808 *(out++)=byte;
809}
810void output_modrm(u_char mod,u_char rm,u_char ext)
811{
812 assert(mod<4);
813 assert(rm<8);
814 assert(ext<8);
815 u_char byte=(mod<<6)|(ext<<3)|rm;
816 *(out++)=byte;
817}
818void output_sib(u_char scale,u_char index,u_char base)
819{
820 assert(scale<4);
821 assert(index<8);
822 assert(base<8);
823 u_char byte=(scale<<6)|(index<<3)|base;
824 *(out++)=byte;
825}
826void output_w32(u_int word)
827{
828 *((u_int *)out)=word;
829 out+=4;
830}
/* Build the register fields of an ARM instruction word: rn in bits 16-19,
 * rd in bits 12-15, rm in bits 0-3.  All three must be below 16. */
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
/* Build the rn/rd fields plus a rotated 8-bit immediate operand that
 * represents (imm << shift).  `shift` must be even; it is stored as the
 * rotate field ((32-shift)&30)/2 in bits 8-11, with imm in bits 0-7. */
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
/* Try to express `imm` as an ARM immediate operand: an 8-bit value rotated
 * right by an even amount.  On success the 12-bit operand field
 * (rotate/2 in bits 8-11, value in bits 0-7) is stored in *encoded and 1 is
 * returned.  On failure 0 is returned and *encoded is left at 0. */
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  // Rotate right two bits at a time until the value fits in 8 bits;
  // `rot` counts down from 32 in step with the rotations applied so far.
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 861void genimm_checked(u_int imm,u_int *encoded)
862{
863 u_int ret=genimm(imm,encoded);
864 assert(ret);
865}
57871462 866u_int genjmp(u_int addr)
867{
868 int offset=addr-(int)out-8;
e80343e2 869 if(offset<-33554432||offset>=33554432) {
870 if (addr>2) {
871 printf("genjmp: out of range: %08x\n", offset);
872 exit(1);
873 }
874 return 0;
875 }
57871462 876 return ((u_int)offset>>2)&0xffffff;
877}
878
879void emit_mov(int rs,int rt)
880{
881 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
883}
884
885void emit_movs(int rs,int rt)
886{
887 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
888 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
889}
890
891void emit_add(int rs1,int rs2,int rt)
892{
893 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
895}
896
897void emit_adds(int rs1,int rs2,int rt)
898{
899 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
901}
902
903void emit_adcs(int rs1,int rs2,int rt)
904{
905 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
906 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
907}
908
909void emit_sbc(int rs1,int rs2,int rt)
910{
911 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
913}
914
915void emit_sbcs(int rs1,int rs2,int rt)
916{
917 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
919}
920
921void emit_neg(int rs, int rt)
922{
923 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
924 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
925}
926
927void emit_negs(int rs, int rt)
928{
929 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
930 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
931}
932
933void emit_sub(int rs1,int rs2,int rt)
934{
935 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
937}
938
939void emit_subs(int rs1,int rs2,int rt)
940{
941 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
942 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
943}
944
945void emit_zeroreg(int rt)
946{
947 assem_debug("mov %s,#0\n",regname[rt]);
948 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
949}
950
790ee18e 951void emit_loadlp(u_int imm,u_int rt)
952{
953 add_literal((int)out,imm);
954 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
955 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
956}
957void emit_movw(u_int imm,u_int rt)
958{
959 assert(imm<65536);
960 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
961 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
962}
963void emit_movt(u_int imm,u_int rt)
964{
965 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
966 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
967}
968void emit_movimm(u_int imm,u_int rt)
969{
970 u_int armval;
971 if(genimm(imm,&armval)) {
972 assem_debug("mov %s,#%d\n",regname[rt],imm);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
974 }else if(genimm(~imm,&armval)) {
975 assem_debug("mvn %s,#%d\n",regname[rt],imm);
976 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
977 }else if(imm<65536) {
978 #ifdef ARMv5_ONLY
979 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
980 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
981 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
982 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
983 #else
984 emit_movw(imm,rt);
985 #endif
986 }else{
987 #ifdef ARMv5_ONLY
988 emit_loadlp(imm,rt);
989 #else
990 emit_movw(imm&0x0000FFFF,rt);
991 emit_movt(imm&0xFFFF0000,rt);
992 #endif
993 }
994}
995void emit_pcreladdr(u_int rt)
996{
997 assem_debug("add %s,pc,#?\n",regname[rt]);
998 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
999}
1000
57871462 1001void emit_loadreg(int r, int hr)
1002{
3d624f89 1003#ifdef FORCE32
1004 if(r&64) {
1005 printf("64bit load in 32bit mode!\n");
7f2607ea 1006 assert(0);
1007 return;
3d624f89 1008 }
1009#endif
57871462 1010 if((r&63)==0)
1011 emit_zeroreg(hr);
1012 else {
3d624f89 1013 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1014 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1015 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1016 if(r==CCREG) addr=(int)&cycle_count;
1017 if(r==CSREG) addr=(int)&Status;
1018 if(r==FSREG) addr=(int)&FCR31;
1019 if(r==INVCP) addr=(int)&invc_ptr;
1020 u_int offset = addr-(u_int)&dynarec_local;
1021 assert(offset<4096);
1022 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1023 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1024 }
1025}
1026void emit_storereg(int r, int hr)
1027{
3d624f89 1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit store in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
1034#endif
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1044}
1045
1046void emit_test(int rs, int rt)
1047{
1048 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1049 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1050}
1051
1052void emit_testimm(int rs,int imm)
1053{
1054 u_int armval;
1055 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1056 genimm_checked(imm,&armval);
57871462 1057 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1058}
1059
b9b61529 1060void emit_testeqimm(int rs,int imm)
1061{
1062 u_int armval;
1063 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1064 genimm_checked(imm,&armval);
b9b61529 1065 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1066}
1067
57871462 1068void emit_not(int rs,int rt)
1069{
1070 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1071 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1072}
1073
b9b61529 1074void emit_mvnmi(int rs,int rt)
1075{
1076 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1077 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1078}
1079
57871462 1080void emit_and(u_int rs1,u_int rs2,u_int rt)
1081{
1082 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1083 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1084}
1085
1086void emit_or(u_int rs1,u_int rs2,u_int rt)
1087{
1088 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1089 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1090}
1091void emit_or_and_set_flags(int rs1,int rs2,int rt)
1092{
1093 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
f70d384d 1097void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1098{
1099 assert(rs<16);
1100 assert(rt<16);
1101 assert(imm<32);
1102 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1103 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1104}
1105
576bbd8f 1106void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
57871462 1115void emit_xor(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1119}
1120
57871462 1121void emit_addimm(u_int rs,int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 if(imm!=0) {
1126 assert(imm>-65536&&imm<65536);
1127 u_int armval;
1128 if(genimm(imm,&armval)) {
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(genimm(-imm,&armval)) {
1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1133 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1134 }else if(imm<0) {
1135 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1136 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1137 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1138 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1139 }else{
1140 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1141 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1142 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1143 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1144 }
1145 }
1146 else if(rs!=rt) emit_mov(rs,rt);
1147}
1148
1149void emit_addimm_and_set_flags(int imm,int rt)
1150{
1151 assert(imm>-65536&&imm<65536);
1152 u_int armval;
1153 if(genimm(imm,&armval)) {
1154 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1155 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1156 }else if(genimm(-imm,&armval)) {
1157 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1158 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1159 }else if(imm<0) {
1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1161 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1163 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
1165 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1166 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1167 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1168 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1169 }
1170}
1171void emit_addimm_no_flags(u_int imm,u_int rt)
1172{
1173 emit_addimm(rt,imm,rt);
1174}
1175
1176void emit_addnop(u_int r)
1177{
1178 assert(r<16);
1179 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1180 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1181}
1182
1183void emit_adcimm(u_int rs,int imm,u_int rt)
1184{
1185 u_int armval;
cfbd3c6e 1186 genimm_checked(imm,&armval);
57871462 1187 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1188 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1189}
1190/*void emit_sbcimm(int imm,u_int rt)
1191{
1192 u_int armval;
cfbd3c6e 1193 genimm_checked(imm,&armval);
57871462 1194 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1195 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1196}*/
1197void emit_sbbimm(int imm,u_int rt)
1198{
1199 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1200 assert(rt<8);
1201 if(imm<128&&imm>=-128) {
1202 output_byte(0x83);
1203 output_modrm(3,rt,3);
1204 output_byte(imm);
1205 }
1206 else
1207 {
1208 output_byte(0x81);
1209 output_modrm(3,rt,3);
1210 output_w32(imm);
1211 }
1212}
1213void emit_rscimm(int rs,int imm,u_int rt)
1214{
1215 assert(0);
1216 u_int armval;
cfbd3c6e 1217 genimm_checked(imm,&armval);
57871462 1218 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1219 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1220}
1221
1222void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1223{
1224 // TODO: if(genimm(imm,&armval)) ...
1225 // else
1226 emit_movimm(imm,HOST_TEMPREG);
1227 emit_adds(HOST_TEMPREG,rsl,rtl);
1228 emit_adcimm(rsh,0,rth);
1229}
1230
1231void emit_sbb(int rs1,int rs2)
1232{
1233 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1234 output_byte(0x19);
1235 output_modrm(3,rs1,rs2);
1236}
1237
1238void emit_andimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
790ee18e 1241 if(imm==0) {
1242 emit_zeroreg(rt);
1243 }else if(genimm(imm,&armval)) {
57871462 1244 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1246 }else if(genimm(~imm,&armval)) {
1247 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1248 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1249 }else if(imm==65535) {
1250 #ifdef ARMv5_ONLY
1251 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1252 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1253 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1254 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1255 #else
1256 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1257 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1258 #endif
1259 }else{
1260 assert(imm>0&&imm<65535);
1261 #ifdef ARMv5_ONLY
1262 assem_debug("mov r14,#%d\n",imm&0xFF00);
1263 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1264 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1265 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1266 #else
1267 emit_movw(imm,HOST_TEMPREG);
1268 #endif
1269 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1270 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1271 }
1272}
1273
1274void emit_orimm(int rs,int imm,int rt)
1275{
1276 u_int armval;
790ee18e 1277 if(imm==0) {
1278 if(rs!=rt) emit_mov(rs,rt);
1279 }else if(genimm(imm,&armval)) {
57871462 1280 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1281 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1282 }else{
1283 assert(imm>0&&imm<65536);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1287 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1288 }
1289}
1290
1291void emit_xorimm(int rs,int imm,int rt)
1292{
57871462 1293 u_int armval;
790ee18e 1294 if(imm==0) {
1295 if(rs!=rt) emit_mov(rs,rt);
1296 }else if(genimm(imm,&armval)) {
57871462 1297 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1298 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1299 }else{
514ed0d9 1300 assert(imm>0&&imm<65536);
57871462 1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1304 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1305 }
1306}
1307
1308void emit_shlimm(int rs,u_int imm,int rt)
1309{
1310 assert(imm>0);
1311 assert(imm<32);
1312 //if(imm==1) ...
1313 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1315}
1316
1317void emit_shrimm(int rs,u_int imm,int rt)
1318{
1319 assert(imm>0);
1320 assert(imm<32);
1321 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1323}
1324
1325void emit_sarimm(int rs,u_int imm,int rt)
1326{
1327 assert(imm>0);
1328 assert(imm<32);
1329 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1331}
1332
1333void emit_rorimm(int rs,u_int imm,int rt)
1334{
1335 assert(imm>0);
1336 assert(imm<32);
1337 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1338 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1339}
1340
1341void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1342{
1343 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1344 assert(imm>0);
1345 assert(imm<32);
1346 //if(imm==1) ...
1347 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1350 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1351}
1352
1353void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1354{
1355 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1356 assert(imm>0);
1357 assert(imm<32);
1358 //if(imm==1) ...
1359 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1361 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1362 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1363}
1364
b9b61529 1365void emit_signextend16(int rs,int rt)
1366{
1367 #ifdef ARMv5_ONLY
1368 emit_shlimm(rs,16,rt);
1369 emit_sarimm(rt,16,rt);
1370 #else
1371 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1372 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1373 #endif
1374}
1375
57871462 1376void emit_shl(u_int rs,u_int shift,u_int rt)
1377{
1378 assert(rs<16);
1379 assert(rt<16);
1380 assert(shift<16);
1381 //if(imm==1) ...
1382 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1383 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1384}
1385void emit_shr(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1391 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1392}
1393void emit_sar(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1400}
1401void emit_shlcl(int r)
1402{
1403 assem_debug("shl %%%s,%%cl\n",regname[r]);
1404 assert(0);
1405}
1406void emit_shrcl(int r)
1407{
1408 assem_debug("shr %%%s,%%cl\n",regname[r]);
1409 assert(0);
1410}
1411void emit_sarcl(int r)
1412{
1413 assem_debug("sar %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416
1417void emit_shldcl(int r1,int r2)
1418{
1419 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1420 assert(0);
1421}
1422void emit_shrdcl(int r1,int r2)
1423{
1424 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1425 assert(0);
1426}
1427void emit_orrshl(u_int rs,u_int shift,u_int rt)
1428{
1429 assert(rs<16);
1430 assert(rt<16);
1431 assert(shift<16);
1432 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1434}
1435void emit_orrshr(u_int rs,u_int shift,u_int rt)
1436{
1437 assert(rs<16);
1438 assert(rt<16);
1439 assert(shift<16);
1440 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1441 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1442}
1443
1444void emit_cmpimm(int rs,int imm)
1445{
1446 u_int armval;
1447 if(genimm(imm,&armval)) {
1448 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1449 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1450 }else if(genimm(-imm,&armval)) {
1451 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1452 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1453 }else if(imm>0) {
1454 assert(imm<65536);
1455 #ifdef ARMv5_ONLY
1456 emit_movimm(imm,HOST_TEMPREG);
1457 #else
1458 emit_movw(imm,HOST_TEMPREG);
1459 #endif
1460 assem_debug("cmp %s,r14\n",regname[rs]);
1461 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1462 }else{
1463 assert(imm>-65536);
1464 #ifdef ARMv5_ONLY
1465 emit_movimm(-imm,HOST_TEMPREG);
1466 #else
1467 emit_movw(-imm,HOST_TEMPREG);
1468 #endif
1469 assem_debug("cmn %s,r14\n",regname[rs]);
1470 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1471 }
1472}
1473
1474void emit_cmovne(u_int *addr,int rt)
1475{
1476 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1477 assert(0);
1478}
1479void emit_cmovl(u_int *addr,int rt)
1480{
1481 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1482 assert(0);
1483}
1484void emit_cmovs(u_int *addr,int rt)
1485{
1486 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovne_imm(int imm,int rt)
1490{
1491 assem_debug("movne %s,#%d\n",regname[rt],imm);
1492 u_int armval;
cfbd3c6e 1493 genimm_checked(imm,&armval);
57871462 1494 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1495}
1496void emit_cmovl_imm(int imm,int rt)
1497{
1498 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1499 u_int armval;
cfbd3c6e 1500 genimm_checked(imm,&armval);
57871462 1501 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1502}
1503void emit_cmovb_imm(int imm,int rt)
1504{
1505 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1506 u_int armval;
cfbd3c6e 1507 genimm_checked(imm,&armval);
57871462 1508 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1509}
1510void emit_cmovs_imm(int imm,int rt)
1511{
1512 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1513 u_int armval;
cfbd3c6e 1514 genimm_checked(imm,&armval);
57871462 1515 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1516}
1517void emit_cmove_reg(int rs,int rt)
1518{
1519 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1520 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1521}
1522void emit_cmovne_reg(int rs,int rt)
1523{
1524 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1526}
1527void emit_cmovl_reg(int rs,int rt)
1528{
1529 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovs_reg(int rs,int rt)
1533{
1534 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1536}
1537
// Set rt to 1 if rs < imm (signed compare), else 0.
// When rt aliases rs the zeroing has to happen after the compare, so it is
// done with emit_movimm at that point instead of emit_zeroreg up front.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs < imm (unsigned compare, via the CC condition), else 0.
// When rt aliases rs the zeroing has to happen after the compare, so it is
// done with emit_movimm at that point instead of emit_zeroreg up front.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Signed "set if less than immediate" for a 64-bit value held in rsh:rsl,
// producing 0/1 in rt. The low-word result is computed first and then
// overridden according to the high word. rsh must not alias rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: compare the high word against -1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // Non-negative immediate: test the high word against zero / its sign.
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
// Unsigned "set if less than immediate" for a 64-bit value held in rsh:rsl,
// producing 0/1 in rt. The low-word result is computed first and then
// overridden according to the high word. rsh must not alias rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // Negative immediate: any high word other than -1 forces the result to 1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    // Non-negative immediate: any non-zero high word forces the result to 0.
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1584
1585void emit_cmp(int rs,int rt)
1586{
1587 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1588 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1589}
// Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, preload 1,
// then replace it with 0 under the LT condition.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);    // flags from rs - 1
  emit_movimm(1,rt);    // assume "greater than zero"
  emit_cmovl_imm(0,rt); // rs < 1 -> 0
}
// Make rt non-zero-normalised: rt becomes 1 when rs is non-zero and keeps
// the (zero) value of rs otherwise. When rs and rt differ the value is
// copied with a flag-setting move; otherwise rs is just tested.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" test on rsh:rsl, result (0/1) in rt.
// Starts from the 32-bit answer for the low word, then lets the high word
// override it: non-zero high word -> 1, negative high word -> 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt); // high word non-zero
  emit_cmovs_imm(0,rt);  // high word negative
}
// 64-bit "non-zero" test on rsh:rsl: rt = rsh | rsl with flags set, then
// rt is forced to 1 when that OR is non-zero.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (signed), else 0.
// If rt aliases either source, it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (unsigned, via the CC condition), else 0.
// If rt aliases either source, it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1634void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1635{
1636 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1637 assert(u1!=rt);
1638 assert(u2!=rt);
1639 emit_cmp(l1,l2);
1640 emit_movimm(0,rt);
1641 emit_sbcs(u1,u2,HOST_TEMPREG);
1642 emit_cmovl_imm(1,rt);
1643}
1644void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1645{
1646 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1647 assert(u1!=rt);
1648 assert(u2!=rt);
1649 emit_cmp(l1,l2);
1650 emit_movimm(0,rt);
1651 emit_sbcs(u1,u2,HOST_TEMPREG);
1652 emit_cmovb_imm(1,rt);
1653}
1654
1655void emit_call(int a)
1656{
1657 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1658 u_int offset=genjmp(a);
1659 output_w32(0xeb000000|offset);
1660}
1661void emit_jmp(int a)
1662{
1663 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1664 u_int offset=genjmp(a);
1665 output_w32(0xea000000|offset);
1666}
// Emit "bne a": branch to a under the NE condition.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a": branch to a under the EQ condition.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a": branch to a under the MI condition.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a": branch to a under the PL condition.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a": branch to a under the LT condition.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a": branch to a under the GE condition.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a": branch to a under the VC (no overflow) condition.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a": branch to a under the CS (carry set) condition.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a": branch to a under the CC (carry clear) condition.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1721
// Unimplemented here: logs an x86-style "push $imm" line, then asserts.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0); // not supported by this backend
}
// Unimplemented here: logs "pusha", then asserts.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // not supported by this backend
}
// Unimplemented here: logs "popa", then asserts.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // not supported by this backend
}
1737void emit_pushreg(u_int r)
1738{
1739 assem_debug("push %%%s\n",regname[r]);
1740 assert(0);
1741}
1742void emit_popreg(u_int r)
1743{
1744 assem_debug("pop %%%s\n",regname[r]);
1745 assert(0);
1746}
1747void emit_callreg(u_int r)
1748{
1749 assem_debug("call *%%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_jmpreg(u_int r)
1753{
1754 assem_debug("mov pc,%s\n",regname[r]);
1755 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1756}
1757
1758void emit_readword_indexed(int offset, int rs, int rt)
1759{
1760 assert(offset>-4096&&offset<4096);
1761 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1762 if(offset>=0) {
1763 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1764 }else{
1765 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1766 }
1767}
1768void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1769{
1770 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1771 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1772}
// Word load that optionally goes through a map register.
// map<0: plain "ldr rt,[rs,#addr]". Otherwise addr must be 0 and the load
// uses rs plus map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// 64-bit load into rh (word at addr) and rl (word at addr+4), optionally
// through a map register. rh<0 means the first word is not wanted.
// In the mapped case the map register itself is incremented by 1 between
// the two loads (it is scaled by 4 when used as an index), so rh must not
// alias rs and map is modified by the emitted code.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1793void emit_movsbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-256&&offset<256);
1796 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1799 }else{
1800 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1801 }
1802}
1803void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1804{
1805 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1806 else {
1807 if(addr==0) {
1808 emit_shlimm(map,2,map);
1809 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1810 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1811 }else{
1812 assert(addr>-256&&addr<256);
1813 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1814 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1815 emit_movsbl_indexed(addr, rt, rt);
1816 }
1817 }
1818}
1819void emit_movswl_indexed(int offset, int rs, int rt)
1820{
1821 assert(offset>-256&&offset<256);
1822 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1823 if(offset>=0) {
1824 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1825 }else{
1826 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1827 }
1828}
1829void emit_movzbl_indexed(int offset, int rs, int rt)
1830{
1831 assert(offset>-4096&&offset<4096);
1832 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1833 if(offset>=0) {
1834 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1835 }else{
1836 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1837 }
1838}
1839void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1843}
// Zero-extending byte load, optionally through a map register.
// map<0: plain indexed load. With a map, a non-zero addr is first added
// to rs into rt, and the load then uses that base plus map scaled by 4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  }else{
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1856void emit_movzwl_indexed(int offset, int rs, int rt)
1857{
1858 assert(offset>-256&&offset<256);
1859 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1860 if(offset>=0) {
1861 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1862 }else{
1863 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1864 }
1865}
1866void emit_readword(int addr, int rt)
1867{
1868 u_int offset = addr-(u_int)&dynarec_local;
1869 assert(offset<4096);
1870 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1871 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1872}
1873void emit_movsbl(int addr, int rt)
1874{
1875 u_int offset = addr-(u_int)&dynarec_local;
1876 assert(offset<256);
1877 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1878 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1879}
1880void emit_movswl(int addr, int rt)
1881{
1882 u_int offset = addr-(u_int)&dynarec_local;
1883 assert(offset<256);
1884 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1885 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1886}
1887void emit_movzbl(int addr, int rt)
1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<4096);
1891 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1892 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1893}
1894void emit_movzwl(int addr, int rt)
1895{
1896 u_int offset = addr-(u_int)&dynarec_local;
1897 assert(offset<256);
1898 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1899 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1900}
1901void emit_movzwl_reg(int rs, int rt)
1902{
1903 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1904 assert(0);
1905}
1906
1907void emit_xchg(int rs, int rt)
1908{
1909 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1910 assert(0);
1911}
1912void emit_writeword_indexed(int rt, int offset, int rs)
1913{
1914 assert(offset>-4096&&offset<4096);
1915 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1916 if(offset>=0) {
1917 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1918 }else{
1919 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1920 }
1921}
1922void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1923{
1924 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1926}
// Word store that optionally goes through a map register.
// map<0: plain "str rt,[rs,#addr]". Otherwise addr must be 0 and the store
// uses rs plus map scaled by 4. The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// 64-bit store of rh (word at addr) and rl (word at addr+4), optionally
// through a map register.
// Unmapped (map<0): two plain stores; rh<0 skips the first word.
// Mapped: rh is required. If a distinct temp register is available it
// receives map+1 and serves as the index for the second word; otherwise
// rs itself is advanced by 4 before the second store.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1951void emit_writehword_indexed(int rt, int offset, int rs)
1952{
1953 assert(offset>-256&&offset<256);
1954 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1955 if(offset>=0) {
1956 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1957 }else{
1958 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1959 }
1960}
1961void emit_writebyte_indexed(int rt, int offset, int rs)
1962{
1963 assert(offset>-4096&&offset<4096);
1964 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1967 }else{
1968 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1969 }
1970}
1971void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1972{
1973 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1974 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1975}
// Byte store, optionally through a map register.
// map<0: plain indexed store. With a map, a non-zero addr is first added
// to rs into temp, and the store then uses that base plus map scaled by 4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  }else{
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
1988void emit_writeword(int rt, int addr)
1989{
1990 u_int offset = addr-(u_int)&dynarec_local;
1991 assert(offset<4096);
1992 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1993 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1994}
1995void emit_writehword(int rt, int addr)
1996{
1997 u_int offset = addr-(u_int)&dynarec_local;
1998 assert(offset<256);
1999 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2000 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2001}
2002void emit_writebyte(int rt, int addr)
2003{
2004 u_int offset = addr-(u_int)&dynarec_local;
2005 assert(offset<4096);
74426039 2006 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2007 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2008}
// Unimplemented here: logs an x86-style "movl $imm,addr" line, then asserts.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0); // not supported by this backend
}
// Unimplemented here: logs an x86-style "movb $imm,addr" line, then asserts.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0); // not supported by this backend
}
2019
2020void emit_mul(int rs)
2021{
2022 assem_debug("mul %%%s\n",regname[rs]);
2023 assert(0);
2024}
2025void emit_imul(int rs)
2026{
2027 assem_debug("imul %%%s\n",regname[rs]);
2028 assert(0);
2029}
2030void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2031{
2032 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2033 assert(rs1<16);
2034 assert(rs2<16);
2035 assert(hi<16);
2036 assert(lo<16);
2037 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2038}
2039void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2040{
2041 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2042 assert(rs1<16);
2043 assert(rs2<16);
2044 assert(hi<16);
2045 assert(lo<16);
2046 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2047}
2048
2049void emit_div(int rs)
2050{
2051 assem_debug("div %%%s\n",regname[rs]);
2052 assert(0);
2053}
2054void emit_idiv(int rs)
2055{
2056 assem_debug("idiv %%%s\n",regname[rs]);
2057 assert(0);
2058}
// Unimplemented here: logs "cdq", then asserts.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0); // not supported by this backend
}
2064
2065void emit_clz(int rs,int rt)
2066{
2067 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2068 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2069}
2070
2071void emit_subcs(int rs1,int rs2,int rt)
2072{
2073 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2074 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2075}
2076
2077void emit_shrcc_imm(int rs,u_int imm,int rt)
2078{
2079 assert(imm>0);
2080 assert(imm<32);
2081 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2082 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2083}
2084
2085void emit_negmi(int rs, int rt)
2086{
2087 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2088 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2089}
2090
2091void emit_negsmi(int rs, int rt)
2092{
2093 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2094 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2095}
2096
2097void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2098{
2099 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2100 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2101}
2102
2103void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2104{
2105 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2106 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2107}
2108
2109void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2110{
2111 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2112 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2113}
2114
2115void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2116{
2117 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2118 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2119}
2120
2121void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2122{
2123 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2124 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2125}
2126
2127void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2128{
2129 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2130 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2131}
2132
2133void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2134{
2135 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2136 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2137}
2138
2139void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2140{
2141 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2142 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2143}
2144
2145void emit_teq(int rs, int rt)
2146{
2147 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2148 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2149}
2150
2151void emit_rsbimm(int rs, int imm, int rt)
2152{
2153 u_int armval;
cfbd3c6e 2154 genimm_checked(imm,&armval);
57871462 2155 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2156 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2157}
2158
2159// Load 2 immediates optimizing for small code size
2160void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2161{
2162 emit_movimm(imm1,rt1);
2163 u_int armval;
2164 if(genimm(imm2-imm1,&armval)) {
2165 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2166 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2167 }else if(genimm(imm1-imm2,&armval)) {
2168 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2169 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2170 }
2171 else emit_movimm(imm2,rt2);
2172}
2173
2174// Conditionally select one of two immediates, optimizing for small code size
2175// This will only be called if HAVE_CMOV_IMM is defined
2176void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2177{
2178 u_int armval;
2179 if(genimm(imm2-imm1,&armval)) {
2180 emit_movimm(imm1,rt);
2181 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2182 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2183 }else if(genimm(imm1-imm2,&armval)) {
2184 emit_movimm(imm1,rt);
2185 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2186 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2187 }
2188 else {
2189 #ifdef ARMv5_ONLY
2190 emit_movimm(imm1,rt);
2191 add_literal((int)out,imm2);
2192 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2193 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2194 #else
2195 emit_movw(imm1&0x0000FFFF,rt);
2196 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2197 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2198 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2199 }
2200 emit_movt(imm1&0xFFFF0000,rt);
2201 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2202 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2203 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2204 }
2205 #endif
2206 }
2207}
2208
// special case for checking invalid_code
// Not implemented in this backend: any call trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2214
2215// special case for checking invalid_code
2216void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2217{
2218 assert(imm<128&&imm>=0);
2219 assert(r>=0&&r<16);
2220 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2221 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2222 emit_cmpimm(HOST_TEMPREG,imm);
2223}
2224
2225// special case for tlb mapping
2226void emit_addsr12(int rs1,int rs2,int rt)
2227{
2228 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2229 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2230}
2231
0bbd1454 2232void emit_callne(int a)
2233{
2234 assem_debug("blne %x\n",a);
2235 u_int offset=genjmp(a);
2236 output_w32(0x1b000000|offset);
2237}
2238
// Used to preload hash table entries
// NOTE(review): the bytes written here (0x0F 0x18 + modrm + 32-bit address)
// look like an x86 prefetch encoding rather than an ARM instruction —
// confirm whether this function is still called by the ARM backend.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2248void emit_prefetchreg(int r)
2249{
2250 assem_debug("pld %s\n",regname[r]);
2251 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2252}
2253
2254// Special case for mini_ht
2255void emit_ldreq_indexed(int rs, u_int offset, int rt)
2256{
2257 assert(offset<4096);
2258 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2259 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2260}
2261
2262void emit_flds(int r,int sr)
2263{
2264 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2265 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2266}
2267
2268void emit_vldr(int r,int vr)
2269{
2270 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2271 output_w32(0xed900b00|(vr<<12)|(r<<16));
2272}
2273
2274void emit_fsts(int sr,int r)
2275{
2276 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2277 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2278}
2279
2280void emit_vstr(int vr,int r)
2281{
2282 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2283 output_w32(0xed800b00|(vr<<12)|(r<<16));
2284}
2285
2286void emit_ftosizs(int s,int d)
2287{
2288 assem_debug("ftosizs s%d,s%d\n",d,s);
2289 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2290}
2291
2292void emit_ftosizd(int s,int d)
2293{
2294 assem_debug("ftosizd s%d,d%d\n",d,s);
2295 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2296}
2297
2298void emit_fsitos(int s,int d)
2299{
2300 assem_debug("fsitos s%d,s%d\n",d,s);
2301 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2302}
2303
2304void emit_fsitod(int s,int d)
2305{
2306 assem_debug("fsitod d%d,s%d\n",d,s);
2307 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2308}
2309
2310void emit_fcvtds(int s,int d)
2311{
2312 assem_debug("fcvtds d%d,s%d\n",d,s);
2313 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2314}
2315
2316void emit_fcvtsd(int s,int d)
2317{
2318 assem_debug("fcvtsd s%d,d%d\n",d,s);
2319 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2320}
2321
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands use the single-precision register field layout, so the
// debug disassembly names them as s-registers (it previously printed the
// destination as a d-register).
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2327
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands use the double-precision register field layout, so the
// debug disassembly names them as d-registers (it previously printed the
// destination as an s-register).
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2333
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands use the single-precision register field layout, so the
// debug disassembly names them as s-registers (it previously printed the
// destination as a d-register).
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2339
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands use the double-precision register field layout, so the
// debug disassembly names them as d-registers (it previously printed the
// destination as an s-register).
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2345
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands use the single-precision register field layout, so the
// debug disassembly names them as s-registers (it previously printed the
// destination as a d-register).
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2351
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands use the double-precision register field layout, so the
// debug disassembly names them as d-registers (it previously printed the
// destination as an s-register).
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2357
2358void emit_fadds(int s1,int s2,int d)
2359{
2360 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2361 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2362}
2363
2364void emit_faddd(int s1,int s2,int d)
2365{
2366 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2367 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2368}
2369
2370void emit_fsubs(int s1,int s2,int d)
2371{
2372 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2373 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2374}
2375
2376void emit_fsubd(int s1,int s2,int d)
2377{
2378 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2379 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2380}
2381
2382void emit_fmuls(int s1,int s2,int d)
2383{
2384 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2385 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2386}
2387
2388void emit_fmuld(int s1,int s2,int d)
2389{
2390 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2391 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2392}
2393
2394void emit_fdivs(int s1,int s2,int d)
2395{
2396 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2397 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2398}
2399
2400void emit_fdivd(int s1,int s2,int d)
2401{
2402 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2403 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2404}
2405
2406void emit_fcmps(int x,int y)
2407{
2408 assem_debug("fcmps s14, s15\n");
2409 output_w32(0xeeb47a67);
2410}
2411
2412void emit_fcmpd(int x,int y)
2413{
2414 assem_debug("fcmpd d6, d7\n");
2415 output_w32(0xeeb46b47);
2416}
2417
2418void emit_fmstat()
2419{
2420 assem_debug("fmstat\n");
2421 output_w32(0xeef1fa10);
2422}
2423
2424void emit_bicne_imm(int rs,int imm,int rt)
2425{
2426 u_int armval;
cfbd3c6e 2427 genimm_checked(imm,&armval);
57871462 2428 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2429 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2430}
2431
2432void emit_biccs_imm(int rs,int imm,int rt)
2433{
2434 u_int armval;
cfbd3c6e 2435 genimm_checked(imm,&armval);
57871462 2436 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2437 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2438}
2439
2440void emit_bicvc_imm(int rs,int imm,int rt)
2441{
2442 u_int armval;
cfbd3c6e 2443 genimm_checked(imm,&armval);
57871462 2444 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2445 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2446}
2447
2448void emit_bichi_imm(int rs,int imm,int rt)
2449{
2450 u_int armval;
cfbd3c6e 2451 genimm_checked(imm,&armval);
57871462 2452 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2453 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2454}
2455
2456void emit_orrvs_imm(int rs,int imm,int rt)
2457{
2458 u_int armval;
cfbd3c6e 2459 genimm_checked(imm,&armval);
57871462 2460 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2461 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2462}
2463
b9b61529 2464void emit_orrne_imm(int rs,int imm,int rt)
2465{
2466 u_int armval;
cfbd3c6e 2467 genimm_checked(imm,&armval);
b9b61529 2468 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2469 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2470}
2471
2472void emit_andne_imm(int rs,int imm,int rt)
2473{
2474 u_int armval;
cfbd3c6e 2475 genimm_checked(imm,&armval);
b9b61529 2476 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2477 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2478}
2479
57871462 2480void emit_jno_unlikely(int a)
2481{
2482 //emit_jno(a);
2483 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2484 output_w32(0x72800000|rd_rn_rm(15,15,0));
2485}
2486
2487// Save registers before function call
2488void save_regs(u_int reglist)
2489{
2490 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2491 if(!reglist) return;
2492 assem_debug("stmia fp,{");
2493 if(reglist&1) assem_debug("r0, ");
2494 if(reglist&2) assem_debug("r1, ");
2495 if(reglist&4) assem_debug("r2, ");
2496 if(reglist&8) assem_debug("r3, ");
2497 if(reglist&0x1000) assem_debug("r12");
2498 assem_debug("}\n");
2499 output_w32(0xe88b0000|reglist);
2500}
2501// Restore registers after function call
2502void restore_regs(u_int reglist)
2503{
2504 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2505 if(!reglist) return;
2506 assem_debug("ldmia fp,{");
2507 if(reglist&1) assem_debug("r0, ");
2508 if(reglist&2) assem_debug("r1, ");
2509 if(reglist&4) assem_debug("r2, ");
2510 if(reglist&8) assem_debug("r3, ");
2511 if(reglist&0x1000) assem_debug("r12");
2512 assem_debug("}\n");
2513 output_w32(0xe89b0000|reglist);
2514}
2515
2516// Write back consts using r14 so we don't disturb the other registers
2517void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2518{
2519 int hr;
2520 for(hr=0;hr<HOST_REGS;hr++) {
2521 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2522 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2523 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2524 int value=constmap[i][hr];
2525 if(value==0) {
2526 emit_zeroreg(HOST_TEMPREG);
2527 }
2528 else {
2529 emit_movimm(value,HOST_TEMPREG);
2530 }
2531 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2532#ifndef FORCE32
57871462 2533 if((i_is32>>i_regmap[hr])&1) {
2534 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2535 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2536 }
24385cae 2537#endif
57871462 2538 }
2539 }
2540 }
2541 }
2542}
2543
2544/* Stubs/epilogue */
2545
2546void literal_pool(int n)
2547{
2548 if(!literalcount) return;
2549 if(n) {
2550 if((int)out-literals[0][0]<4096-n) return;
2551 }
2552 u_int *ptr;
2553 int i;
2554 for(i=0;i<literalcount;i++)
2555 {
2556 ptr=(u_int *)literals[i][0];
2557 u_int offset=(u_int)out-(u_int)ptr-8;
2558 assert(offset<4096);
2559 assert(!(offset&3));
2560 *ptr|=offset;
2561 output_w32(literals[i][1]);
2562 }
2563 literalcount=0;
2564}
2565
2566void literal_pool_jumpover(int n)
2567{
2568 if(!literalcount) return;
2569 if(n) {
2570 if((int)out-literals[0][0]<4096-n) return;
2571 }
2572 int jaddr=(int)out;
2573 emit_jmp(0);
2574 literal_pool(0);
2575 set_jump_target(jaddr,(int)out);
2576}
2577
2578emit_extjump2(int addr, int target, int linker)
2579{
2580 u_char *ptr=(u_char *)addr;
2581 assert((ptr[3]&0x0e)==0xa);
2582 emit_loadlp(target,0);
2583 emit_loadlp(addr,1);
24385cae 2584 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2585 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2586//DEBUG >
2587#ifdef DEBUG_CYCLE_COUNT
2588 emit_readword((int)&last_count,ECX);
2589 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2590 emit_readword((int)&next_interupt,ECX);
2591 emit_writeword(HOST_CCREG,(int)&Count);
2592 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2593 emit_writeword(ECX,(int)&last_count);
2594#endif
2595//DEBUG <
2596 emit_jmp(linker);
2597}
2598
2599emit_extjump(int addr, int target)
2600{
2601 emit_extjump2(addr, target, (int)dyna_linker);
2602}
2603emit_extjump_ds(int addr, int target)
2604{
2605 emit_extjump2(addr, target, (int)dyna_linker_ds);
2606}
2607
cbbab9cd 2608#ifdef PCSX
2609#include "pcsxmem_inline.c"
2610#endif
2611
57871462 2612do_readstub(int n)
2613{
2614 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2615 literal_pool(256);
2616 set_jump_target(stubs[n][1],(int)out);
2617 int type=stubs[n][0];
2618 int i=stubs[n][3];
2619 int rs=stubs[n][4];
2620 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2621 u_int reglist=stubs[n][7];
2622 signed char *i_regmap=i_regs->regmap;
2623 int addr=get_reg(i_regmap,AGEN1+(i&1));
2624 int rth,rt;
2625 int ds;
b9b61529 2626 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2627 rth=get_reg(i_regmap,FTEMP|64);
2628 rt=get_reg(i_regmap,FTEMP);
2629 }else{
2630 rth=get_reg(i_regmap,rt1[i]|64);
2631 rt=get_reg(i_regmap,rt1[i]);
2632 }
2633 assert(rs>=0);
57871462 2634 if(addr<0) addr=rt;
535d208a 2635 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2636 assert(addr>=0);
2637 int ftable=0;
2638 if(type==LOADB_STUB||type==LOADBU_STUB)
2639 ftable=(int)readmemb;
2640 if(type==LOADH_STUB||type==LOADHU_STUB)
2641 ftable=(int)readmemh;
2642 if(type==LOADW_STUB)
2643 ftable=(int)readmem;
24385cae 2644#ifndef FORCE32
57871462 2645 if(type==LOADD_STUB)
2646 ftable=(int)readmemd;
24385cae 2647#endif
2648 assert(ftable!=0);
57871462 2649 emit_writeword(rs,(int)&address);
2650 //emit_pusha();
2651 save_regs(reglist);
97a238a6 2652#ifndef PCSX
57871462 2653 ds=i_regs!=&regs[i];
2654 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2655 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2656 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2657 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2658 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2659#endif
57871462 2660 emit_shrimm(rs,16,1);
2661 int cc=get_reg(i_regmap,CCREG);
2662 if(cc<0) {
2663 emit_loadreg(CCREG,2);
2664 }
2665 emit_movimm(ftable,0);
2666 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2667#ifndef PCSX
57871462 2668 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2669#endif
57871462 2670 //emit_readword((int)&last_count,temp);
2671 //emit_add(cc,temp,cc);
2672 //emit_writeword(cc,(int)&Count);
2673 //emit_mov(15,14);
2674 emit_call((int)&indirect_jump_indexed);
2675 //emit_callreg(rs);
2676 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2677#ifndef PCSX
57871462 2678 // We really shouldn't need to update the count here,
2679 // but not doing so causes random crashes...
2680 emit_readword((int)&Count,HOST_TEMPREG);
2681 emit_readword((int)&next_interupt,2);
2682 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2683 emit_writeword(2,(int)&last_count);
2684 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2685 if(cc<0) {
2686 emit_storereg(CCREG,HOST_TEMPREG);
2687 }
f51dc36c 2688#endif
57871462 2689 //emit_popa();
2690 restore_regs(reglist);
2691 //if((cc=get_reg(regmap,CCREG))>=0) {
2692 // emit_loadreg(CCREG,cc);
2693 //}
f18c0f46 2694 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2695 assert(rt>=0);
2696 if(type==LOADB_STUB)
2697 emit_movsbl((int)&readmem_dword,rt);
2698 if(type==LOADBU_STUB)
2699 emit_movzbl((int)&readmem_dword,rt);
2700 if(type==LOADH_STUB)
2701 emit_movswl((int)&readmem_dword,rt);
2702 if(type==LOADHU_STUB)
2703 emit_movzwl((int)&readmem_dword,rt);
2704 if(type==LOADW_STUB)
2705 emit_readword((int)&readmem_dword,rt);
2706 if(type==LOADD_STUB) {
2707 emit_readword((int)&readmem_dword,rt);
2708 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2709 }
57871462 2710 }
2711 emit_jmp(stubs[n][2]); // return address
2712}
2713
2714inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2715{
2716 int rs=get_reg(regmap,target);
2717 int rth=get_reg(regmap,target|64);
2718 int rt=get_reg(regmap,target);
535d208a 2719 if(rs<0) rs=get_reg(regmap,-1);
57871462 2720 assert(rs>=0);
57871462 2721 int ftable=0;
2722 if(type==LOADB_STUB||type==LOADBU_STUB)
2723 ftable=(int)readmemb;
2724 if(type==LOADH_STUB||type==LOADHU_STUB)
2725 ftable=(int)readmemh;
2726 if(type==LOADW_STUB)
2727 ftable=(int)readmem;
24385cae 2728#ifndef FORCE32
57871462 2729 if(type==LOADD_STUB)
2730 ftable=(int)readmemd;
24385cae 2731#endif
2732 assert(ftable!=0);
cbbab9cd 2733#ifdef PCSX
2734 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2735 return;
2736#endif
fd99c415 2737 if(target==0)
2738 emit_movimm(addr,rs);
57871462 2739 emit_writeword(rs,(int)&address);
2740 //emit_pusha();
2741 save_regs(reglist);
2742 //emit_shrimm(rs,16,1);
2743 int cc=get_reg(regmap,CCREG);
2744 if(cc<0) {
2745 emit_loadreg(CCREG,2);
2746 }
2747 //emit_movimm(ftable,0);
2748 emit_movimm(((u_int *)ftable)[addr>>16],0);
2749 //emit_readword((int)&last_count,12);
2750 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2751#ifndef PCSX
57871462 2752 if((signed int)addr>=(signed int)0xC0000000) {
2753 // Pagefault address
2754 int ds=regmap!=regs[i].regmap;
2755 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2756 }
f51dc36c 2757#endif
57871462 2758 //emit_add(12,2,2);
2759 //emit_writeword(2,(int)&Count);
2760 //emit_call(((u_int *)ftable)[addr>>16]);
2761 emit_call((int)&indirect_jump);
f51dc36c 2762#ifndef PCSX
57871462 2763 // We really shouldn't need to update the count here,
2764 // but not doing so causes random crashes...
2765 emit_readword((int)&Count,HOST_TEMPREG);
2766 emit_readword((int)&next_interupt,2);
2767 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2768 emit_writeword(2,(int)&last_count);
2769 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2770 if(cc<0) {
2771 emit_storereg(CCREG,HOST_TEMPREG);
2772 }
f51dc36c 2773#endif
57871462 2774 //emit_popa();
2775 restore_regs(reglist);
fd99c415 2776 if(rt>=0) {
2777 if(type==LOADB_STUB)
2778 emit_movsbl((int)&readmem_dword,rt);
2779 if(type==LOADBU_STUB)
2780 emit_movzbl((int)&readmem_dword,rt);
2781 if(type==LOADH_STUB)
2782 emit_movswl((int)&readmem_dword,rt);
2783 if(type==LOADHU_STUB)
2784 emit_movzwl((int)&readmem_dword,rt);
2785 if(type==LOADW_STUB)
2786 emit_readword((int)&readmem_dword,rt);
2787 if(type==LOADD_STUB) {
2788 emit_readword((int)&readmem_dword,rt);
2789 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2790 }
57871462 2791 }
2792}
2793
2794do_writestub(int n)
2795{
2796 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2797 literal_pool(256);
2798 set_jump_target(stubs[n][1],(int)out);
2799 int type=stubs[n][0];
2800 int i=stubs[n][3];
2801 int rs=stubs[n][4];
2802 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2803 u_int reglist=stubs[n][7];
2804 signed char *i_regmap=i_regs->regmap;
2805 int addr=get_reg(i_regmap,AGEN1+(i&1));
2806 int rth,rt,r;
2807 int ds;
b9b61529 2808 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2809 rth=get_reg(i_regmap,FTEMP|64);
2810 rt=get_reg(i_regmap,r=FTEMP);
2811 }else{
2812 rth=get_reg(i_regmap,rs2[i]|64);
2813 rt=get_reg(i_regmap,r=rs2[i]);
2814 }
2815 assert(rs>=0);
2816 assert(rt>=0);
2817 if(addr<0) addr=get_reg(i_regmap,-1);
2818 assert(addr>=0);
2819 int ftable=0;
2820 if(type==STOREB_STUB)
2821 ftable=(int)writememb;
2822 if(type==STOREH_STUB)
2823 ftable=(int)writememh;
2824 if(type==STOREW_STUB)
2825 ftable=(int)writemem;
24385cae 2826#ifndef FORCE32
57871462 2827 if(type==STORED_STUB)
2828 ftable=(int)writememd;
24385cae 2829#endif
2830 assert(ftable!=0);
57871462 2831 emit_writeword(rs,(int)&address);
2832 //emit_shrimm(rs,16,rs);
2833 //emit_movmem_indexedx4(ftable,rs,rs);
2834 if(type==STOREB_STUB)
2835 emit_writebyte(rt,(int)&byte);
2836 if(type==STOREH_STUB)
2837 emit_writehword(rt,(int)&hword);
2838 if(type==STOREW_STUB)
2839 emit_writeword(rt,(int)&word);
2840 if(type==STORED_STUB) {
3d624f89 2841#ifndef FORCE32
57871462 2842 emit_writeword(rt,(int)&dword);
2843 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2844#else
2845 printf("STORED_STUB\n");
2846#endif
57871462 2847 }
2848 //emit_pusha();
2849 save_regs(reglist);
97a238a6 2850#ifndef PCSX
57871462 2851 ds=i_regs!=&regs[i];
2852 int real_rs=get_reg(i_regmap,rs1[i]);
2853 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2854 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2855 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2856 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2857#endif
57871462 2858 emit_shrimm(rs,16,1);
2859 int cc=get_reg(i_regmap,CCREG);
2860 if(cc<0) {
2861 emit_loadreg(CCREG,2);
2862 }
2863 emit_movimm(ftable,0);
2864 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2865#ifndef PCSX
57871462 2866 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2867#endif
57871462 2868 //emit_readword((int)&last_count,temp);
2869 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2870 //emit_add(cc,temp,cc);
2871 //emit_writeword(cc,(int)&Count);
2872 emit_call((int)&indirect_jump_indexed);
2873 //emit_callreg(rs);
2874 emit_readword((int)&Count,HOST_TEMPREG);
2875 emit_readword((int)&next_interupt,2);
2876 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2877 emit_writeword(2,(int)&last_count);
2878 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2879 if(cc<0) {
2880 emit_storereg(CCREG,HOST_TEMPREG);
2881 }
2882 //emit_popa();
2883 restore_regs(reglist);
2884 //if((cc=get_reg(regmap,CCREG))>=0) {
2885 // emit_loadreg(CCREG,cc);
2886 //}
2887 emit_jmp(stubs[n][2]); // return address
2888}
2889
2890inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2891{
2892 int rs=get_reg(regmap,-1);
2893 int rth=get_reg(regmap,target|64);
2894 int rt=get_reg(regmap,target);
2895 assert(rs>=0);
2896 assert(rt>=0);
cbbab9cd 2897#ifdef PCSX
2898 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2899 return;
2900#endif
57871462 2901 int ftable=0;
2902 if(type==STOREB_STUB)
2903 ftable=(int)writememb;
2904 if(type==STOREH_STUB)
2905 ftable=(int)writememh;
2906 if(type==STOREW_STUB)
2907 ftable=(int)writemem;
24385cae 2908#ifndef FORCE32
57871462 2909 if(type==STORED_STUB)
2910 ftable=(int)writememd;
24385cae 2911#endif
2912 assert(ftable!=0);
57871462 2913 emit_writeword(rs,(int)&address);
2914 //emit_shrimm(rs,16,rs);
2915 //emit_movmem_indexedx4(ftable,rs,rs);
2916 if(type==STOREB_STUB)
2917 emit_writebyte(rt,(int)&byte);
2918 if(type==STOREH_STUB)
2919 emit_writehword(rt,(int)&hword);
2920 if(type==STOREW_STUB)
2921 emit_writeword(rt,(int)&word);
2922 if(type==STORED_STUB) {
3d624f89 2923#ifndef FORCE32
57871462 2924 emit_writeword(rt,(int)&dword);
2925 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2926#else
2927 printf("STORED_STUB\n");
2928#endif
57871462 2929 }
2930 //emit_pusha();
2931 save_regs(reglist);
2932 //emit_shrimm(rs,16,1);
2933 int cc=get_reg(regmap,CCREG);
2934 if(cc<0) {
2935 emit_loadreg(CCREG,2);
2936 }
2937 //emit_movimm(ftable,0);
2938 emit_movimm(((u_int *)ftable)[addr>>16],0);
2939 //emit_readword((int)&last_count,12);
2940 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2941#ifndef PCSX
57871462 2942 if((signed int)addr>=(signed int)0xC0000000) {
2943 // Pagefault address
2944 int ds=regmap!=regs[i].regmap;
2945 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2946 }
f51dc36c 2947#endif
57871462 2948 //emit_add(12,2,2);
2949 //emit_writeword(2,(int)&Count);
2950 //emit_call(((u_int *)ftable)[addr>>16]);
2951 emit_call((int)&indirect_jump);
2952 emit_readword((int)&Count,HOST_TEMPREG);
2953 emit_readword((int)&next_interupt,2);
2954 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2955 emit_writeword(2,(int)&last_count);
2956 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2957 if(cc<0) {
2958 emit_storereg(CCREG,HOST_TEMPREG);
2959 }
2960 //emit_popa();
2961 restore_regs(reglist);
2962}
2963
2964do_unalignedwritestub(int n)
2965{
b7918751 2966 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2967 literal_pool(256);
57871462 2968 set_jump_target(stubs[n][1],(int)out);
b7918751 2969
2970 int i=stubs[n][3];
2971 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2972 int addr=stubs[n][5];
2973 u_int reglist=stubs[n][7];
2974 signed char *i_regmap=i_regs->regmap;
2975 int temp2=get_reg(i_regmap,FTEMP);
2976 int rt;
2977 int ds, real_rs;
2978 rt=get_reg(i_regmap,rs2[i]);
2979 assert(rt>=0);
2980 assert(addr>=0);
2981 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2982 reglist|=(1<<addr);
2983 reglist&=~(1<<temp2);
2984
2985 emit_andimm(addr,0xfffffffc,temp2);
2986 emit_writeword(temp2,(int)&address);
2987
2988 save_regs(reglist);
97a238a6 2989#ifndef PCSX
b7918751 2990 ds=i_regs!=&regs[i];
2991 real_rs=get_reg(i_regmap,rs1[i]);
2992 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2993 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2994 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2995 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2996#endif
b7918751 2997 emit_shrimm(addr,16,1);
2998 int cc=get_reg(i_regmap,CCREG);
2999 if(cc<0) {
3000 emit_loadreg(CCREG,2);
3001 }
3002 emit_movimm((u_int)readmem,0);
3003 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3004#ifndef PCSX
3005 // pagefault address
3006 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3007#endif
b7918751 3008 emit_call((int)&indirect_jump_indexed);
3009 restore_regs(reglist);
3010
3011 emit_readword((int)&readmem_dword,temp2);
3012 int temp=addr; //hmh
3013 emit_shlimm(addr,3,temp);
3014 emit_andimm(temp,24,temp);
3015#ifdef BIG_ENDIAN_MIPS
3016 if (opcode[i]==0x2e) // SWR
3017#else
3018 if (opcode[i]==0x2a) // SWL
3019#endif
3020 emit_xorimm(temp,24,temp);
3021 emit_movimm(-1,HOST_TEMPREG);
55439448 3022 if (opcode[i]==0x2a) { // SWL
b7918751 3023 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3024 emit_orrshr(rt,temp,temp2);
3025 }else{
3026 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3027 emit_orrshl(rt,temp,temp2);
3028 }
3029 emit_readword((int)&address,addr);
3030 emit_writeword(temp2,(int)&word);
3031 //save_regs(reglist); // don't need to, no state changes
3032 emit_shrimm(addr,16,1);
3033 emit_movimm((u_int)writemem,0);
3034 //emit_call((int)&indirect_jump_indexed);
3035 emit_mov(15,14);
3036 emit_readword_dualindexedx4(0,1,15);
3037 emit_readword((int)&Count,HOST_TEMPREG);
3038 emit_readword((int)&next_interupt,2);
3039 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3040 emit_writeword(2,(int)&last_count);
3041 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3042 if(cc<0) {
3043 emit_storereg(CCREG,HOST_TEMPREG);
3044 }
3045 restore_regs(reglist);
57871462 3046 emit_jmp(stubs[n][2]); // return address
3047}
3048
// Debug helper: print the eight register values passed in (parameter names
// are x86 register names), plus the word located just before the first
// parameter in memory.
// NOTE(review): (&edi)[-1] indexes outside the parameter object — it relies
// on the caller's stack layout and is not portable C.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
3053
3054do_invstub(int n)
3055{
3056 literal_pool(20);
3057 u_int reglist=stubs[n][3];
3058 set_jump_target(stubs[n][1],(int)out);
3059 save_regs(reglist);
3060 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3061 emit_call((int)&invalidate_addr);
3062 restore_regs(reglist);
3063 emit_jmp(stubs[n][2]); // return address
3064}
3065
3066int do_dirty_stub(int i)
3067{
3068 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3069 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3070 #ifdef PCSX
3071 addr=(u_int)source;
3072 #endif
57871462 3073 // Careful about the code output here, verify_dirty needs to parse it.
3074 #ifdef ARMv5_ONLY
ac545b3a 3075 emit_loadlp(addr,1);
57871462 3076 emit_loadlp((int)copy,2);
3077 emit_loadlp(slen*4,3);
3078 #else
ac545b3a 3079 emit_movw(addr&0x0000FFFF,1);
57871462 3080 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3081 emit_movt(addr&0xFFFF0000,1);
57871462 3082 emit_movt(((u_int)copy)&0xFFFF0000,2);
3083 emit_movw(slen*4,3);
3084 #endif
3085 emit_movimm(start+i*4,0);
3086 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3087 int entry=(int)out;
3088 load_regs_entry(i);
3089 if(entry==(int)out) entry=instr_addr[i];
3090 emit_jmp(instr_addr[i]);
3091 return entry;
3092}
3093
3094void do_dirty_stub_ds()
3095{
3096 // Careful about the code output here, verify_dirty needs to parse it.
3097 #ifdef ARMv5_ONLY
3098 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3099 emit_loadlp((int)copy,2);
3100 emit_loadlp(slen*4,3);
3101 #else
3102 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3103 emit_movw(((u_int)copy)&0x0000FFFF,2);
3104 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3105 emit_movt(((u_int)copy)&0xFFFF0000,2);
3106 emit_movw(slen*4,3);
3107 #endif
3108 emit_movimm(start+1,0);
3109 emit_call((int)&verify_code_ds);
3110}
3111
3112do_cop1stub(int n)
3113{
3114 literal_pool(256);
3115 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3116 set_jump_target(stubs[n][1],(int)out);
3117 int i=stubs[n][3];
3d624f89 3118// int rs=stubs[n][4];
57871462 3119 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3120 int ds=stubs[n][6];
3121 if(!ds) {
3122 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3123 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3124 }
3125 //else {printf("fp exception in delay slot\n");}
3126 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3127 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3128 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3129 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3130 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3131}
3132
3133/* TLB */
3134
3135int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3136{
3137 if(c) {
3138 if((signed int)addr>=(signed int)0xC0000000) {
3139 // address_generation already loaded the const
3140 emit_readword_dualindexedx4(FP,map,map);
3141 }
3142 else
3143 return -1; // No mapping
3144 }
3145 else {
3146 assert(s!=map);
3147 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3148 emit_addsr12(map,s,map);
3149 // Schedule this while we wait on the load
3150 //if(x) emit_xorimm(s,x,ar);
3151 if(shift>=0) emit_shlimm(s,3,shift);
3152 if(~a) emit_andimm(s,a,ar);
3153 emit_readword_dualindexedx4(FP,map,map);
3154 }
3155 return map;
3156}
3157int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3158{
3159 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3160 emit_test(map,map);
3161 *jaddr=(int)out;
3162 emit_js(0);
3163 }
3164 return map;
3165}
3166
3167int gen_tlb_addr_r(int ar, int map) {
3168 if(map>=0) {
3169 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3170 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3171 }
3172}
3173
3174int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3175{
3176 if(c) {
3177 if(addr<0x80800000||addr>=0xC0000000) {
3178 // address_generation already loaded the const
3179 emit_readword_dualindexedx4(FP,map,map);
3180 }
3181 else
3182 return -1; // No mapping
3183 }
3184 else {
3185 assert(s!=map);
3186 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3187 emit_addsr12(map,s,map);
3188 // Schedule this while we wait on the load
3189 //if(x) emit_xorimm(s,x,ar);
3190 emit_readword_dualindexedx4(FP,map,map);
3191 }
3192 return map;
3193}
3194int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3195{
3196 if(!c||addr<0x80800000||addr>=0xC0000000) {
3197 emit_testimm(map,0x40000000);
3198 *jaddr=(int)out;
3199 emit_jne(0);
3200 }
3201}
3202
3203int gen_tlb_addr_w(int ar, int map) {
3204 if(map>=0) {
3205 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3206 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3207 }
3208}
3209
3210// Generate the address of the memory_map entry, relative to dynarec_local
3211generate_map_const(u_int addr,int reg) {
3212 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3213 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3214}
3215
3216/* Special assem */
3217
3218void shift_assemble_arm(int i,struct regstat *i_regs)
3219{
3220 if(rt1[i]) {
3221 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3222 {
3223 signed char s,t,shift;
3224 t=get_reg(i_regs->regmap,rt1[i]);
3225 s=get_reg(i_regs->regmap,rs1[i]);
3226 shift=get_reg(i_regs->regmap,rs2[i]);
3227 if(t>=0){
3228 if(rs1[i]==0)
3229 {
3230 emit_zeroreg(t);
3231 }
3232 else if(rs2[i]==0)
3233 {
3234 assert(s>=0);
3235 if(s!=t) emit_mov(s,t);
3236 }
3237 else
3238 {
3239 emit_andimm(shift,31,HOST_TEMPREG);
3240 if(opcode2[i]==4) // SLLV
3241 {
3242 emit_shl(s,HOST_TEMPREG,t);
3243 }
3244 if(opcode2[i]==6) // SRLV
3245 {
3246 emit_shr(s,HOST_TEMPREG,t);
3247 }
3248 if(opcode2[i]==7) // SRAV
3249 {
3250 emit_sar(s,HOST_TEMPREG,t);
3251 }
3252 }
3253 }
3254 } else { // DSLLV/DSRLV/DSRAV
3255 signed char sh,sl,th,tl,shift;
3256 th=get_reg(i_regs->regmap,rt1[i]|64);
3257 tl=get_reg(i_regs->regmap,rt1[i]);
3258 sh=get_reg(i_regs->regmap,rs1[i]|64);
3259 sl=get_reg(i_regs->regmap,rs1[i]);
3260 shift=get_reg(i_regs->regmap,rs2[i]);
3261 if(tl>=0){
3262 if(rs1[i]==0)
3263 {
3264 emit_zeroreg(tl);
3265 if(th>=0) emit_zeroreg(th);
3266 }
3267 else if(rs2[i]==0)
3268 {
3269 assert(sl>=0);
3270 if(sl!=tl) emit_mov(sl,tl);
3271 if(th>=0&&sh!=th) emit_mov(sh,th);
3272 }
3273 else
3274 {
3275 // FIXME: What if shift==tl ?
3276 assert(shift!=tl);
3277 int temp=get_reg(i_regs->regmap,-1);
3278 int real_th=th;
3279 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3280 assert(sl>=0);
3281 assert(sh>=0);
3282 emit_andimm(shift,31,HOST_TEMPREG);
3283 if(opcode2[i]==0x14) // DSLLV
3284 {
3285 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3286 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3287 emit_orrshr(sl,HOST_TEMPREG,th);
3288 emit_andimm(shift,31,HOST_TEMPREG);
3289 emit_testimm(shift,32);
3290 emit_shl(sl,HOST_TEMPREG,tl);
3291 if(th>=0) emit_cmovne_reg(tl,th);
3292 emit_cmovne_imm(0,tl);
3293 }
3294 if(opcode2[i]==0x16) // DSRLV
3295 {
3296 assert(th>=0);
3297 emit_shr(sl,HOST_TEMPREG,tl);
3298 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3299 emit_orrshl(sh,HOST_TEMPREG,tl);
3300 emit_andimm(shift,31,HOST_TEMPREG);
3301 emit_testimm(shift,32);
3302 emit_shr(sh,HOST_TEMPREG,th);
3303 emit_cmovne_reg(th,tl);
3304 if(real_th>=0) emit_cmovne_imm(0,th);
3305 }
3306 if(opcode2[i]==0x17) // DSRAV
3307 {
3308 assert(th>=0);
3309 emit_shr(sl,HOST_TEMPREG,tl);
3310 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3311 if(real_th>=0) {
3312 assert(temp>=0);
3313 emit_sarimm(th,31,temp);
3314 }
3315 emit_orrshl(sh,HOST_TEMPREG,tl);
3316 emit_andimm(shift,31,HOST_TEMPREG);
3317 emit_testimm(shift,32);
3318 emit_sar(sh,HOST_TEMPREG,th);
3319 emit_cmovne_reg(th,tl);
3320 if(real_th>=0) emit_cmovne_reg(temp,th);
3321 }
3322 }
3323 }
3324 }
3325 }
3326}
3327#define shift_assemble shift_assemble_arm
3328
3329void loadlr_assemble_arm(int i,struct regstat *i_regs)
3330{
3331 int s,th,tl,temp,temp2,addr,map=-1;
3332 int offset;
3333 int jaddr=0;
af4ee1fe 3334 int memtarget=0,c=0;
57871462 3335 u_int hr,reglist=0;
3336 th=get_reg(i_regs->regmap,rt1[i]|64);
3337 tl=get_reg(i_regs->regmap,rt1[i]);
3338 s=get_reg(i_regs->regmap,rs1[i]);
3339 temp=get_reg(i_regs->regmap,-1);
3340 temp2=get_reg(i_regs->regmap,FTEMP);
3341 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3342 assert(addr<0);
3343 offset=imm[i];
3344 for(hr=0;hr<HOST_REGS;hr++) {
3345 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3346 }
3347 reglist|=1<<temp;
3348 if(offset||s<0||c) addr=temp2;
3349 else addr=s;
3350 if(s>=0) {
3351 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3352 if(c) {
3353 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3354 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3355 }
57871462 3356 }
535d208a 3357 if(!using_tlb) {
3358 if(!c) {
3359 #ifdef RAM_OFFSET
3360 map=get_reg(i_regs->regmap,ROREG);
3361 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3362 #endif
3363 emit_shlimm(addr,3,temp);
3364 if (opcode[i]==0x22||opcode[i]==0x26) {
3365 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3366 }else{
535d208a 3367 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3368 }
535d208a 3369 emit_cmpimm(addr,RAM_SIZE);
3370 jaddr=(int)out;
3371 emit_jno(0);
3372 }
3373 else {
3374 if (opcode[i]==0x22||opcode[i]==0x26) {
3375 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3376 }else{
3377 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3378 }
57871462 3379 }
535d208a 3380 }else{ // using tlb
3381 int a;
3382 if(c) {
3383 a=-1;
3384 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3385 a=0xFFFFFFFC; // LWL/LWR
3386 }else{
3387 a=0xFFFFFFF8; // LDL/LDR
3388 }
3389 map=get_reg(i_regs->regmap,TLREG);
3390 assert(map>=0);
3391 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3392 if(c) {
3393 if (opcode[i]==0x22||opcode[i]==0x26) {
3394 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3395 }else{
3396 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3397 }
535d208a 3398 }
3399 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3400 }
3401 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3402 if(!c||memtarget) {
3403 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3404 emit_readword_indexed_tlb(0,temp2,map,temp2);
3405 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3406 }
3407 else
3408 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3409 if(rt1[i]) {
3410 assert(tl>=0);
57871462 3411 emit_andimm(temp,24,temp);
2002a1db 3412#ifdef BIG_ENDIAN_MIPS
3413 if (opcode[i]==0x26) // LWR
3414#else
3415 if (opcode[i]==0x22) // LWL
3416#endif
3417 emit_xorimm(temp,24,temp);
57871462 3418 emit_movimm(-1,HOST_TEMPREG);
3419 if (opcode[i]==0x26) {
3420 emit_shr(temp2,temp,temp2);
3421 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3422 }else{
3423 emit_shl(temp2,temp,temp2);
3424 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3425 }
3426 emit_or(temp2,tl,tl);
57871462 3427 }
535d208a 3428 //emit_storereg(rt1[i],tl); // DEBUG
3429 }
3430 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3431 // FIXME: little endian
3432 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3433 if(!c||memtarget) {
3434 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3435 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3436 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3437 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3438 }
3439 else
3440 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3441 if(rt1[i]) {
3442 assert(th>=0);
3443 assert(tl>=0);
57871462 3444 emit_testimm(temp,32);
3445 emit_andimm(temp,24,temp);
3446 if (opcode[i]==0x1A) { // LDL
3447 emit_rsbimm(temp,32,HOST_TEMPREG);
3448 emit_shl(temp2h,temp,temp2h);
3449 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3450 emit_movimm(-1,HOST_TEMPREG);
3451 emit_shl(temp2,temp,temp2);
3452 emit_cmove_reg(temp2h,th);
3453 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3454 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3455 emit_orreq(temp2,tl,tl);
3456 emit_orrne(temp2,th,th);
3457 }
3458 if (opcode[i]==0x1B) { // LDR
3459 emit_xorimm(temp,24,temp);
3460 emit_rsbimm(temp,32,HOST_TEMPREG);
3461 emit_shr(temp2,temp,temp2);
3462 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3463 emit_movimm(-1,HOST_TEMPREG);
3464 emit_shr(temp2h,temp,temp2h);
3465 emit_cmovne_reg(temp2,tl);
3466 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3467 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3468 emit_orrne(temp2h,th,th);
3469 emit_orreq(temp2h,tl,tl);
3470 }
3471 }
3472 }
3473}
3474#define loadlr_assemble loadlr_assemble_arm
3475
3476void cop0_assemble(int i,struct regstat *i_regs)
3477{
3478 if(opcode2[i]==0) // MFC0
3479 {
3480 signed char t=get_reg(i_regs->regmap,rt1[i]);
3481 char copr=(source[i]>>11)&0x1f;
3482 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3483 if(t>=0&&rt1[i]!=0) {
7139f3c8 3484#ifdef MUPEN64
57871462 3485 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3486 emit_movimm((source[i]>>11)&0x1f,1);
3487 emit_writeword(0,(int)&PC);
3488 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3489 if(copr==9) {
3490 emit_readword((int)&last_count,ECX);
3491 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3492 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3493 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3494 emit_writeword(HOST_CCREG,(int)&Count);
3495 }
3496 emit_call((int)MFC0);
3497 emit_readword((int)&readmem_dword,t);
7139f3c8 3498#else
3499 emit_readword((int)&reg_cop0+copr*4,t);
3500#endif
57871462 3501 }
3502 }
3503 else if(opcode2[i]==4) // MTC0
3504 {
3505 signed char s=get_reg(i_regs->regmap,rs1[i]);
3506 char copr=(source[i]>>11)&0x1f;
3507 assert(s>=0);
3508 emit_writeword(s,(int)&readmem_dword);
3509 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3510#ifdef MUPEN64
57871462 3511 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3512 emit_movimm((source[i]>>11)&0x1f,1);
3513 emit_writeword(0,(int)&PC);
3514 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3515#endif
3516 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3517 emit_readword((int)&last_count,ECX);
3518 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3519 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3520 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3521 emit_writeword(HOST_CCREG,(int)&Count);
3522 }
3523 // What a mess. The status register (12) can enable interrupts,
3524 // so needs a special case to handle a pending interrupt.
3525 // The interrupt must be taken immediately, because a subsequent
3526 // instruction might disable interrupts again.
7139f3c8 3527 if(copr==12||copr==13) {
fca1aef2 3528#ifdef PCSX
3529 if (is_delayslot) {
3530 // burn cycles to cause cc_interrupt, which will
3531 // reschedule next_interupt. Relies on CCREG from above.
3532 assem_debug("MTC0 DS %d\n", copr);
3533 emit_writeword(HOST_CCREG,(int)&last_count);
3534 emit_movimm(0,HOST_CCREG);
3535 emit_storereg(CCREG,HOST_CCREG);
3536 emit_movimm(copr,0);
3537 emit_call((int)pcsx_mtc0_ds);
3538 return;
3539 }
3540#endif
57871462 3541 emit_movimm(start+i*4+4,0);
3542 emit_movimm(0,1);
3543 emit_writeword(0,(int)&pcaddr);
3544 emit_writeword(1,(int)&pending_exception);
3545 }
3546 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3547 //else
fca1aef2 3548#ifdef PCSX
3549 emit_movimm(copr,0);
3550 emit_call((int)pcsx_mtc0);
3551#else
57871462 3552 emit_call((int)MTC0);
fca1aef2 3553#endif
7139f3c8 3554 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3555 emit_readword((int)&Count,HOST_CCREG);
3556 emit_readword((int)&next_interupt,ECX);
3557 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3558 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3559 emit_writeword(ECX,(int)&last_count);
3560 emit_storereg(CCREG,HOST_CCREG);
3561 }
7139f3c8 3562 if(copr==12||copr==13) {
57871462 3563 assert(!is_delayslot);
3564 emit_readword((int)&pending_exception,14);
3565 }
3566 emit_loadreg(rs1[i],s);
3567 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3568 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3569 if(copr==12||copr==13) {
57871462 3570 emit_test(14,14);
3571 emit_jne((int)&do_interrupt);
3572 }
3573 cop1_usable=0;
3574 }
3575 else
3576 {
3577 assert(opcode2[i]==0x10);
3d624f89 3578#ifndef DISABLE_TLB
57871462 3579 if((source[i]&0x3f)==0x01) // TLBR
3580 emit_call((int)TLBR);
3581 if((source[i]&0x3f)==0x02) // TLBWI
3582 emit_call((int)TLBWI_new);
3583 if((source[i]&0x3f)==0x06) { // TLBWR
3584 // The TLB entry written by TLBWR is dependent on the count,
3585 // so update the cycle count
3586 emit_readword((int)&last_count,ECX);
3587 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3588 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3589 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3590 emit_writeword(HOST_CCREG,(int)&Count);
3591 emit_call((int)TLBWR_new);
3592 }
3593 if((source[i]&0x3f)==0x08) // TLBP
3594 emit_call((int)TLBP);
3d624f89 3595#endif
576bbd8f 3596#ifdef PCSX
3597 if((source[i]&0x3f)==0x10) // RFE
3598 {
3599 emit_readword((int)&Status,0);
3600 emit_andimm(0,0x3c,1);
3601 emit_andimm(0,~0xf,0);
3602 emit_orrshr_imm(1,2,0);
3603 emit_writeword(0,(int)&Status);
3604 }
3605#else
57871462 3606 if((source[i]&0x3f)==0x18) // ERET
3607 {
3608 int count=ccadj[i];
3609 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3610 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3611 emit_jmp((int)jump_eret);
3612 }
576bbd8f 3613#endif
57871462 3614 }
3615}
3616
b9b61529 3617static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3618{
3619 switch (copr) {
3620 case 1:
3621 case 3:
3622 case 5:
3623 case 8:
3624 case 9:
3625 case 10:
3626 case 11:
3627 emit_readword((int)&reg_cop2d[copr],tl);
3628 emit_signextend16(tl,tl);
3629 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3630 break;
3631 case 7:
3632 case 16:
3633 case 17:
3634 case 18:
3635 case 19:
3636 emit_readword((int)&reg_cop2d[copr],tl);
3637 emit_andimm(tl,0xffff,tl);
3638 emit_writeword(tl,(int)&reg_cop2d[copr]);
3639 break;
3640 case 15:
3641 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3642 emit_writeword(tl,(int)&reg_cop2d[copr]);
3643 break;
3644 case 28:
b9b61529 3645 case 29:
3646 emit_readword((int)&reg_cop2d[9],temp);
3647 emit_testimm(temp,0x8000); // do we need this?
3648 emit_andimm(temp,0xf80,temp);
3649 emit_andne_imm(temp,0,temp);
f70d384d 3650 emit_shrimm(temp,7,tl);
b9b61529 3651 emit_readword((int)&reg_cop2d[10],temp);
3652 emit_testimm(temp,0x8000);
3653 emit_andimm(temp,0xf80,temp);
3654 emit_andne_imm(temp,0,temp);
f70d384d 3655 emit_orrshr_imm(temp,2,tl);
b9b61529 3656 emit_readword((int)&reg_cop2d[11],temp);
3657 emit_testimm(temp,0x8000);
3658 emit_andimm(temp,0xf80,temp);
3659 emit_andne_imm(temp,0,temp);
f70d384d 3660 emit_orrshl_imm(temp,3,tl);
b9b61529 3661 emit_writeword(tl,(int)&reg_cop2d[copr]);
3662 break;
3663 default:
3664 emit_readword((int)&reg_cop2d[copr],tl);
3665 break;
3666 }
3667}
3668
3669static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3670{
3671 switch (copr) {
3672 case 15:
3673 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3674 emit_writeword(sl,(int)&reg_cop2d[copr]);
3675 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3676 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3677 emit_writeword(sl,(int)&reg_cop2d[14]);
3678 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3679 break;
3680 case 28:
3681 emit_andimm(sl,0x001f,temp);
f70d384d 3682 emit_shlimm(temp,7,temp);
b9b61529 3683 emit_writeword(temp,(int)&reg_cop2d[9]);
3684 emit_andimm(sl,0x03e0,temp);
f70d384d 3685 emit_shlimm(temp,2,temp);
b9b61529 3686 emit_writeword(temp,(int)&reg_cop2d[10]);
3687 emit_andimm(sl,0x7c00,temp);
f70d384d 3688 emit_shrimm(temp,3,temp);
b9b61529 3689 emit_writeword(temp,(int)&reg_cop2d[11]);
3690 emit_writeword(sl,(int)&reg_cop2d[28]);
3691 break;
3692 case 30:
3693 emit_movs(sl,temp);
3694 emit_mvnmi(temp,temp);
3695 emit_clz(temp,temp);
3696 emit_writeword(sl,(int)&reg_cop2d[30]);
3697 emit_writeword(temp,(int)&reg_cop2d[31]);
3698 break;
b9b61529 3699 case 31:
3700 break;
3701 default:
3702 emit_writeword(sl,(int)&reg_cop2d[copr]);
3703 break;
3704 }
3705}
3706
3707void cop2_assemble(int i,struct regstat *i_regs)
3708{
3709 u_int copr=(source[i]>>11)&0x1f;
3710 signed char temp=get_reg(i_regs->regmap,-1);
3711 if (opcode2[i]==0) { // MFC2
3712 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3713 if(tl>=0&&rt1[i]!=0)
b9b61529 3714 cop2_get_dreg(copr,tl,temp);
3715 }
3716 else if (opcode2[i]==4) { // MTC2
3717 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3718 cop2_put_dreg(copr,sl,temp);
3719 }
3720 else if (opcode2[i]==2) // CFC2
3721 {
3722 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3723 if(tl>=0&&rt1[i]!=0)
b9b61529 3724 emit_readword((int)&reg_cop2c[copr],tl);
3725 }
3726 else if (opcode2[i]==6) // CTC2
3727 {
3728 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3729 switch(copr) {
3730 case 4:
3731 case 12:
3732 case 20:
3733 case 26:
3734 case 27:
3735 case 29:
3736 case 30:
3737 emit_signextend16(sl,temp);
3738 break;
3739 case 31:
3740 //value = value & 0x7ffff000;
3741 //if (value & 0x7f87e000) value |= 0x80000000;
3742 emit_shrimm(sl,12,temp);
3743 emit_shlimm(temp,12,temp);
3744 emit_testimm(temp,0x7f000000);
3745 emit_testeqimm(temp,0x00870000);
3746 emit_testeqimm(temp,0x0000e000);
3747 emit_orrne_imm(temp,0x80000000,temp);
3748 break;
3749 default:
3750 temp=sl;
3751 break;
3752 }
3753 emit_writeword(temp,(int)&reg_cop2c[copr]);
3754 assert(sl>=0);
3755 }
3756}
3757
3758void c2op_assemble(int i,struct regstat *i_regs)
3759{
3760 signed char temp=get_reg(i_regs->regmap,-1);
3761 u_int c2op=source[i]&0x3f;
3762 u_int hr,reglist=0;
3763 for(hr=0;hr<HOST_REGS;hr++) {
3764 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3765 }
3766 if(i==0||itype[i-1]!=C2OP)
3767 save_regs(reglist);
3768
3769 if (gte_handlers[c2op]!=NULL) {
3770 int cc=get_reg(i_regs->regmap,CCREG);
009faf24 3771 emit_movimm(source[i],1); // opcode
b9b61529 3772 if (cc>=0&&gte_cycletab[c2op])
009faf24 3773 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3774 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3775 emit_writeword(1,(int)&psxRegs.code);
b9b61529 3776 emit_call((int)gte_handlers[c2op]);
3777 }
3778
3779 if(i>=slen-1||itype[i+1]!=C2OP)
3780 restore_regs(reglist);
3781}
3782
3783void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3784{
3785 // XXX: should just just do the exception instead
3786 if(!cop1_usable) {
3787 int jaddr=(int)out;
3788 emit_jmp(0);
3789 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3790 cop1_usable=1;
3791 }
3792}
3793
57871462 3794void cop1_assemble(int i,struct regstat *i_regs)
3795{
3d624f89 3796#ifndef DISABLE_COP1
57871462 3797 // Check cop1 unusable
3798 if(!cop1_usable) {
3799 signed char rs=get_reg(i_regs->regmap,CSREG);
3800 assert(rs>=0);
3801 emit_testimm(rs,0x20000000);
3802 int jaddr=(int)out;
3803 emit_jeq(0);
3804 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3805 cop1_usable=1;
3806 }
3807 if (opcode2[i]==0) { // MFC1
3808 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3809 if(tl>=0) {
3810 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3811 emit_readword_indexed(0,tl,tl);
3812 }
3813 }
3814 else if (opcode2[i]==1) { // DMFC1
3815 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3816 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3817 if(tl>=0) {
3818 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3819 if(th>=0) emit_readword_indexed(4,tl,th);
3820 emit_readword_indexed(0,tl,tl);
3821 }
3822 }
3823 else if (opcode2[i]==4) { // MTC1
3824 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3825 signed char temp=get_reg(i_regs->regmap,-1);
3826 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3827 emit_writeword_indexed(sl,0,temp);
3828 }
3829 else if (opcode2[i]==5) { // DMTC1
3830 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3831 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3832 signed char temp=get_reg(i_regs->regmap,-1);
3833 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3834 emit_writeword_indexed(sh,4,temp);
3835 emit_writeword_indexed(sl,0,temp);
3836 }
3837 else if (opcode2[i]==2) // CFC1
3838 {
3839 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3840 if(tl>=0) {
3841 u_int copr=(source[i]>>11)&0x1f;
3842 if(copr==0) emit_readword((int)&FCR0,tl);
3843 if(copr==31) emit_readword((int)&FCR31,tl);
3844 }
3845 }
3846 else if (opcode2[i]==6) // CTC1
3847 {
3848 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3849 u_int copr=(source[i]>>11)&0x1f;
3850 assert(sl>=0);
3851 if(copr==31)
3852 {
3853 emit_writeword(sl,(int)&FCR31);
3854 // Set the rounding mode
3855 //FIXME
3856 //char temp=get_reg(i_regs->regmap,-1);
3857 //emit_andimm(sl,3,temp);
3858 //emit_fldcw_indexed((int)&rounding_modes,temp);
3859 }
3860 }
3d624f89 3861#else
3862 cop1_unusable(i, i_regs);
3863#endif
57871462 3864}
3865
3866void fconv_assemble_arm(int i,struct regstat *i_regs)
3867{
3d624f89 3868#ifndef DISABLE_COP1
57871462 3869 signed char temp=get_reg(i_regs->regmap,-1);
3870 assert(temp>=0);
3871 // Check cop1 unusable
3872 if(!cop1_usable) {
3873 signed char rs=get_reg(i_regs->regmap,CSREG);
3874 assert(rs>=0);
3875 emit_testimm(rs,0x20000000);
3876 int jaddr=(int)out;
3877 emit_jeq(0);
3878 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3879 cop1_usable=1;
3880 }
3881
3882 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3883 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3884 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3885 emit_flds(temp,15);
3886 emit_ftosizs(15,15); // float->int, truncate
3887 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3888 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3889 emit_fsts(15,temp);
3890 return;
3891 }
3892 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3893 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3894 emit_vldr(temp,7);
3895 emit_ftosizd(7,13); // double->int, truncate
3896 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3897 emit_fsts(13,temp);
3898 return;
3899 }
3900
3901 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3902 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3903 emit_flds(temp,13);
3904 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3905 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3906 emit_fsitos(13,15);
3907 emit_fsts(15,temp);
3908 return;
3909 }
3910 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3911 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3912 emit_flds(temp,13);
3913 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3914 emit_fsitod(13,7);
3915 emit_vstr(7,temp);
3916 return;
3917 }
3918
3919 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3920 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3921 emit_flds(temp,13);
3922 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3923 emit_fcvtds(13,7);
3924 emit_vstr(7,temp);
3925 return;
3926 }
3927 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3928 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3929 emit_vldr(temp,7);
3930 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3931 emit_fcvtsd(7,13);
3932 emit_fsts(13,temp);
3933 return;
3934 }
3935 #endif
3936
3937 // C emulation code
3938
3939 u_int hr,reglist=0;
3940 for(hr=0;hr<HOST_REGS;hr++) {
3941 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3942 }
3943 save_regs(reglist);
3944
3945 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3946 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3947 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3948 emit_call((int)cvt_s_w);
3949 }
3950 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3951 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3952 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3953 emit_call((int)cvt_d_w);
3954 }
3955 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3956 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3957 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3958 emit_call((int)cvt_s_l);
3959 }
3960 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3961 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3962 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3963 emit_call((int)cvt_d_l);
3964 }
3965
3966 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3967 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3968 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3969 emit_call((int)cvt_d_s);
3970 }
3971 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3972 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3973 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3974 emit_call((int)cvt_w_s);
3975 }
3976 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3977 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3978 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3979 emit_call((int)cvt_l_s);
3980 }
3981
3982 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3983 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3984 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3985 emit_call((int)cvt_s_d);
3986 }
3987 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3988 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3989 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3990 emit_call((int)cvt_w_d);
3991 }
3992 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3993 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3994 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3995 emit_call((int)cvt_l_d);
3996 }
3997
3998 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3999 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4000 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4001 emit_call((int)round_l_s);
4002 }
4003 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4004 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4005 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4006 emit_call((int)trunc_l_s);
4007 }
4008 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4009 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4010 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4011 emit_call((int)ceil_l_s);
4012 }
4013 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4014 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4015 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4016 emit_call((int)floor_l_s);
4017 }
4018 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4019 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4020 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4021 emit_call((int)round_w_s);
4022 }
4023 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4024 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4025 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4026 emit_call((int)trunc_w_s);
4027 }
4028 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4029 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4030 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4031 emit_call((int)ceil_w_s);
4032 }
4033 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4034 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4035 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4036 emit_call((int)floor_w_s);
4037 }
4038
4039 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4040 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4041 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4042 emit_call((int)round_l_d);
4043 }
4044 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4045 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4046 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4047 emit_call((int)trunc_l_d);
4048 }
4049 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4050 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4051 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4052 emit_call((int)ceil_l_d);
4053 }
4054 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4055 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4056 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4057 emit_call((int)floor_l_d);
4058 }
4059 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4060 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4061 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4062 emit_call((int)round_w_d);
4063 }
4064 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4065 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4066 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4067 emit_call((int)trunc_w_d);
4068 }
4069 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4070 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4071 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4072 emit_call((int)ceil_w_d);
4073 }
4074 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4075 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4076 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4077 emit_call((int)floor_w_d);
4078 }
4079
4080 restore_regs(reglist);
3d624f89 4081#else
4082 cop1_unusable(i, i_regs);
4083#endif
57871462 4084}
4085#define fconv_assemble fconv_assemble_arm
4086
4087void fcomp_assemble(int i,struct regstat *i_regs)
4088{
3d624f89 4089#ifndef DISABLE_COP1
57871462 4090 signed char fs=get_reg(i_regs->regmap,FSREG);
4091 signed char temp=get_reg(i_regs->regmap,-1);
4092 assert(temp>=0);
4093 // Check cop1 unusable
4094 if(!cop1_usable) {
4095 signed char cs=get_reg(i_regs->regmap,CSREG);
4096 assert(cs>=0);
4097 emit_testimm(cs,0x20000000);
4098 int jaddr=(int)out;
4099 emit_jeq(0);
4100 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4101 cop1_usable=1;
4102 }
4103
4104 if((source[i]&0x3f)==0x30) {
4105 emit_andimm(fs,~0x800000,fs);
4106 return;
4107 }
4108
4109 if((source[i]&0x3e)==0x38) {
4110 // sf/ngle - these should throw exceptions for NaNs
4111 emit_andimm(fs,~0x800000,fs);
4112 return;
4113 }
4114
4115 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4116 if(opcode2[i]==0x10) {
4117 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4118 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4119 emit_orimm(fs,0x800000,fs);
4120 emit_flds(temp,14);
4121 emit_flds(HOST_TEMPREG,15);
4122 emit_fcmps(14,15);
4123 emit_fmstat();
4124 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4125 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4126 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4127 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4128 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4129 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4130 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4131 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4132 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4133 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4134 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4135 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4136 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4137 return;
4138 }
4139 if(opcode2[i]==0x11) {
4140 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4141 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4142 emit_orimm(fs,0x800000,fs);
4143 emit_vldr(temp,6);
4144 emit_vldr(HOST_TEMPREG,7);
4145 emit_fcmpd(6,7);
4146 emit_fmstat();
4147 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4148 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4149 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4150 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4151 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4152 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4153 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4154 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4155 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4156 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4157 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4158 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4159 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4160 return;
4161 }
4162 #endif
4163
4164 // C only
4165
4166 u_int hr,reglist=0;
4167 for(hr=0;hr<HOST_REGS;hr++) {
4168 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4169 }
4170 reglist&=~(1<<fs);
4171 save_regs(reglist);
4172 if(opcode2[i]==0x10) {
4173 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4174 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4175 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4176 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4177 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4178 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4179 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4180 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4181 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4182 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4183 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4184 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4185 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4186 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4187 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4188 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4189 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4190 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4191 }
4192 if(opcode2[i]==0x11) {
4193 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4194 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4195 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4196 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4197 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4198 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4199 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4200 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4201 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4202 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4203 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4204 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4205 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4206 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4207 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4208 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4209 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4210 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4211 }
4212 restore_regs(reglist);
4213 emit_loadreg(FSREG,fs);
3d624f89 4214#else
4215 cop1_unusable(i, i_regs);
4216#endif
57871462 4217}
4218
4219void float_assemble(int i,struct regstat *i_regs)
4220{
3d624f89 4221#ifndef DISABLE_COP1
57871462 4222 signed char temp=get_reg(i_regs->regmap,-1);
4223 assert(temp>=0);
4224 // Check cop1 unusable
4225 if(!cop1_usable) {
4226 signed char cs=get_reg(i_regs->regmap,CSREG);
4227 assert(cs>=0);
4228 emit_testimm(cs,0x20000000);
4229 int jaddr=(int)out;
4230 emit_jeq(0);
4231 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4232 cop1_usable=1;
4233 }
4234
4235 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4236 if((source[i]&0x3f)==6) // mov
4237 {
4238 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4239 if(opcode2[i]==0x10) {
4240 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4241 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4242 emit_readword_indexed(0,temp,temp);
4243 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4244 }
4245 if(opcode2[i]==0x11) {
4246 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4247 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4248 emit_vldr(temp,7);
4249 emit_vstr(7,HOST_TEMPREG);
4250 }
4251 }
4252 return;
4253 }
4254
4255 if((source[i]&0x3f)>3)
4256 {
4257 if(opcode2[i]==0x10) {
4258 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4259 emit_flds(temp,15);
4260 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4261 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4262 }
4263 if((source[i]&0x3f)==4) // sqrt
4264 emit_fsqrts(15,15);
4265 if((source[i]&0x3f)==5) // abs
4266 emit_fabss(15,15);
4267 if((source[i]&0x3f)==7) // neg
4268 emit_fnegs(15,15);
4269 emit_fsts(15,temp);
4270 }
4271 if(opcode2[i]==0x11) {
4272 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4273 emit_vldr(temp,7);
4274 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4275 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4276 }
4277 if((source[i]&0x3f)==4) // sqrt
4278 emit_fsqrtd(7,7);
4279 if((source[i]&0x3f)==5) // abs
4280 emit_fabsd(7,7);
4281 if((source[i]&0x3f)==7) // neg
4282 emit_fnegd(7,7);
4283 emit_vstr(7,temp);
4284 }
4285 return;
4286 }
4287 if((source[i]&0x3f)<4)
4288 {
4289 if(opcode2[i]==0x10) {
4290 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4291 }
4292 if(opcode2[i]==0x11) {
4293 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4294 }
4295 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4296 if(opcode2[i]==0x10) {
4297 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4298 emit_flds(temp,15);
4299 emit_flds(HOST_TEMPREG,13);
4300 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4301 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4302 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4303 }
4304 }
4305 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4306 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4307 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4308 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4309 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4310 emit_fsts(15,HOST_TEMPREG);
4311 }else{
4312 emit_fsts(15,temp);
4313 }
4314 }
4315 else if(opcode2[i]==0x11) {
4316 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4317 emit_vldr(temp,7);
4318 emit_vldr(HOST_TEMPREG,6);
4319 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4320 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4321 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4322 }
4323 }
4324 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4325 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4326 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4327 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4328 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4329 emit_vstr(7,HOST_TEMPREG);
4330 }else{
4331 emit_vstr(7,temp);
4332 }
4333 }
4334 }
4335 else {
4336 if(opcode2[i]==0x10) {
4337 emit_flds(temp,15);
4338 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4339 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4340 }
4341 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4342 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4343 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4344 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4345 emit_fsts(15,temp);
4346 }
4347 else if(opcode2[i]==0x11) {
4348 emit_vldr(temp,7);
4349 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4350 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4351 }
4352 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4353 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4354 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4355 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4356 emit_vstr(7,temp);
4357 }
4358 }
4359 return;
4360 }
4361 #endif
4362
4363 u_int hr,reglist=0;
4364 for(hr=0;hr<HOST_REGS;hr++) {
4365 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4366 }
4367 if(opcode2[i]==0x10) { // Single precision
4368 save_regs(reglist);
4369 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4370 if((source[i]&0x3f)<4) {
4371 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4372 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4373 }else{
4374 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4375 }
4376 switch(source[i]&0x3f)
4377 {
4378 case 0x00: emit_call((int)add_s);break;
4379 case 0x01: emit_call((int)sub_s);break;
4380 case 0x02: emit_call((int)mul_s);break;
4381 case 0x03: emit_call((int)div_s);break;
4382 case 0x04: emit_call((int)sqrt_s);break;
4383 case 0x05: emit_call((int)abs_s);break;
4384 case 0x06: emit_call((int)mov_s);break;
4385 case 0x07: emit_call((int)neg_s);break;
4386 }
4387 restore_regs(reglist);
4388 }
4389 if(opcode2[i]==0x11) { // Double precision
4390 save_regs(reglist);
4391 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4392 if((source[i]&0x3f)<4) {
4393 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4394 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4395 }else{
4396 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4397 }
4398 switch(source[i]&0x3f)
4399 {
4400 case 0x00: emit_call((int)add_d);break;
4401 case 0x01: emit_call((int)sub_d);break;
4402 case 0x02: emit_call((int)mul_d);break;
4403 case 0x03: emit_call((int)div_d);break;
4404 case 0x04: emit_call((int)sqrt_d);break;
4405 case 0x05: emit_call((int)abs_d);break;
4406 case 0x06: emit_call((int)mov_d);break;
4407 case 0x07: emit_call((int)neg_d);break;
4408 }
4409 restore_regs(reglist);
4410 }
3d624f89 4411#else
4412 cop1_unusable(i, i_regs);
4413#endif
57871462 4414}
4415
4416void multdiv_assemble_arm(int i,struct regstat *i_regs)
4417{
4418 // case 0x18: MULT
4419 // case 0x19: MULTU
4420 // case 0x1A: DIV
4421 // case 0x1B: DIVU
4422 // case 0x1C: DMULT
4423 // case 0x1D: DMULTU
4424 // case 0x1E: DDIV
4425 // case 0x1F: DDIVU
4426 if(rs1[i]&&rs2[i])
4427 {
4428 if((opcode2[i]&4)==0) // 32-bit
4429 {
4430 if(opcode2[i]==0x18) // MULT
4431 {
4432 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4433 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4434 signed char hi=get_reg(i_regs->regmap,HIREG);
4435 signed char lo=get_reg(i_regs->regmap,LOREG);
4436 assert(m1>=0);
4437 assert(m2>=0);
4438 assert(hi>=0);
4439 assert(lo>=0);
4440 emit_smull(m1,m2,hi,lo);
4441 }
4442 if(opcode2[i]==0x19) // MULTU
4443 {
4444 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4445 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4446 signed char hi=get_reg(i_regs->regmap,HIREG);
4447 signed char lo=get_reg(i_regs->regmap,LOREG);
4448 assert(m1>=0);
4449 assert(m2>=0);
4450 assert(hi>=0);
4451 assert(lo>=0);
4452 emit_umull(m1,m2,hi,lo);
4453 }
4454 if(opcode2[i]==0x1A) // DIV
4455 {
4456 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4457 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4458 assert(d1>=0);
4459 assert(d2>=0);
4460 signed char quotient=get_reg(i_regs->regmap,LOREG);
4461 signed char remainder=get_reg(i_regs->regmap,HIREG);
4462 assert(quotient>=0);
4463 assert(remainder>=0);
4464 emit_movs(d1,remainder);
4465 emit_negmi(remainder,remainder);
4466 emit_movs(d2,HOST_TEMPREG);
4467 emit_jeq((int)out+52); // Division by zero
4468 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4469 emit_clz(HOST_TEMPREG,quotient);
4470 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4471 emit_orimm(quotient,1<<31,quotient);
4472 emit_shr(quotient,quotient,quotient);
4473 emit_cmp(remainder,HOST_TEMPREG);
4474 emit_subcs(remainder,HOST_TEMPREG,remainder);
4475 emit_adcs(quotient,quotient,quotient);
4476 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4477 emit_jcc((int)out-16); // -4
4478 emit_teq(d1,d2);
4479 emit_negmi(quotient,quotient);
4480 emit_test(d1,d1);
4481 emit_negmi(remainder,remainder);
4482 }
4483 if(opcode2[i]==0x1B) // DIVU
4484 {
4485 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4486 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4487 assert(d1>=0);
4488 assert(d2>=0);
4489 signed char quotient=get_reg(i_regs->regmap,LOREG);
4490 signed char remainder=get_reg(i_regs->regmap,HIREG);
4491 assert(quotient>=0);
4492 assert(remainder>=0);
4493 emit_test(d2,d2);
4494 emit_jeq((int)out+44); // Division by zero
4495 emit_clz(d2,HOST_TEMPREG);
4496 emit_movimm(1<<31,quotient);
4497 emit_shl(d2,HOST_TEMPREG,d2);
4498 emit_mov(d1,remainder);
4499 emit_shr(quotient,HOST_TEMPREG,quotient);
4500 emit_cmp(remainder,d2);
4501 emit_subcs(remainder,d2,remainder);
4502 emit_adcs(quotient,quotient,quotient);
4503 emit_shrcc_imm(d2,1,d2);
4504 emit_jcc((int)out-16); // -4
4505 }
4506 }
4507 else // 64-bit
4508 {
4509 if(opcode2[i]==0x1C) // DMULT
4510 {
4511 assert(opcode2[i]!=0x1C);
4512 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4513 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4514 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4515 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4516 assert(m1h>=0);
4517 assert(m2h>=0);
4518 assert(m1l>=0);
4519 assert(m2l>=0);
4520 emit_pushreg(m2h);
4521 emit_pushreg(m2l);
4522 emit_pushreg(m1h);
4523 emit_pushreg(m1l);
4524 emit_call((int)&mult64);
4525 emit_popreg(m1l);
4526 emit_popreg(m1h);
4527 emit_popreg(m2l);
4528 emit_popreg(m2h);
4529 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4530 signed char hil=get_reg(i_regs->regmap,HIREG);
4531 if(hih>=0) emit_loadreg(HIREG|64,hih);
4532 if(hil>=0) emit_loadreg(HIREG,hil);
4533 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4534 signed char lol=get_reg(i_regs->regmap,LOREG);
4535 if(loh>=0) emit_loadreg(LOREG|64,loh);
4536 if(lol>=0) emit_loadreg(LOREG,lol);
4537 }
4538 if(opcode2[i]==0x1D) // DMULTU
4539 {
4540 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4541 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4542 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4543 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4544 assert(m1h>=0);
4545 assert(m2h>=0);
4546 assert(m1l>=0);
4547 assert(m2l>=0);
4548 save_regs(0x100f);
4549 if(m1l!=0) emit_mov(m1l,0);
4550 if(m1h==0) emit_readword((int)&dynarec_local,1);
4551 else if(m1h>1) emit_mov(m1h,1);
4552 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4553 else if(m2l>2) emit_mov(m2l,2);
4554 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4555 else if(m2h>3) emit_mov(m2h,3);
4556 emit_call((int)&multu64);
4557 restore_regs(0x100f);
4558 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4559 signed char hil=get_reg(i_regs->regmap,HIREG);
4560 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4561 signed char lol=get_reg(i_regs->regmap,LOREG);
4562 /*signed char temp=get_reg(i_regs->regmap,-1);
4563 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4564 signed char rl=get_reg(i_regs->regmap,HIREG);
4565 assert(m1h>=0);
4566 assert(m2h>=0);
4567 assert(m1l>=0);
4568 assert(m2l>=0);
4569 assert(temp>=0);
4570 //emit_mov(m1l,EAX);
4571 //emit_mul(m2l);
4572 emit_umull(rl,rh,m1l,m2l);
4573 emit_storereg(LOREG,rl);
4574 emit_mov(rh,temp);
4575 //emit_mov(m1h,EAX);
4576 //emit_mul(m2l);
4577 emit_umull(rl,rh,m1h,m2l);
4578 emit_adds(rl,temp,temp);
4579 emit_adcimm(rh,0,rh);
4580 emit_storereg(HIREG,rh);
4581 //emit_mov(m2h,EAX);
4582 //emit_mul(m1l);
4583 emit_umull(rl,rh,m1l,m2h);
4584 emit_adds(rl,temp,temp);
4585 emit_adcimm(rh,0,rh);
4586 emit_storereg(LOREG|64,temp);
4587 emit_mov(rh,temp);
4588 //emit_mov(m2h,EAX);
4589 //emit_mul(m1h);
4590 emit_umull(rl,rh,m1h,m2h);
4591 emit_adds(rl,temp,rl);
4592 emit_loadreg(HIREG,temp);
4593 emit_adcimm(rh,0,rh);
4594 emit_adds(rl,temp,rl);
4595 emit_adcimm(rh,0,rh);
4596 // DEBUG
4597 /*
4598 emit_pushreg(m2h);
4599 emit_pushreg(m2l);
4600 emit_pushreg(m1h);
4601 emit_pushreg(m1l);
4602 emit_call((int)&multu64);
4603 emit_popreg(m1l);
4604 emit_popreg(m1h);
4605 emit_popreg(m2l);
4606 emit_popreg(m2h);
4607 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4608 signed char hil=get_reg(i_regs->regmap,HIREG);
4609 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4610 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4611 */
4612 // Shouldn't be necessary
4613 //char loh=get_reg(i_regs->regmap,LOREG|64);
4614 //char lol=get_reg(i_regs->regmap,LOREG);
4615 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4616 //if(lol>=0) emit_loadreg(LOREG,lol);
4617 }
4618 if(opcode2[i]==0x1E) // DDIV
4619 {
4620 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4621 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4622 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4623 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4624 assert(d1h>=0);
4625 assert(d2h>=0);
4626 assert(d1l>=0);
4627 assert(d2l>=0);
4628 save_regs(0x100f);
4629 if(d1l!=0) emit_mov(d1l,0);
4630 if(d1h==0) emit_readword((int)&dynarec_local,1);
4631 else if(d1h>1) emit_mov(d1h,1);
4632 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4633 else if(d2l>2) emit_mov(d2l,2);
4634 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4635 else if(d2h>3) emit_mov(d2h,3);
4636 emit_call((int)&div64);
4637 restore_regs(0x100f);
4638 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4639 signed char hil=get_reg(i_regs->regmap,HIREG);
4640 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4641 signed char lol=get_reg(i_regs->regmap,LOREG);
4642 if(hih>=0) emit_loadreg(HIREG|64,hih);
4643 if(hil>=0) emit_loadreg(HIREG,hil);
4644 if(loh>=0) emit_loadreg(LOREG|64,loh);
4645 if(lol>=0) emit_loadreg(LOREG,lol);
4646 }
4647 if(opcode2[i]==0x1F) // DDIVU
4648 {
4649 //u_int hr,reglist=0;
4650 //for(hr=0;hr<HOST_REGS;hr++) {
4651 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4652 //}
4653 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4654 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4655 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4656 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4657 assert(d1h>=0);
4658 assert(d2h>=0);
4659 assert(d1l>=0);
4660 assert(d2l>=0);
4661 save_regs(0x100f);
4662 if(d1l!=0) emit_mov(d1l,0);
4663 if(d1h==0) emit_readword((int)&dynarec_local,1);
4664 else if(d1h>1) emit_mov(d1h,1);
4665 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4666 else if(d2l>2) emit_mov(d2l,2);
4667 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4668 else if(d2h>3) emit_mov(d2h,3);
4669 emit_call((int)&divu64);
4670 restore_regs(0x100f);
4671 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4672 signed char hil=get_reg(i_regs->regmap,HIREG);
4673 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4674 signed char lol=get_reg(i_regs->regmap,LOREG);
4675 if(hih>=0) emit_loadreg(HIREG|64,hih);
4676 if(hil>=0) emit_loadreg(HIREG,hil);
4677 if(loh>=0) emit_loadreg(LOREG|64,loh);
4678 if(lol>=0) emit_loadreg(LOREG,lol);
4679 }
4680 }
4681 }
4682 else
4683 {
4684 // Multiply by zero is zero.
4685 // MIPS does not have a divide by zero exception.
4686 // The result is undefined, we return zero.
4687 signed char hr=get_reg(i_regs->regmap,HIREG);
4688 signed char lr=get_reg(i_regs->regmap,LOREG);
4689 if(hr>=0) emit_zeroreg(hr);
4690 if(lr>=0) emit_zeroreg(lr);
4691 }
4692}
4693#define multdiv_assemble multdiv_assemble_arm
4694
/*
 * Intentionally emits nothing on ARM.  On x86 this step puts the value 0xf8
 * into register r; on ARM the hash can be done with a single instruction
 * (see do_rhash), so no preload is needed.
 */
void do_preload_rhash(int r) {
  (void)r;
}
4699
4700void do_preload_rhtbl(int ht) {
4701 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4702}
4703
/*
 * Emit code that computes the mini hash table offset for the address in rs:
 * rh = rs & 0xf8.
 */
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4707
4708void do_miniht_load(int ht,int rh) {
4709 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4710 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4711}
4712
4713void do_miniht_jump(int rs,int rh,int ht) {
4714 emit_cmp(rh,rs);
4715 emit_ldreq_indexed(ht,4,15);
4716 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4717 emit_mov(rs,7);
4718 emit_jmp(jump_vaddr_reg[7]);
4719 #else
4720 emit_jmp(jump_vaddr_reg[rs]);
4721 #endif
4722}
4723
4724void do_miniht_insert(u_int return_address,int rt,int temp) {
4725 #ifdef ARMv5_ONLY
4726 emit_movimm(return_address,rt); // PC into link register
4727 add_to_linker((int)out,return_address,1);
4728 emit_pcreladdr(temp);
4729 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4730 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4731 #else
4732 emit_movw(return_address&0x0000FFFF,rt);
4733 add_to_linker((int)out,return_address,1);
4734 emit_pcreladdr(temp);
4735 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4736 emit_movt(return_address&0xFFFF0000,rt);
4737 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4738 #endif
4739}
4740
4741// Sign-extend to 64 bits and write out upper half of a register
4742// This is useful where we have a 32-bit value in a register, and want to
4743// keep it in a 32-bit register, but can't guarantee that it won't be read
4744// as a 64-bit value later.
4745void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4746{
24385cae 4747#ifndef FORCE32
57871462 4748 if(is32_pre==is32) return;
4749 int hr,reg;
4750 for(hr=0;hr<HOST_REGS;hr++) {
4751 if(hr!=EXCLUDE_REG) {
4752 //if(pre[hr]==entry[hr]) {
4753 if((reg=pre[hr])>=0) {
4754 if((dirty>>hr)&1) {
4755 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4756 emit_sarimm(hr,31,HOST_TEMPREG);
4757 emit_storereg(reg|64,HOST_TEMPREG);
4758 }
4759 }
4760 }
4761 //}
4762 }
4763 }
24385cae 4764#endif
57871462 4765}
4766
4767void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4768{
4769 //if(dirty_pre==dirty) return;
4770 int hr,reg,new_hr;
4771 for(hr=0;hr<HOST_REGS;hr++) {
4772 if(hr!=EXCLUDE_REG) {
4773 reg=pre[hr];
4774 if(((~u)>>(reg&63))&1) {
4775 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4776 if(((dirty_pre&~dirty)>>hr)&1) {
4777 if(reg>0&&reg<34) {
4778 emit_storereg(reg,hr);
4779 if( ((is32_pre&~uu)>>reg)&1 ) {
4780 emit_sarimm(hr,31,HOST_TEMPREG);
4781 emit_storereg(reg|64,HOST_TEMPREG);
4782 }
4783 }
4784 else if(reg>=64) {
4785 emit_storereg(reg,hr);
4786 }
4787 }
4788 }
4789 else // Check if register moved to a different register
4790 if((new_hr=get_reg(entry,reg))>=0) {
4791 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4792 if(reg>0&&reg<34) {
4793 emit_storereg(reg,hr);
4794 if( ((is32_pre&~uu)>>reg)&1 ) {
4795 emit_sarimm(hr,31,HOST_TEMPREG);
4796 emit_storereg(reg|64,HOST_TEMPREG);
4797 }
4798 }
4799 else if(reg>=64) {
4800 emit_storereg(reg,hr);
4801 }
4802 }
4803 }
4804 }
4805 }
4806 }
4807}
4808
4809
4810/* using strd could possibly help but you'd have to allocate registers in pairs
4811void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4812{
4813 int hr;
4814 int wrote=-1;
4815 for(hr=HOST_REGS-1;hr>=0;hr--) {
4816 if(hr!=EXCLUDE_REG) {
4817 if(pre[hr]!=entry[hr]) {
4818 if(pre[hr]>=0) {
4819 if((dirty>>hr)&1) {
4820 if(get_reg(entry,pre[hr])<0) {
4821 if(pre[hr]<64) {
4822 if(!((u>>pre[hr])&1)) {
4823 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4824 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4825 emit_sarimm(hr,31,hr+1);
4826 emit_strdreg(pre[hr],hr);
4827 }
4828 else
4829 emit_storereg(pre[hr],hr);
4830 }else{
4831 emit_storereg(pre[hr],hr);
4832 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4833 emit_sarimm(hr,31,hr);
4834 emit_storereg(pre[hr]|64,hr);
4835 }
4836 }
4837 }
4838 }else{
4839 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4840 emit_storereg(pre[hr],hr);
4841 }
4842 }
4843 wrote=hr;
4844 }
4845 }
4846 }
4847 }
4848 }
4849 }
4850 for(hr=0;hr<HOST_REGS;hr++) {
4851 if(hr!=EXCLUDE_REG) {
4852 if(pre[hr]!=entry[hr]) {
4853 if(pre[hr]>=0) {
4854 int nr;
4855 if((nr=get_reg(entry,pre[hr]))>=0) {
4856 emit_mov(hr,nr);
4857 }
4858 }
4859 }
4860 }
4861 }
4862}
4863#define wb_invalidate wb_invalidate_arm
4864*/
4865
dd3a91a1 4866// Clearing the cache is rather slow on ARM Linux, so mark the areas
4867// that need to be cleared, and then only clear these areas once.
4868void do_clear_cache()
4869{
4870 int i,j;
4871 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4872 {
4873 u_int bitmap=needs_clear_cache[i];
4874 if(bitmap) {
4875 u_int start,end;
4876 for(j=0;j<32;j++)
4877 {
4878 if(bitmap&(1<<j)) {
4879 start=BASE_ADDR+i*131072+j*4096;
4880 end=start+4095;
4881 j++;
4882 while(j<32) {
4883 if(bitmap&(1<<j)) {
4884 end+=4096;
4885 j++;
4886 }else{
4887 __clear_cache((void *)start,(void *)end);
4888 break;
4889 }
4890 }
4891 }
4892 }
4893 needs_clear_cache[i]=0;
4894 }
4895 }
4896}
4897
57871462 4898// CPU-architecture-specific initialization
4899void arch_init() {
3d624f89 4900#ifndef DISABLE_COP1
57871462 4901 rounding_modes[0]=0x0<<22; // round
4902 rounding_modes[1]=0x3<<22; // trunc
4903 rounding_modes[2]=0x1<<22; // ceil
4904 rounding_modes[3]=0x2<<22; // floor
3d624f89 4905#endif
57871462 4906}
b9b61529 4907
4908// vim:shiftwidth=2:expandtab