drc: merge Ari64's patch: 07_clear_cache
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
dd3a91a1 71unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
72
57871462 73/* Linker */
74
75void set_jump_target(int addr,u_int target)
76{
77 u_char *ptr=(u_char *)addr;
78 u_int *ptr2=(u_int *)ptr;
79 if(ptr[3]==0xe2) {
80 assert((target-(u_int)ptr2-8)<1024);
81 assert((addr&3)==0);
82 assert((target&3)==0);
83 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
84 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
85 }
86 else if(ptr[3]==0x72) {
87 // generated by emit_jno_unlikely
88 if((target-(u_int)ptr2-8)<1024) {
89 assert((addr&3)==0);
90 assert((target&3)==0);
91 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
92 }
93 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
94 assert((addr&3)==0);
95 assert((target&3)==0);
96 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
97 }
98 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
99 }
100 else {
101 assert((ptr[3]&0x0e)==0xa);
102 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
103 }
104}
105
106// This optionally copies the instruction from the target of the branch into
107// the space before the branch. Works, but the difference in speed is
108// usually insignificant.
109void set_jump_target_fillslot(int addr,u_int target,int copy)
110{
111 u_char *ptr=(u_char *)addr;
112 u_int *ptr2=(u_int *)ptr;
113 assert(!copy||ptr2[-1]==0xe28dd000);
114 if(ptr[3]==0xe2) {
115 assert(!copy);
116 assert((target-(u_int)ptr2-8)<4096);
117 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
118 }
119 else {
120 assert((ptr[3]&0x0e)==0xa);
121 u_int target_insn=*(u_int *)target;
122 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
123 copy=0;
124 }
125 if((target_insn&0x0c100000)==0x04100000) { // Load
126 copy=0;
127 }
128 if(target_insn&0x08000000) {
129 copy=0;
130 }
131 if(copy) {
132 ptr2[-1]=target_insn;
133 target+=4;
134 }
135 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
136 }
137}
138
139/* Literal pool */
140add_literal(int addr,int val)
141{
142 literals[literalcount][0]=addr;
143 literals[literalcount][1]=val;
144 literalcount++;
145}
146
f76eeef9 147void *kill_pointer(void *stub)
57871462 148{
149 int *ptr=(int *)(stub+4);
150 assert((*ptr&0x0ff00000)==0x05900000);
151 u_int offset=*ptr&0xfff;
152 int **l_ptr=(void *)ptr+offset+8;
153 int *i_ptr=*l_ptr;
154 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 155 return i_ptr;
57871462 156}
157
158int get_pointer(void *stub)
159{
160 //printf("get_pointer(%x)\n",(int)stub);
161 int *ptr=(int *)(stub+4);
162 assert((*ptr&0x0ff00000)==0x05900000);
163 u_int offset=*ptr&0xfff;
164 int **l_ptr=(void *)ptr+offset+8;
165 int *i_ptr=*l_ptr;
166 assert((*i_ptr&0x0f000000)==0x0a000000);
167 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
168}
169
170// Find the "clean" entry point from a "dirty" entry point
171// by skipping past the call to verify_code
172u_int get_clean_addr(int addr)
173{
174 int *ptr=(int *)addr;
175 #ifdef ARMv5_ONLY
176 ptr+=4;
177 #else
178 ptr+=6;
179 #endif
180 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
181 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
182 ptr++;
183 if((*ptr&0xFF000000)==0xea000000) {
184 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
185 }
186 return (u_int)ptr;
187}
188
189int verify_dirty(int addr)
190{
191 u_int *ptr=(u_int *)addr;
192 #ifdef ARMv5_ONLY
193 // get from literal pool
194 assert((*ptr&0xFFF00000)==0xe5900000);
195 u_int offset=*ptr&0xfff;
196 u_int *l_ptr=(void *)ptr+offset+8;
197 u_int source=l_ptr[0];
198 u_int copy=l_ptr[1];
199 u_int len=l_ptr[2];
200 ptr+=4;
201 #else
202 // ARMv7 movw/movt
203 assert((*ptr&0xFFF00000)==0xe3000000);
204 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
205 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
206 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
207 ptr+=6;
208 #endif
209 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
210 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 211 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 212 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
213 unsigned int page=source>>12;
214 unsigned int map_value=memory_map[page];
215 if(map_value>=0x80000000) return 0;
216 while(page<((source+len-1)>>12)) {
217 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
218 }
219 source = source+(map_value<<2);
220 }
221 //printf("verify_dirty: %x %x %x\n",source,copy,len);
222 return !memcmp((void *)source,(void *)copy,len);
223}
224
225// This doesn't necessarily find all clean entry points, just
226// guarantees that it's not dirty
227int isclean(int addr)
228{
229 #ifdef ARMv5_ONLY
230 int *ptr=((u_int *)addr)+4;
231 #else
232 int *ptr=((u_int *)addr)+6;
233 #endif
234 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
235 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
237 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
238 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
239 return 1;
240}
241
242void get_bounds(int addr,u_int *start,u_int *end)
243{
244 u_int *ptr=(u_int *)addr;
245 #ifdef ARMv5_ONLY
246 // get from literal pool
247 assert((*ptr&0xFFF00000)==0xe5900000);
248 u_int offset=*ptr&0xfff;
249 u_int *l_ptr=(void *)ptr+offset+8;
250 u_int source=l_ptr[0];
251 //u_int copy=l_ptr[1];
252 u_int len=l_ptr[2];
253 ptr+=4;
254 #else
255 // ARMv7 movw/movt
256 assert((*ptr&0xFFF00000)==0xe3000000);
257 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
258 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
259 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
260 ptr+=6;
261 #endif
262 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
263 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 264 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 265 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
266 if(memory_map[source>>12]>=0x80000000) source = 0;
267 else source = source+(memory_map[source>>12]<<2);
268 }
269 *start=source;
270 *end=source+len;
271}
272
273/* Register allocation */
274
275// Note: registers are allocated clean (unmodified state)
276// if you intend to modify the register, you must call dirty_reg().
277void alloc_reg(struct regstat *cur,int i,signed char reg)
278{
279 int r,hr;
280 int preferred_reg = (reg&7);
281 if(reg==CCREG) preferred_reg=HOST_CCREG;
282 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
283
284 // Don't allocate unused registers
285 if((cur->u>>reg)&1) return;
286
287 // see if it's already allocated
288 for(hr=0;hr<HOST_REGS;hr++)
289 {
290 if(cur->regmap[hr]==reg) return;
291 }
292
293 // Keep the same mapping if the register was already allocated in a loop
294 preferred_reg = loop_reg(i,reg,preferred_reg);
295
296 // Try to allocate the preferred register
297 if(cur->regmap[preferred_reg]==-1) {
298 cur->regmap[preferred_reg]=reg;
299 cur->dirty&=~(1<<preferred_reg);
300 cur->isconst&=~(1<<preferred_reg);
301 return;
302 }
303 r=cur->regmap[preferred_reg];
304 if(r<64&&((cur->u>>r)&1)) {
305 cur->regmap[preferred_reg]=reg;
306 cur->dirty&=~(1<<preferred_reg);
307 cur->isconst&=~(1<<preferred_reg);
308 return;
309 }
310 if(r>=64&&((cur->uu>>(r&63))&1)) {
311 cur->regmap[preferred_reg]=reg;
312 cur->dirty&=~(1<<preferred_reg);
313 cur->isconst&=~(1<<preferred_reg);
314 return;
315 }
316
317 // Clear any unneeded registers
318 // We try to keep the mapping consistent, if possible, because it
319 // makes branches easier (especially loops). So we try to allocate
320 // first (see above) before removing old mappings. If this is not
321 // possible then go ahead and clear out the registers that are no
322 // longer needed.
323 for(hr=0;hr<HOST_REGS;hr++)
324 {
325 r=cur->regmap[hr];
326 if(r>=0) {
327 if(r<64) {
328 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
329 }
330 else
331 {
332 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
333 }
334 }
335 }
336 // Try to allocate any available register, but prefer
337 // registers that have not been used recently.
338 if(i>0) {
339 for(hr=0;hr<HOST_REGS;hr++) {
340 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
341 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
342 cur->regmap[hr]=reg;
343 cur->dirty&=~(1<<hr);
344 cur->isconst&=~(1<<hr);
345 return;
346 }
347 }
348 }
349 }
350 // Try to allocate any available register
351 for(hr=0;hr<HOST_REGS;hr++) {
352 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
353 cur->regmap[hr]=reg;
354 cur->dirty&=~(1<<hr);
355 cur->isconst&=~(1<<hr);
356 return;
357 }
358 }
359
360 // Ok, now we have to evict someone
361 // Pick a register we hopefully won't need soon
362 u_char hsn[MAXREG+1];
363 memset(hsn,10,sizeof(hsn));
364 int j;
365 lsn(hsn,i,&preferred_reg);
366 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
367 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
368 if(i>0) {
369 // Don't evict the cycle count at entry points, otherwise the entry
370 // stub will have to write it.
371 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
372 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
373 for(j=10;j>=3;j--)
374 {
375 // Alloc preferred register if available
376 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
377 for(hr=0;hr<HOST_REGS;hr++) {
378 // Evict both parts of a 64-bit register
379 if((cur->regmap[hr]&63)==r) {
380 cur->regmap[hr]=-1;
381 cur->dirty&=~(1<<hr);
382 cur->isconst&=~(1<<hr);
383 }
384 }
385 cur->regmap[preferred_reg]=reg;
386 return;
387 }
388 for(r=1;r<=MAXREG;r++)
389 {
390 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
391 for(hr=0;hr<HOST_REGS;hr++) {
392 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
393 if(cur->regmap[hr]==r+64) {
394 cur->regmap[hr]=reg;
395 cur->dirty&=~(1<<hr);
396 cur->isconst&=~(1<<hr);
397 return;
398 }
399 }
400 }
401 for(hr=0;hr<HOST_REGS;hr++) {
402 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
403 if(cur->regmap[hr]==r) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
410 }
411 }
412 }
413 }
414 }
415 for(j=10;j>=0;j--)
416 {
417 for(r=1;r<=MAXREG;r++)
418 {
419 if(hsn[r]==j) {
420 for(hr=0;hr<HOST_REGS;hr++) {
421 if(cur->regmap[hr]==r+64) {
422 cur->regmap[hr]=reg;
423 cur->dirty&=~(1<<hr);
424 cur->isconst&=~(1<<hr);
425 return;
426 }
427 }
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(cur->regmap[hr]==r) {
430 cur->regmap[hr]=reg;
431 cur->dirty&=~(1<<hr);
432 cur->isconst&=~(1<<hr);
433 return;
434 }
435 }
436 }
437 }
438 }
439 printf("This shouldn't happen (alloc_reg)");exit(1);
440}
441
442void alloc_reg64(struct regstat *cur,int i,signed char reg)
443{
444 int preferred_reg = 8+(reg&1);
445 int r,hr;
446
447 // allocate the lower 32 bits
448 alloc_reg(cur,i,reg);
449
450 // Don't allocate unused registers
451 if((cur->uu>>reg)&1) return;
452
453 // see if the upper half is already allocated
454 for(hr=0;hr<HOST_REGS;hr++)
455 {
456 if(cur->regmap[hr]==reg+64) return;
457 }
458
459 // Keep the same mapping if the register was already allocated in a loop
460 preferred_reg = loop_reg(i,reg,preferred_reg);
461
462 // Try to allocate the preferred register
463 if(cur->regmap[preferred_reg]==-1) {
464 cur->regmap[preferred_reg]=reg|64;
465 cur->dirty&=~(1<<preferred_reg);
466 cur->isconst&=~(1<<preferred_reg);
467 return;
468 }
469 r=cur->regmap[preferred_reg];
470 if(r<64&&((cur->u>>r)&1)) {
471 cur->regmap[preferred_reg]=reg|64;
472 cur->dirty&=~(1<<preferred_reg);
473 cur->isconst&=~(1<<preferred_reg);
474 return;
475 }
476 if(r>=64&&((cur->uu>>(r&63))&1)) {
477 cur->regmap[preferred_reg]=reg|64;
478 cur->dirty&=~(1<<preferred_reg);
479 cur->isconst&=~(1<<preferred_reg);
480 return;
481 }
482
483 // Clear any unneeded registers
484 // We try to keep the mapping consistent, if possible, because it
485 // makes branches easier (especially loops). So we try to allocate
486 // first (see above) before removing old mappings. If this is not
487 // possible then go ahead and clear out the registers that are no
488 // longer needed.
489 for(hr=HOST_REGS-1;hr>=0;hr--)
490 {
491 r=cur->regmap[hr];
492 if(r>=0) {
493 if(r<64) {
494 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
495 }
496 else
497 {
498 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
499 }
500 }
501 }
502 // Try to allocate any available register, but prefer
503 // registers that have not been used recently.
504 if(i>0) {
505 for(hr=0;hr<HOST_REGS;hr++) {
506 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
507 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
508 cur->regmap[hr]=reg|64;
509 cur->dirty&=~(1<<hr);
510 cur->isconst&=~(1<<hr);
511 return;
512 }
513 }
514 }
515 }
516 // Try to allocate any available register
517 for(hr=0;hr<HOST_REGS;hr++) {
518 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
519 cur->regmap[hr]=reg|64;
520 cur->dirty&=~(1<<hr);
521 cur->isconst&=~(1<<hr);
522 return;
523 }
524 }
525
526 // Ok, now we have to evict someone
527 // Pick a register we hopefully won't need soon
528 u_char hsn[MAXREG+1];
529 memset(hsn,10,sizeof(hsn));
530 int j;
531 lsn(hsn,i,&preferred_reg);
532 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
533 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
534 if(i>0) {
535 // Don't evict the cycle count at entry points, otherwise the entry
536 // stub will have to write it.
537 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
538 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
539 for(j=10;j>=3;j--)
540 {
541 // Alloc preferred register if available
542 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
543 for(hr=0;hr<HOST_REGS;hr++) {
544 // Evict both parts of a 64-bit register
545 if((cur->regmap[hr]&63)==r) {
546 cur->regmap[hr]=-1;
547 cur->dirty&=~(1<<hr);
548 cur->isconst&=~(1<<hr);
549 }
550 }
551 cur->regmap[preferred_reg]=reg|64;
552 return;
553 }
554 for(r=1;r<=MAXREG;r++)
555 {
556 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
557 for(hr=0;hr<HOST_REGS;hr++) {
558 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
559 if(cur->regmap[hr]==r+64) {
560 cur->regmap[hr]=reg|64;
561 cur->dirty&=~(1<<hr);
562 cur->isconst&=~(1<<hr);
563 return;
564 }
565 }
566 }
567 for(hr=0;hr<HOST_REGS;hr++) {
568 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
569 if(cur->regmap[hr]==r) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576 }
577 }
578 }
579 }
580 }
581 for(j=10;j>=0;j--)
582 {
583 for(r=1;r<=MAXREG;r++)
584 {
585 if(hsn[r]==j) {
586 for(hr=0;hr<HOST_REGS;hr++) {
587 if(cur->regmap[hr]==r+64) {
588 cur->regmap[hr]=reg|64;
589 cur->dirty&=~(1<<hr);
590 cur->isconst&=~(1<<hr);
591 return;
592 }
593 }
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(cur->regmap[hr]==r) {
596 cur->regmap[hr]=reg|64;
597 cur->dirty&=~(1<<hr);
598 cur->isconst&=~(1<<hr);
599 return;
600 }
601 }
602 }
603 }
604 }
605 printf("This shouldn't happen");exit(1);
606}
607
608// Allocate a temporary register. This is done without regard to
609// dirty status or whether the register we request is on the unneeded list
610// Note: This will only allocate one register, even if called multiple times
611void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
612{
613 int r,hr;
614 int preferred_reg = -1;
615
616 // see if it's already allocated
617 for(hr=0;hr<HOST_REGS;hr++)
618 {
619 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
620 }
621
622 // Try to allocate any available register
623 for(hr=HOST_REGS-1;hr>=0;hr--) {
624 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
625 cur->regmap[hr]=reg;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631
632 // Find an unneeded register
633 for(hr=HOST_REGS-1;hr>=0;hr--)
634 {
635 r=cur->regmap[hr];
636 if(r>=0) {
637 if(r<64) {
638 if((cur->u>>r)&1) {
639 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
640 cur->regmap[hr]=reg;
641 cur->dirty&=~(1<<hr);
642 cur->isconst&=~(1<<hr);
643 return;
644 }
645 }
646 }
647 else
648 {
649 if((cur->uu>>(r&63))&1) {
650 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
651 cur->regmap[hr]=reg;
652 cur->dirty&=~(1<<hr);
653 cur->isconst&=~(1<<hr);
654 return;
655 }
656 }
657 }
658 }
659 }
660
661 // Ok, now we have to evict someone
662 // Pick a register we hopefully won't need soon
663 // TODO: we might want to follow unconditional jumps here
664 // TODO: get rid of dupe code and make this into a function
665 u_char hsn[MAXREG+1];
666 memset(hsn,10,sizeof(hsn));
667 int j;
668 lsn(hsn,i,&preferred_reg);
669 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
670 if(i>0) {
671 // Don't evict the cycle count at entry points, otherwise the entry
672 // stub will have to write it.
673 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
674 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
675 for(j=10;j>=3;j--)
676 {
677 for(r=1;r<=MAXREG;r++)
678 {
679 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
680 for(hr=0;hr<HOST_REGS;hr++) {
681 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
682 if(cur->regmap[hr]==r+64) {
683 cur->regmap[hr]=reg;
684 cur->dirty&=~(1<<hr);
685 cur->isconst&=~(1<<hr);
686 return;
687 }
688 }
689 }
690 for(hr=0;hr<HOST_REGS;hr++) {
691 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
692 if(cur->regmap[hr]==r) {
693 cur->regmap[hr]=reg;
694 cur->dirty&=~(1<<hr);
695 cur->isconst&=~(1<<hr);
696 return;
697 }
698 }
699 }
700 }
701 }
702 }
703 }
704 for(j=10;j>=0;j--)
705 {
706 for(r=1;r<=MAXREG;r++)
707 {
708 if(hsn[r]==j) {
709 for(hr=0;hr<HOST_REGS;hr++) {
710 if(cur->regmap[hr]==r+64) {
711 cur->regmap[hr]=reg;
712 cur->dirty&=~(1<<hr);
713 cur->isconst&=~(1<<hr);
714 return;
715 }
716 }
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(cur->regmap[hr]==r) {
719 cur->regmap[hr]=reg;
720 cur->dirty&=~(1<<hr);
721 cur->isconst&=~(1<<hr);
722 return;
723 }
724 }
725 }
726 }
727 }
728 printf("This shouldn't happen");exit(1);
729}
730// Allocate a specific ARM register.
731void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
732{
733 int n;
734
735 // see if it's already allocated (and dealloc it)
736 for(n=0;n<HOST_REGS;n++)
737 {
738 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
739 }
740
741 cur->regmap[hr]=reg;
742 cur->dirty&=~(1<<hr);
743 cur->isconst&=~(1<<hr);
744}
745
746// Alloc cycle count into dedicated register
747alloc_cc(struct regstat *cur,int i)
748{
749 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
750}
751
752/* Special alloc */
753
754
755/* Assembler */
756
// Printable name of each ARM register number, used in debug output.
// Registers 11, 13, 14 and 15 are shown by their role names.
char regname[16][4] = {
  [0]  = "r0",  [1]  = "r1",  [2]  = "r2",  [3]  = "r3",
  [4]  = "r4",  [5]  = "r5",  [6]  = "r6",  [7]  = "r7",
  [8]  = "r8",  [9]  = "r9",  [10] = "r10", [11] = "fp",
  [12] = "r12", [13] = "sp",  [14] = "lr",  [15] = "pc"
};
774
775void output_byte(u_char byte)
776{
777 *(out++)=byte;
778}
779void output_modrm(u_char mod,u_char rm,u_char ext)
780{
781 assert(mod<4);
782 assert(rm<8);
783 assert(ext<8);
784 u_char byte=(mod<<6)|(ext<<3)|rm;
785 *(out++)=byte;
786}
787void output_sib(u_char scale,u_char index,u_char base)
788{
789 assert(scale<4);
790 assert(index<8);
791 assert(base<8);
792 u_char byte=(scale<<6)|(index<<3)|base;
793 *(out++)=byte;
794}
795void output_w32(u_int word)
796{
797 *((u_int *)out)=word;
798 out+=4;
799}
// Build the register fields of an ARM instruction word:
// rn in bits 19-16, rd in bits 15-12, rm in bits 3-0.
// All three must be valid register numbers (0-15).
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the operand fields of an ARM immediate-form instruction:
// rn in bits 19-16, rd in bits 15-12, an 8-bit immediate in bits 7-0 and a
// rotate field chosen so that the immediate is shifted left by 'shift' bits.
// 'shift' must be even.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  // a left shift by N is encoded as a rotate right by 32-N
  u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express 'imm' as an ARM operand-2 immediate (an 8-bit value
// rotated right by an even amount).  On success the 12-bit encoding is
// stored in *encoded and 1 is returned; otherwise 0 is returned and
// *encoded is left untouched.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate the value right two bits at a time until it fits in 8 bits;
  // 'rot' tracks the matching rotate-field value (times two).
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 829void genimm_checked(u_int imm,u_int *encoded)
830{
831 u_int ret=genimm(imm,encoded);
832 assert(ret);
833}
57871462 834u_int genjmp(u_int addr)
835{
836 int offset=addr-(int)out-8;
e80343e2 837 if(offset<-33554432||offset>=33554432) {
838 if (addr>2) {
839 printf("genjmp: out of range: %08x\n", offset);
840 exit(1);
841 }
842 return 0;
843 }
57871462 844 return ((u_int)offset>>2)&0xffffff;
845}
846
847void emit_mov(int rs,int rt)
848{
849 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
850 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
851}
852
853void emit_movs(int rs,int rt)
854{
855 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
856 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
857}
858
859void emit_add(int rs1,int rs2,int rt)
860{
861 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
862 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
863}
864
865void emit_adds(int rs1,int rs2,int rt)
866{
867 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
868 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
869}
870
871void emit_adcs(int rs1,int rs2,int rt)
872{
873 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
874 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
875}
876
877void emit_sbc(int rs1,int rs2,int rt)
878{
879 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
880 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
881}
882
883void emit_sbcs(int rs1,int rs2,int rt)
884{
885 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
886 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
887}
888
889void emit_neg(int rs, int rt)
890{
891 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
892 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
893}
894
895void emit_negs(int rs, int rt)
896{
897 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
898 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
899}
900
901void emit_sub(int rs1,int rs2,int rt)
902{
903 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
904 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
905}
906
907void emit_subs(int rs1,int rs2,int rt)
908{
909 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
910 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
911}
912
913void emit_zeroreg(int rt)
914{
915 assem_debug("mov %s,#0\n",regname[rt]);
916 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
917}
918
790ee18e 919void emit_loadlp(u_int imm,u_int rt)
920{
921 add_literal((int)out,imm);
922 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
923 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
924}
925void emit_movw(u_int imm,u_int rt)
926{
927 assert(imm<65536);
928 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
929 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
930}
931void emit_movt(u_int imm,u_int rt)
932{
933 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
934 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
935}
936void emit_movimm(u_int imm,u_int rt)
937{
938 u_int armval;
939 if(genimm(imm,&armval)) {
940 assem_debug("mov %s,#%d\n",regname[rt],imm);
941 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
942 }else if(genimm(~imm,&armval)) {
943 assem_debug("mvn %s,#%d\n",regname[rt],imm);
944 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
945 }else if(imm<65536) {
946 #ifdef ARMv5_ONLY
947 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
948 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
949 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
950 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
951 #else
952 emit_movw(imm,rt);
953 #endif
954 }else{
955 #ifdef ARMv5_ONLY
956 emit_loadlp(imm,rt);
957 #else
958 emit_movw(imm&0x0000FFFF,rt);
959 emit_movt(imm&0xFFFF0000,rt);
960 #endif
961 }
962}
963void emit_pcreladdr(u_int rt)
964{
965 assem_debug("add %s,pc,#?\n",regname[rt]);
966 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
967}
968
57871462 969void emit_loadreg(int r, int hr)
970{
3d624f89 971#ifdef FORCE32
972 if(r&64) {
973 printf("64bit load in 32bit mode!\n");
974 exit(1);
975 }
976#endif
57871462 977 if((r&63)==0)
978 emit_zeroreg(hr);
979 else {
3d624f89 980 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 981 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
982 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
983 if(r==CCREG) addr=(int)&cycle_count;
984 if(r==CSREG) addr=(int)&Status;
985 if(r==FSREG) addr=(int)&FCR31;
986 if(r==INVCP) addr=(int)&invc_ptr;
987 u_int offset = addr-(u_int)&dynarec_local;
988 assert(offset<4096);
989 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
990 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
991 }
992}
993void emit_storereg(int r, int hr)
994{
3d624f89 995#ifdef FORCE32
996 if(r&64) {
997 printf("64bit store in 32bit mode!\n");
998 exit(1);
999 }
1000#endif
1001 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1002 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1003 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1004 if(r==CCREG) addr=(int)&cycle_count;
1005 if(r==FSREG) addr=(int)&FCR31;
1006 u_int offset = addr-(u_int)&dynarec_local;
1007 assert(offset<4096);
1008 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1009 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1010}
1011
1012void emit_test(int rs, int rt)
1013{
1014 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1015 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1016}
1017
1018void emit_testimm(int rs,int imm)
1019{
1020 u_int armval;
1021 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1022 genimm_checked(imm,&armval);
57871462 1023 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1024}
1025
b9b61529 1026void emit_testeqimm(int rs,int imm)
1027{
1028 u_int armval;
1029 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1030 genimm_checked(imm,&armval);
b9b61529 1031 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1032}
1033
57871462 1034void emit_not(int rs,int rt)
1035{
1036 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1037 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1038}
1039
b9b61529 1040void emit_mvnmi(int rs,int rt)
1041{
1042 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1043 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1044}
1045
57871462 1046void emit_and(u_int rs1,u_int rs2,u_int rt)
1047{
1048 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1049 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1050}
1051
1052void emit_or(u_int rs1,u_int rs2,u_int rt)
1053{
1054 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1055 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1056}
1057void emit_or_and_set_flags(int rs1,int rs2,int rt)
1058{
1059 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1060 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1061}
1062
f70d384d 1063void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1064{
1065 assert(rs<16);
1066 assert(rt<16);
1067 assert(imm<32);
1068 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1069 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1070}
1071
576bbd8f 1072void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1073{
1074 assert(rs<16);
1075 assert(rt<16);
1076 assert(imm<32);
1077 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1078 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1079}
1080
57871462 1081void emit_xor(u_int rs1,u_int rs2,u_int rt)
1082{
1083 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1084 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1085}
1086
57871462 1087void emit_addimm(u_int rs,int imm,u_int rt)
1088{
1089 assert(rs<16);
1090 assert(rt<16);
1091 if(imm!=0) {
1092 assert(imm>-65536&&imm<65536);
1093 u_int armval;
1094 if(genimm(imm,&armval)) {
1095 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1096 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1097 }else if(genimm(-imm,&armval)) {
1098 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1099 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1100 }else if(imm<0) {
1101 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1102 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1103 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1104 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1105 }else{
1106 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1107 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1108 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1109 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1110 }
1111 }
1112 else if(rs!=rt) emit_mov(rs,rt);
1113}
1114
1115void emit_addimm_and_set_flags(int imm,int rt)
1116{
1117 assert(imm>-65536&&imm<65536);
1118 u_int armval;
1119 if(genimm(imm,&armval)) {
1120 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1121 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1122 }else if(genimm(-imm,&armval)) {
1123 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1124 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1125 }else if(imm<0) {
1126 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1127 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1128 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1129 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1130 }else{
1131 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1132 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1133 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1134 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1135 }
1136}
1137void emit_addimm_no_flags(u_int imm,u_int rt)
1138{
1139 emit_addimm(rt,imm,rt);
1140}
1141
1142void emit_addnop(u_int r)
1143{
1144 assert(r<16);
1145 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1146 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1147}
1148
1149void emit_adcimm(u_int rs,int imm,u_int rt)
1150{
1151 u_int armval;
cfbd3c6e 1152 genimm_checked(imm,&armval);
57871462 1153 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1154 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1155}
1156/*void emit_sbcimm(int imm,u_int rt)
1157{
1158 u_int armval;
cfbd3c6e 1159 genimm_checked(imm,&armval);
57871462 1160 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1161 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1162}*/
1163void emit_sbbimm(int imm,u_int rt)
1164{
1165 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1166 assert(rt<8);
1167 if(imm<128&&imm>=-128) {
1168 output_byte(0x83);
1169 output_modrm(3,rt,3);
1170 output_byte(imm);
1171 }
1172 else
1173 {
1174 output_byte(0x81);
1175 output_modrm(3,rt,3);
1176 output_w32(imm);
1177 }
1178}
1179void emit_rscimm(int rs,int imm,u_int rt)
1180{
1181 assert(0);
1182 u_int armval;
cfbd3c6e 1183 genimm_checked(imm,&armval);
57871462 1184 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1185 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1186}
1187
1188void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1189{
1190 // TODO: if(genimm(imm,&armval)) ...
1191 // else
1192 emit_movimm(imm,HOST_TEMPREG);
1193 emit_adds(HOST_TEMPREG,rsl,rtl);
1194 emit_adcimm(rsh,0,rth);
1195}
1196
1197void emit_sbb(int rs1,int rs2)
1198{
1199 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1200 output_byte(0x19);
1201 output_modrm(3,rs1,rs2);
1202}
1203
1204void emit_andimm(int rs,int imm,int rt)
1205{
1206 u_int armval;
790ee18e 1207 if(imm==0) {
1208 emit_zeroreg(rt);
1209 }else if(genimm(imm,&armval)) {
57871462 1210 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1211 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1212 }else if(genimm(~imm,&armval)) {
1213 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1214 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1215 }else if(imm==65535) {
1216 #ifdef ARMv5_ONLY
1217 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1218 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1219 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1220 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1221 #else
1222 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1223 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1224 #endif
1225 }else{
1226 assert(imm>0&&imm<65535);
1227 #ifdef ARMv5_ONLY
1228 assem_debug("mov r14,#%d\n",imm&0xFF00);
1229 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1230 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1231 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1232 #else
1233 emit_movw(imm,HOST_TEMPREG);
1234 #endif
1235 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1236 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1237 }
1238}
1239
1240void emit_orimm(int rs,int imm,int rt)
1241{
1242 u_int armval;
790ee18e 1243 if(imm==0) {
1244 if(rs!=rt) emit_mov(rs,rt);
1245 }else if(genimm(imm,&armval)) {
57871462 1246 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1248 }else{
1249 assert(imm>0&&imm<65536);
1250 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1251 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1252 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1253 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1254 }
1255}
1256
1257void emit_xorimm(int rs,int imm,int rt)
1258{
57871462 1259 u_int armval;
790ee18e 1260 if(imm==0) {
1261 if(rs!=rt) emit_mov(rs,rt);
1262 }else if(genimm(imm,&armval)) {
57871462 1263 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1264 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1265 }else{
514ed0d9 1266 assert(imm>0&&imm<65536);
57871462 1267 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1268 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1269 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1270 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1271 }
1272}
1273
1274void emit_shlimm(int rs,u_int imm,int rt)
1275{
1276 assert(imm>0);
1277 assert(imm<32);
1278 //if(imm==1) ...
1279 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1281}
1282
1283void emit_shrimm(int rs,u_int imm,int rt)
1284{
1285 assert(imm>0);
1286 assert(imm<32);
1287 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1289}
1290
1291void emit_sarimm(int rs,u_int imm,int rt)
1292{
1293 assert(imm>0);
1294 assert(imm<32);
1295 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1296 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1297}
1298
1299void emit_rorimm(int rs,u_int imm,int rt)
1300{
1301 assert(imm>0);
1302 assert(imm<32);
1303 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1305}
1306
1307void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1308{
1309 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1310 assert(imm>0);
1311 assert(imm<32);
1312 //if(imm==1) ...
1313 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1315 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1316 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1317}
1318
1319void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1320{
1321 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1322 assert(imm>0);
1323 assert(imm<32);
1324 //if(imm==1) ...
1325 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1326 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1327 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1328 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1329}
1330
b9b61529 1331void emit_signextend16(int rs,int rt)
1332{
1333 #ifdef ARMv5_ONLY
1334 emit_shlimm(rs,16,rt);
1335 emit_sarimm(rt,16,rt);
1336 #else
1337 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1338 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1339 #endif
1340}
1341
57871462 1342void emit_shl(u_int rs,u_int shift,u_int rt)
1343{
1344 assert(rs<16);
1345 assert(rt<16);
1346 assert(shift<16);
1347 //if(imm==1) ...
1348 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1350}
1351void emit_shr(u_int rs,u_int shift,u_int rt)
1352{
1353 assert(rs<16);
1354 assert(rt<16);
1355 assert(shift<16);
1356 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1357 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1358}
1359void emit_sar(u_int rs,u_int shift,u_int rt)
1360{
1361 assert(rs<16);
1362 assert(rt<16);
1363 assert(shift<16);
1364 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1365 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1366}
1367void emit_shlcl(int r)
1368{
1369 assem_debug("shl %%%s,%%cl\n",regname[r]);
1370 assert(0);
1371}
1372void emit_shrcl(int r)
1373{
1374 assem_debug("shr %%%s,%%cl\n",regname[r]);
1375 assert(0);
1376}
1377void emit_sarcl(int r)
1378{
1379 assem_debug("sar %%%s,%%cl\n",regname[r]);
1380 assert(0);
1381}
1382
1383void emit_shldcl(int r1,int r2)
1384{
1385 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1386 assert(0);
1387}
1388void emit_shrdcl(int r1,int r2)
1389{
1390 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1391 assert(0);
1392}
1393void emit_orrshl(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1400}
1401void emit_orrshr(u_int rs,u_int shift,u_int rt)
1402{
1403 assert(rs<16);
1404 assert(rt<16);
1405 assert(shift<16);
1406 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1407 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1408}
1409
1410void emit_cmpimm(int rs,int imm)
1411{
1412 u_int armval;
1413 if(genimm(imm,&armval)) {
1414 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1415 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1416 }else if(genimm(-imm,&armval)) {
1417 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1418 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1419 }else if(imm>0) {
1420 assert(imm<65536);
1421 #ifdef ARMv5_ONLY
1422 emit_movimm(imm,HOST_TEMPREG);
1423 #else
1424 emit_movw(imm,HOST_TEMPREG);
1425 #endif
1426 assem_debug("cmp %s,r14\n",regname[rs]);
1427 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1428 }else{
1429 assert(imm>-65536);
1430 #ifdef ARMv5_ONLY
1431 emit_movimm(-imm,HOST_TEMPREG);
1432 #else
1433 emit_movw(-imm,HOST_TEMPREG);
1434 #endif
1435 assem_debug("cmn %s,r14\n",regname[rs]);
1436 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1437 }
1438}
1439
1440void emit_cmovne(u_int *addr,int rt)
1441{
1442 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1443 assert(0);
1444}
1445void emit_cmovl(u_int *addr,int rt)
1446{
1447 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1448 assert(0);
1449}
1450void emit_cmovs(u_int *addr,int rt)
1451{
1452 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1453 assert(0);
1454}
1455void emit_cmovne_imm(int imm,int rt)
1456{
1457 assem_debug("movne %s,#%d\n",regname[rt],imm);
1458 u_int armval;
cfbd3c6e 1459 genimm_checked(imm,&armval);
57871462 1460 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1461}
1462void emit_cmovl_imm(int imm,int rt)
1463{
1464 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1465 u_int armval;
cfbd3c6e 1466 genimm_checked(imm,&armval);
57871462 1467 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1468}
1469void emit_cmovb_imm(int imm,int rt)
1470{
1471 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1472 u_int armval;
cfbd3c6e 1473 genimm_checked(imm,&armval);
57871462 1474 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1475}
1476void emit_cmovs_imm(int imm,int rt)
1477{
1478 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1479 u_int armval;
cfbd3c6e 1480 genimm_checked(imm,&armval);
57871462 1481 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1482}
1483void emit_cmove_reg(int rs,int rt)
1484{
1485 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1486 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1487}
1488void emit_cmovne_reg(int rs,int rt)
1489{
1490 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1491 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1492}
1493void emit_cmovl_reg(int rs,int rt)
1494{
1495 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1496 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1497}
1498void emit_cmovs_reg(int rs,int rt)
1499{
1500 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1501 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1502}
1503
// rt = (rs < imm) ? 1 : 0, signed 32-bit compare.
// When rs and rt are the same register the zeroing has to happen after the
// compare (using emit_movimm) so the operand is still intact when compared.
void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// rt = (rs < imm) ? 1 : 0, using the carry-clear condition after the compare
// (unsigned "below").
// When rs and rt are the same register the zeroing has to happen after the
// compare (using emit_movimm) so the operand is still intact when compared.
void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// Signed "set if less than immediate" on a 64-bit value held in rsh:rsl.
// The low-word result is computed first, then overridden from the high word:
//   imm >= 0 : high word non-zero -> 0, but negative high word -> 1
//   imm <  0 : high word != -1    -> 0, but high word < -1    -> 1
// rsh must not be the same register as rt.
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// Unsigned "set if less than immediate" on a 64-bit value held in rsh:rsl.
// The low-word result is computed first, then overridden from the high word:
//   imm >= 0 : high word non-zero -> 0
//   imm <  0 : high word != -1    -> 1
// rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1550
1551void emit_cmp(int rs,int rt)
1552{
1553 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1554 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1555}
// rt = (rs > 0) ? 1 : 0 for a signed 32-bit rs:
// compare against 1, preset rt to 1, then clear it if rs < 1.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// rt = (rs != 0) ? 1 : 0.
// If rs and rt differ, rs is copied to rt with flags set (so rt is already 0
// when rs is 0); otherwise rs is just tested. A non-zero value is then
// replaced by 1.
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
// rt = (value > 0) ? 1 : 0 for a signed 64-bit value held in rsh:rsl.
//
// The low word is an unsigned quantity, so it contributes "is non-zero"
// rather than a signed ">0" test (a signed test would report 0 for
// hi=0, lo>=0x80000000). The high word then overrides the result:
//   high != 0 -> 1, high negative -> 0.
// rt is written before rsh is tested, so rsh is expected to differ from rt.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// rt = (rsh:rsl != 0) ? 1 : 0.
// ORs both halves into rt with flags set, then replaces a non-zero result by 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// rt = (rs1 < rs2) ? 1 : 0, signed 32-bit compare.
// If rt aliases either operand it is zeroed only after the compare
// (with emit_movimm), so the operand is intact when compared.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// rt = 1 if comparing rs1 with rs2 leaves the carry clear (unsigned rs1 < rs2),
// else 0.
// If rt aliases either operand it is zeroed only after the compare
// (with emit_movimm), so the operand is intact when compared.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1600void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1601{
1602 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1603 assert(u1!=rt);
1604 assert(u2!=rt);
1605 emit_cmp(l1,l2);
1606 emit_movimm(0,rt);
1607 emit_sbcs(u1,u2,HOST_TEMPREG);
1608 emit_cmovl_imm(1,rt);
1609}
1610void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1611{
1612 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1613 assert(u1!=rt);
1614 assert(u2!=rt);
1615 emit_cmp(l1,l2);
1616 emit_movimm(0,rt);
1617 emit_sbcs(u1,u2,HOST_TEMPREG);
1618 emit_cmovb_imm(1,rt);
1619}
1620
1621void emit_call(int a)
1622{
1623 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1624 u_int offset=genjmp(a);
1625 output_w32(0xeb000000|offset);
1626}
1627void emit_jmp(int a)
1628{
1629 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1630 u_int offset=genjmp(a);
1631 output_w32(0xea000000|offset);
1632}
// Emit "bne a": branch to address a if not equal.
void emit_jne(int a)
{
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
// Emit "beq a": branch to address a if equal.
void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
// Emit "bmi a": branch to address a if negative.
void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
// Emit "bpl a": branch to address a if positive or zero.
void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
// Emit "blt a": branch to address a if signed less-than.
void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
// Emit "bge a": branch to address a if signed greater-or-equal.
void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
// Emit "bvc a": branch to address a if the overflow flag is clear.
void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
// Emit "bcs a": branch to address a if the carry flag is set.
void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
// Emit "bcc a": branch to address a if the carry flag is clear.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n", a);
  output_w32(0x3a000000 | genjmp(a));
}
1687
// Placeholder: traces the request, then hits assert(0). No instruction is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Placeholder: traces the request, then hits assert(0). No instruction is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Placeholder: traces the request, then hits assert(0). No instruction is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1703void emit_pushreg(u_int r)
1704{
1705 assem_debug("push %%%s\n",regname[r]);
1706 assert(0);
1707}
1708void emit_popreg(u_int r)
1709{
1710 assem_debug("pop %%%s\n",regname[r]);
1711 assert(0);
1712}
1713void emit_callreg(u_int r)
1714{
1715 assem_debug("call *%%%s\n",regname[r]);
1716 assert(0);
1717}
1718void emit_jmpreg(u_int r)
1719{
1720 assem_debug("mov pc,%s\n",regname[r]);
1721 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1722}
1723
1724void emit_readword_indexed(int offset, int rs, int rt)
1725{
1726 assert(offset>-4096&&offset<4096);
1727 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1728 if(offset>=0) {
1729 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1730 }else{
1731 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1732 }
1733}
1734void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1735{
1736 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1737 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1738}
// Word load with optional map register.
// map < 0 : plain base+offset load.
// else    : load from rs + map*4; addr must be 0 in this case.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (first word, skipped when rh < 0) and rl (second word).
// map < 0 : two plain loads at addr and addr+4.
// else    : mapped loads; map is incremented by 1 between them (map is scaled
//           by 4 in the access, so this advances one word). Note that this
//           modifies the map register. rh must not be rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map >= 0) {
    assert(rh != rs);
    if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map, 1, map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if (want_high) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr + 4, rs, rl);
}
1759void emit_movsbl_indexed(int offset, int rs, int rt)
1760{
1761 assert(offset>-256&&offset<256);
1762 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1763 if(offset>=0) {
1764 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1765 }else{
1766 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1767 }
1768}
1769void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1770{
1771 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1772 else {
1773 if(addr==0) {
1774 emit_shlimm(map,2,map);
1775 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1776 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1777 }else{
1778 assert(addr>-256&&addr<256);
1779 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1780 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1781 emit_movsbl_indexed(addr, rt, rt);
1782 }
1783 }
1784}
1785void emit_movswl_indexed(int offset, int rs, int rt)
1786{
1787 assert(offset>-256&&offset<256);
1788 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1789 if(offset>=0) {
1790 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1791 }else{
1792 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1793 }
1794}
1795void emit_movzbl_indexed(int offset, int rs, int rt)
1796{
1797 assert(offset>-4096&&offset<4096);
1798 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1799 if(offset>=0) {
1800 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1801 }else{
1802 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1803 }
1804}
1805void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1806{
1807 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1808 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1809}
// Zero-extending byte load with optional map register.
// map < 0   : plain base+offset load.
// addr == 0 : load from rs + map*4.
// otherwise : rt = rs + addr first, then load from rt + map*4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1822void emit_movzwl_indexed(int offset, int rs, int rt)
1823{
1824 assert(offset>-256&&offset<256);
1825 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1826 if(offset>=0) {
1827 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1828 }else{
1829 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1830 }
1831}
1832void emit_readword(int addr, int rt)
1833{
1834 u_int offset = addr-(u_int)&dynarec_local;
1835 assert(offset<4096);
1836 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1837 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1838}
1839void emit_movsbl(int addr, int rt)
1840{
1841 u_int offset = addr-(u_int)&dynarec_local;
1842 assert(offset<256);
1843 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1844 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1845}
1846void emit_movswl(int addr, int rt)
1847{
1848 u_int offset = addr-(u_int)&dynarec_local;
1849 assert(offset<256);
1850 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1851 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1852}
1853void emit_movzbl(int addr, int rt)
1854{
1855 u_int offset = addr-(u_int)&dynarec_local;
1856 assert(offset<4096);
1857 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1858 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1859}
1860void emit_movzwl(int addr, int rt)
1861{
1862 u_int offset = addr-(u_int)&dynarec_local;
1863 assert(offset<256);
1864 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1865 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1866}
1867void emit_movzwl_reg(int rs, int rt)
1868{
1869 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1870 assert(0);
1871}
1872
1873void emit_xchg(int rs, int rt)
1874{
1875 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1876 assert(0);
1877}
1878void emit_writeword_indexed(int rt, int offset, int rs)
1879{
1880 assert(offset>-4096&&offset<4096);
1881 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1882 if(offset>=0) {
1883 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1884 }else{
1885 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1886 }
1887}
1888void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1889{
1890 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1891 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1892}
// Word store with optional map register.
// map < 0 : plain base+offset store.
// else    : store to rs + map*4; addr must be 0 in this case.
// The temp parameter is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (first word) and rl (second word).
// map < 0 : two plain stores at addr and addr+4 (rh is skipped when < 0).
// else    : rh must be valid. When temp is a distinct register from rs it
//           receives map+1 and is used as the map for the second word;
//           otherwise rs itself is advanced by 4 before the second store.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh >= 0);
  const int have_temp = (temp != rs);
  if (have_temp) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (!have_temp) emit_addimm(rs, 4, rs);
  emit_writeword_indexed_tlb(rl, addr, rs, have_temp ? temp : map, temp);
}
1917void emit_writehword_indexed(int rt, int offset, int rs)
1918{
1919 assert(offset>-256&&offset<256);
1920 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1921 if(offset>=0) {
1922 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1923 }else{
1924 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1925 }
1926}
1927void emit_writebyte_indexed(int rt, int offset, int rs)
1928{
1929 assert(offset>-4096&&offset<4096);
1930 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1933 }else{
1934 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1935 }
1936}
1937void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1938{
1939 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1940 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1941}
// Byte store with optional map register.
// map < 0   : plain base+offset store.
// addr == 0 : store to rs + map*4.
// otherwise : temp = rs + addr first, then store to temp + map*4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1954void emit_writeword(int rt, int addr)
1955{
1956 u_int offset = addr-(u_int)&dynarec_local;
1957 assert(offset<4096);
1958 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1959 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1960}
1961void emit_writehword(int rt, int addr)
1962{
1963 u_int offset = addr-(u_int)&dynarec_local;
1964 assert(offset<256);
1965 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1966 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1967}
1968void emit_writebyte(int rt, int addr)
1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<4096);
74426039 1972 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1973 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1974}
// Placeholder: traces the request, then hits assert(0). No instruction is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Placeholder: traces the request, then hits assert(0). No instruction is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1985
1986void emit_mul(int rs)
1987{
1988 assem_debug("mul %%%s\n",regname[rs]);
1989 assert(0);
1990}
1991void emit_imul(int rs)
1992{
1993 assem_debug("imul %%%s\n",regname[rs]);
1994 assert(0);
1995}
1996void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1997{
1998 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1999 assert(rs1<16);
2000 assert(rs2<16);
2001 assert(hi<16);
2002 assert(lo<16);
2003 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2004}
2005void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2006{
2007 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2008 assert(rs1<16);
2009 assert(rs2<16);
2010 assert(hi<16);
2011 assert(lo<16);
2012 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2013}
2014
2015void emit_div(int rs)
2016{
2017 assem_debug("div %%%s\n",regname[rs]);
2018 assert(0);
2019}
2020void emit_idiv(int rs)
2021{
2022 assem_debug("idiv %%%s\n",regname[rs]);
2023 assert(0);
2024}
// Placeholder: traces the request, then hits assert(0). No instruction is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2030
2031void emit_clz(int rs,int rt)
2032{
2033 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2034 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2035}
2036
2037void emit_subcs(int rs1,int rs2,int rt)
2038{
2039 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2040 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2041}
2042
2043void emit_shrcc_imm(int rs,u_int imm,int rt)
2044{
2045 assert(imm>0);
2046 assert(imm<32);
2047 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2048 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2049}
2050
2051void emit_negmi(int rs, int rt)
2052{
2053 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2054 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2055}
2056
2057void emit_negsmi(int rs, int rt)
2058{
2059 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2060 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2061}
2062
2063void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2064{
2065 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2066 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2067}
2068
2069void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2070{
2071 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2072 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2073}
2074
2075void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2076{
2077 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2078 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2079}
2080
2081void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2082{
2083 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2084 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2085}
2086
2087void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2088{
2089 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2090 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2091}
2092
2093void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2094{
2095 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2096 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2097}
2098
2099void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2100{
2101 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2102 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2103}
2104
2105void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2106{
2107 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2108 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2109}
2110
2111void emit_teq(int rs, int rt)
2112{
2113 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2114 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2115}
2116
2117void emit_rsbimm(int rs, int imm, int rt)
2118{
2119 u_int armval;
cfbd3c6e 2120 genimm_checked(imm,&armval);
57871462 2121 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2122 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2123}
2124
2125// Load 2 immediates optimizing for small code size
2126void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2127{
2128 emit_movimm(imm1,rt1);
2129 u_int armval;
2130 if(genimm(imm2-imm1,&armval)) {
2131 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2132 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2133 }else if(genimm(imm1-imm2,&armval)) {
2134 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2135 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2136 }
2137 else emit_movimm(imm2,rt2);
2138}
2139
2140// Conditionally select one of two immediates, optimizing for small code size
2141// This will only be called if HAVE_CMOV_IMM is defined
2142void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2143{
2144 u_int armval;
2145 if(genimm(imm2-imm1,&armval)) {
2146 emit_movimm(imm1,rt);
2147 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2148 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2149 }else if(genimm(imm1-imm2,&armval)) {
2150 emit_movimm(imm1,rt);
2151 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2152 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2153 }
2154 else {
2155 #ifdef ARMv5_ONLY
2156 emit_movimm(imm1,rt);
2157 add_literal((int)out,imm2);
2158 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2159 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2160 #else
2161 emit_movw(imm1&0x0000FFFF,rt);
2162 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2163 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2164 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2165 }
2166 emit_movt(imm1&0xFFFF0000,rt);
2167 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2168 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2169 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2170 }
2171 #endif
2172 }
2173}
2174
// Special case for checking invalid_code.
// Not implemented in this backend: calling it always trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2180
2181// special case for checking invalid_code
2182void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2183{
2184 assert(imm<128&&imm>=0);
2185 assert(r>=0&&r<16);
2186 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2187 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2188 emit_cmpimm(HOST_TEMPREG,imm);
2189}
2190
2191// special case for tlb mapping
2192void emit_addsr12(int rs1,int rs2,int rt)
2193{
2194 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2195 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2196}
2197
// Used to preload hash table entries.
// NOTE(review): unlike the other emitters here, this writes two raw bytes
// plus a modrm byte followed by a 32-bit address, which looks like an x86
// encoding rather than an ARM one - confirm it is never called on ARM.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2207void emit_prefetchreg(int r)
2208{
2209 assem_debug("pld %s\n",regname[r]);
2210 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2211}
2212
2213// Special case for mini_ht
2214void emit_ldreq_indexed(int rs, u_int offset, int rt)
2215{
2216 assert(offset<4096);
2217 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2218 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2219}
2220
2221void emit_flds(int r,int sr)
2222{
2223 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2224 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2225}
2226
2227void emit_vldr(int r,int vr)
2228{
2229 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2230 output_w32(0xed900b00|(vr<<12)|(r<<16));
2231}
2232
2233void emit_fsts(int sr,int r)
2234{
2235 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2236 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2237}
2238
2239void emit_vstr(int vr,int r)
2240{
2241 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2242 output_w32(0xed800b00|(vr<<12)|(r<<16));
2243}
2244
2245void emit_ftosizs(int s,int d)
2246{
2247 assem_debug("ftosizs s%d,s%d\n",d,s);
2248 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2249}
2250
2251void emit_ftosizd(int s,int d)
2252{
2253 assem_debug("ftosizd s%d,d%d\n",d,s);
2254 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2255}
2256
2257void emit_fsitos(int s,int d)
2258{
2259 assem_debug("fsitos s%d,s%d\n",d,s);
2260 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2261}
2262
2263void emit_fsitod(int s,int d)
2264{
2265 assem_debug("fsitod d%d,s%d\n",d,s);
2266 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2267}
2268
2269void emit_fcvtds(int s,int d)
2270{
2271 assem_debug("fcvtds d%d,s%d\n",d,s);
2272 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2273}
2274
2275void emit_fcvtsd(int s,int d)
2276{
2277 assem_debug("fcvtsd s%d,d%d\n",d,s);
2278 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2279}
2280
// Emit "fsqrts s<d>,s<s>" (single-precision square root).
// Both operands are encoded as single-precision registers (split encoding),
// so the debug trace names both as s-registers; it used to label the
// destination as a d-register, which did not match the emitted instruction.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2286
// Emit "fsqrtd d<d>,d<s>" (double-precision square root).
// Both operands are encoded as double-precision register numbers (masked to
// 0..7), so the debug trace names both as d-registers; it used to label the
// destination as an s-register.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2292
// Emit "fabss s<d>,s<s>" (single-precision absolute value).
// Both operands use the split single-precision encoding; the debug trace
// now prints the destination as an s-register to match (it printed "d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2298
// Emit "fabsd d<d>,d<s>" (double-precision absolute value).
// Both operands are double-precision register numbers (masked to 0..7); the
// debug trace now prints the destination as a d-register (it printed "s").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2304
// Emit "fnegs s<d>,s<s>" (single-precision negate).
// Both operands use the split single-precision encoding; the debug trace
// now prints the destination as an s-register to match (it printed "d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2310
// Emit "fnegd d<d>,d<s>" (double-precision negate).
// Both operands are double-precision register numbers (masked to 0..7); the
// debug trace now prints the destination as a d-register (it printed "s").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2316
2317void emit_fadds(int s1,int s2,int d)
2318{
2319 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2320 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2321}
2322
2323void emit_faddd(int s1,int s2,int d)
2324{
2325 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2326 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2327}
2328
2329void emit_fsubs(int s1,int s2,int d)
2330{
2331 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2332 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2333}
2334
2335void emit_fsubd(int s1,int s2,int d)
2336{
2337 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2338 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2339}
2340
2341void emit_fmuls(int s1,int s2,int d)
2342{
2343 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2344 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2345}
2346
2347void emit_fmuld(int s1,int s2,int d)
2348{
2349 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2350 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2351}
2352
2353void emit_fdivs(int s1,int s2,int d)
2354{
2355 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2356 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2357}
2358
2359void emit_fdivd(int s1,int s2,int d)
2360{
2361 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2362 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2363}
2364
2365void emit_fcmps(int x,int y)
2366{
2367 assem_debug("fcmps s14, s15\n");
2368 output_w32(0xeeb47a67);
2369}
2370
2371void emit_fcmpd(int x,int y)
2372{
2373 assem_debug("fcmpd d6, d7\n");
2374 output_w32(0xeeb46b47);
2375}
2376
2377void emit_fmstat()
2378{
2379 assem_debug("fmstat\n");
2380 output_w32(0xeef1fa10);
2381}
2382
2383void emit_bicne_imm(int rs,int imm,int rt)
2384{
2385 u_int armval;
cfbd3c6e 2386 genimm_checked(imm,&armval);
57871462 2387 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2388 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2389}
2390
2391void emit_biccs_imm(int rs,int imm,int rt)
2392{
2393 u_int armval;
cfbd3c6e 2394 genimm_checked(imm,&armval);
57871462 2395 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2396 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2397}
2398
2399void emit_bicvc_imm(int rs,int imm,int rt)
2400{
2401 u_int armval;
cfbd3c6e 2402 genimm_checked(imm,&armval);
57871462 2403 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2404 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2405}
2406
2407void emit_bichi_imm(int rs,int imm,int rt)
2408{
2409 u_int armval;
cfbd3c6e 2410 genimm_checked(imm,&armval);
57871462 2411 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2412 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2413}
2414
2415void emit_orrvs_imm(int rs,int imm,int rt)
2416{
2417 u_int armval;
cfbd3c6e 2418 genimm_checked(imm,&armval);
57871462 2419 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2420 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2421}
2422
b9b61529 2423void emit_orrne_imm(int rs,int imm,int rt)
2424{
2425 u_int armval;
cfbd3c6e 2426 genimm_checked(imm,&armval);
b9b61529 2427 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2428 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2429}
2430
2431void emit_andne_imm(int rs,int imm,int rt)
2432{
2433 u_int armval;
cfbd3c6e 2434 genimm_checked(imm,&armval);
b9b61529 2435 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2436 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2437}
2438
57871462 2439void emit_jno_unlikely(int a)
2440{
2441 //emit_jno(a);
2442 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2443 output_w32(0x72800000|rd_rn_rm(15,15,0));
2444}
2445
2446// Save registers before function call
2447void save_regs(u_int reglist)
2448{
2449 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2450 if(!reglist) return;
2451 assem_debug("stmia fp,{");
2452 if(reglist&1) assem_debug("r0, ");
2453 if(reglist&2) assem_debug("r1, ");
2454 if(reglist&4) assem_debug("r2, ");
2455 if(reglist&8) assem_debug("r3, ");
2456 if(reglist&0x1000) assem_debug("r12");
2457 assem_debug("}\n");
2458 output_w32(0xe88b0000|reglist);
2459}
2460// Restore registers after function call
2461void restore_regs(u_int reglist)
2462{
2463 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2464 if(!reglist) return;
2465 assem_debug("ldmia fp,{");
2466 if(reglist&1) assem_debug("r0, ");
2467 if(reglist&2) assem_debug("r1, ");
2468 if(reglist&4) assem_debug("r2, ");
2469 if(reglist&8) assem_debug("r3, ");
2470 if(reglist&0x1000) assem_debug("r12");
2471 assem_debug("}\n");
2472 output_w32(0xe89b0000|reglist);
2473}
2474
2475// Write back consts using r14 so we don't disturb the other registers
2476void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2477{
2478 int hr;
2479 for(hr=0;hr<HOST_REGS;hr++) {
2480 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2481 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2482 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2483 int value=constmap[i][hr];
2484 if(value==0) {
2485 emit_zeroreg(HOST_TEMPREG);
2486 }
2487 else {
2488 emit_movimm(value,HOST_TEMPREG);
2489 }
2490 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2491#ifndef FORCE32
57871462 2492 if((i_is32>>i_regmap[hr])&1) {
2493 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2494 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2495 }
24385cae 2496#endif
57871462 2497 }
2498 }
2499 }
2500 }
2501}
2502
2503/* Stubs/epilogue */
2504
2505void literal_pool(int n)
2506{
2507 if(!literalcount) return;
2508 if(n) {
2509 if((int)out-literals[0][0]<4096-n) return;
2510 }
2511 u_int *ptr;
2512 int i;
2513 for(i=0;i<literalcount;i++)
2514 {
2515 ptr=(u_int *)literals[i][0];
2516 u_int offset=(u_int)out-(u_int)ptr-8;
2517 assert(offset<4096);
2518 assert(!(offset&3));
2519 *ptr|=offset;
2520 output_w32(literals[i][1]);
2521 }
2522 literalcount=0;
2523}
2524
2525void literal_pool_jumpover(int n)
2526{
2527 if(!literalcount) return;
2528 if(n) {
2529 if((int)out-literals[0][0]<4096-n) return;
2530 }
2531 int jaddr=(int)out;
2532 emit_jmp(0);
2533 literal_pool(0);
2534 set_jump_target(jaddr,(int)out);
2535}
2536
2537emit_extjump2(int addr, int target, int linker)
2538{
2539 u_char *ptr=(u_char *)addr;
2540 assert((ptr[3]&0x0e)==0xa);
2541 emit_loadlp(target,0);
2542 emit_loadlp(addr,1);
24385cae 2543 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2544 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2545//DEBUG >
2546#ifdef DEBUG_CYCLE_COUNT
2547 emit_readword((int)&last_count,ECX);
2548 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2549 emit_readword((int)&next_interupt,ECX);
2550 emit_writeword(HOST_CCREG,(int)&Count);
2551 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2552 emit_writeword(ECX,(int)&last_count);
2553#endif
2554//DEBUG <
2555 emit_jmp(linker);
2556}
2557
2558emit_extjump(int addr, int target)
2559{
2560 emit_extjump2(addr, target, (int)dyna_linker);
2561}
2562emit_extjump_ds(int addr, int target)
2563{
2564 emit_extjump2(addr, target, (int)dyna_linker_ds);
2565}
2566
// Emit the out-of-line slow path for the memory read described by stubs[n].
// stubs[n] fields as used below: [0] stub type, [1] location passed to
// set_jump_target so that it lands on this stub, [2] return address,
// [3] instruction index i, [4] host register whose value is written to
// `address`, [5] struct regstat pointer, [6] cycle adjustment,
// [7] register list for save_regs/restore_regs.
// The generated code calls indirect_jump_indexed and afterwards copies the
// result from readmem_dword into the destination host register.
// NOTE(review): defined with no return type (implicit int) and no return
// statement.
2567do_readstub(int n)
2568{
2569 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2570 literal_pool(256);
2571 set_jump_target(stubs[n][1],(int)out);
2572 int type=stubs[n][0];
2573 int i=stubs[n][3];
2574 int rs=stubs[n][4];
2575 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2576 u_int reglist=stubs[n][7];
2577 signed char *i_regmap=i_regs->regmap;
2578 int addr=get_reg(i_regmap,AGEN1+(i&1));
2579 int rth,rt;
2580 int ds;
// C1LS, C2LS and LOADLR instructions use the FTEMP mapping as destination;
// everything else uses the mapping of rt1[i].
b9b61529 2581 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2582 rth=get_reg(i_regmap,FTEMP|64);
2583 rt=get_reg(i_regmap,FTEMP);
2584 }else{
2585 rth=get_reg(i_regmap,rt1[i]|64);
2586 rt=get_reg(i_regmap,rt1[i]);
2587 }
2588 assert(rs>=0);
// Fall back to the destination register, then to a temporary (-1 mapping),
// when no address-generation register is mapped.
57871462 2589 if(addr<0) addr=rt;
535d208a 2590 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2591 assert(addr>=0);
// Select the handler table matching the stub type.
2592 int ftable=0;
2593 if(type==LOADB_STUB||type==LOADBU_STUB)
2594 ftable=(int)readmemb;
2595 if(type==LOADH_STUB||type==LOADHU_STUB)
2596 ftable=(int)readmemh;
2597 if(type==LOADW_STUB)
2598 ftable=(int)readmem;
24385cae 2599#ifndef FORCE32
57871462 2600 if(type==LOADD_STUB)
2601 ftable=(int)readmemd;
24385cae 2602#endif
2603 assert(ftable!=0);
57871462 2604 emit_writeword(rs,(int)&address);
2605 //emit_pusha();
2606 save_regs(reglist);
// ds is nonzero when i_regs is not the regstat of instruction i itself
// (presumably a delay-slot instruction - confirm against the caller).
2607 ds=i_regs!=&regs[i];
2608 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2609 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2610 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2611 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2612 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
// Call setup: r1 = address>>16, r0 = handler table, r2 = cycle count plus
// adjustment, r3 = instruction address with extra low bits (was32 flag, ds).
2613 emit_shrimm(rs,16,1);
2614 int cc=get_reg(i_regmap,CCREG);
2615 if(cc<0) {
2616 emit_loadreg(CCREG,2);
2617 }
2618 emit_movimm(ftable,0);
2619 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2620 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2621 //emit_readword((int)&last_count,temp);
2622 //emit_add(cc,temp,cc);
2623 //emit_writeword(cc,(int)&Count);
2624 //emit_mov(15,14);
2625 emit_call((int)&indirect_jump_indexed);
2626 //emit_callreg(rs);
2627 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2628 // We really shouldn't need to update the count here,
2629 // but not doing so causes random crashes...
2630 emit_readword((int)&Count,HOST_TEMPREG);
2631 emit_readword((int)&next_interupt,2);
2632 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2633 emit_writeword(2,(int)&last_count);
2634 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2635 if(cc<0) {
2636 emit_storereg(CCREG,HOST_TEMPREG);
2637 }
2638 //emit_popa();
2639 restore_regs(reglist);
2640 //if((cc=get_reg(regmap,CCREG))>=0) {
2641 // emit_loadreg(CCREG,cc);
2642 //}
// Fetch the result from readmem_dword with the load variant matching the
// stub type.
f18c0f46 2643 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2644 assert(rt>=0);
2645 if(type==LOADB_STUB)
2646 emit_movsbl((int)&readmem_dword,rt);
2647 if(type==LOADBU_STUB)
2648 emit_movzbl((int)&readmem_dword,rt);
2649 if(type==LOADH_STUB)
2650 emit_movswl((int)&readmem_dword,rt);
2651 if(type==LOADHU_STUB)
2652 emit_movzwl((int)&readmem_dword,rt);
2653 if(type==LOADW_STUB)
2654 emit_readword((int)&readmem_dword,rt);
2655 if(type==LOADD_STUB) {
2656 emit_readword((int)&readmem_dword,rt);
2657 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2658 }
57871462 2659 }
2660 emit_jmp(stubs[n][2]); // return address
2661}
2662
// Emit an inline call to the read handler for the constant address `addr`.
// The handler is looked up at code-generation time as
// ((u_int *)ftable)[addr>>16] and called through indirect_jump; the result
// is then copied from readmem_dword into the host register mapped to
// `target` (if any).
//   type    - LOAD*_STUB kind, selects handler table and result extension
//   i       - instruction index (used for the address passed in r3)
//   regmap  - host register map used for get_reg lookups
//   adj     - cycle adjustment, scaled by CLOCK_DIVIDER
//   reglist - register list for save_regs/restore_regs
// NOTE(review): defined with no return type (implicit int) and no return
// statement.
2663inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2664{
2665 int rs=get_reg(regmap,target);
2666 int rth=get_reg(regmap,target|64);
2667 int rt=get_reg(regmap,target);
535d208a 2668 if(rs<0) rs=get_reg(regmap,-1);
57871462 2669 assert(rs>=0);
57871462 2670 int ftable=0;
2671 if(type==LOADB_STUB||type==LOADBU_STUB)
2672 ftable=(int)readmemb;
2673 if(type==LOADH_STUB||type==LOADHU_STUB)
2674 ftable=(int)readmemh;
2675 if(type==LOADW_STUB)
2676 ftable=(int)readmem;
24385cae 2677#ifndef FORCE32
57871462 2678 if(type==LOADD_STUB)
2679 ftable=(int)readmemd;
24385cae 2680#endif
2681 assert(ftable!=0);
// When target is 0, the constant address is loaded into rs here before it
// is written to `address`.
fd99c415 2682 if(target==0)
2683 emit_movimm(addr,rs);
57871462 2684 emit_writeword(rs,(int)&address);
2685 //emit_pusha();
2686 save_regs(reglist);
2687 //emit_shrimm(rs,16,1);
2688 int cc=get_reg(regmap,CCREG);
2689 if(cc<0) {
2690 emit_loadreg(CCREG,2);
2691 }
2692 //emit_movimm(ftable,0);
// r0 = handler address resolved now from the table; r2 = cycle count plus
// adjustment.
2693 emit_movimm(((u_int *)ftable)[addr>>16],0);
2694 //emit_readword((int)&last_count,12);
2695 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2696 if((signed int)addr>=(signed int)0xC0000000) {
2697 // Pagefault address
2698 int ds=regmap!=regs[i].regmap;
2699 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2700 }
2701 //emit_add(12,2,2);
2702 //emit_writeword(2,(int)&Count);
2703 //emit_call(((u_int *)ftable)[addr>>16]);
2704 emit_call((int)&indirect_jump);
2705 // We really shouldn't need to update the count here,
2706 // but not doing so causes random crashes...
2707 emit_readword((int)&Count,HOST_TEMPREG);
2708 emit_readword((int)&next_interupt,2);
2709 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2710 emit_writeword(2,(int)&last_count);
2711 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2712 if(cc<0) {
2713 emit_storereg(CCREG,HOST_TEMPREG);
2714 }
2715 //emit_popa();
2716 restore_regs(reglist);
// Fetch the result from readmem_dword with the load variant matching the
// stub type; skipped when target has no host register.
fd99c415 2717 if(rt>=0) {
2718 if(type==LOADB_STUB)
2719 emit_movsbl((int)&readmem_dword,rt);
2720 if(type==LOADBU_STUB)
2721 emit_movzbl((int)&readmem_dword,rt);
2722 if(type==LOADH_STUB)
2723 emit_movswl((int)&readmem_dword,rt);
2724 if(type==LOADHU_STUB)
2725 emit_movzwl((int)&readmem_dword,rt);
2726 if(type==LOADW_STUB)
2727 emit_readword((int)&readmem_dword,rt);
2728 if(type==LOADD_STUB) {
2729 emit_readword((int)&readmem_dword,rt);
2730 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2731 }
57871462 2732 }
2733}
2734
// Emit the out-of-line slow path for the memory write described by stubs[n].
// stubs[n] fields as used below: [0] stub type, [1] location passed to
// set_jump_target so that it lands on this stub, [2] return address,
// [3] instruction index i, [4] host register whose value is written to
// `address`, [5] struct regstat pointer, [6] cycle adjustment,
// [7] register list for save_regs/restore_regs.
// The value to store is first written to byte/hword/word/dword, then the
// handler is called through indirect_jump_indexed.
// NOTE(review): defined with no return type (implicit int) and no return
// statement.
2735do_writestub(int n)
2736{
2737 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2738 literal_pool(256);
2739 set_jump_target(stubs[n][1],(int)out);
2740 int type=stubs[n][0];
2741 int i=stubs[n][3];
2742 int rs=stubs[n][4];
2743 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2744 u_int reglist=stubs[n][7];
2745 signed char *i_regmap=i_regs->regmap;
2746 int addr=get_reg(i_regmap,AGEN1+(i&1));
2747 int rth,rt,r;
2748 int ds;
// C1LS and C2LS instructions take the value from the FTEMP mapping; all
// others from the mapping of rs2[i]. r remembers which guest register that is.
b9b61529 2749 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2750 rth=get_reg(i_regmap,FTEMP|64);
2751 rt=get_reg(i_regmap,r=FTEMP);
2752 }else{
2753 rth=get_reg(i_regmap,rs2[i]|64);
2754 rt=get_reg(i_regmap,r=rs2[i]);
2755 }
2756 assert(rs>=0);
2757 assert(rt>=0);
2758 if(addr<0) addr=get_reg(i_regmap,-1);
2759 assert(addr>=0);
// Select the handler table matching the stub type.
2760 int ftable=0;
2761 if(type==STOREB_STUB)
2762 ftable=(int)writememb;
2763 if(type==STOREH_STUB)
2764 ftable=(int)writememh;
2765 if(type==STOREW_STUB)
2766 ftable=(int)writemem;
24385cae 2767#ifndef FORCE32
57871462 2768 if(type==STORED_STUB)
2769 ftable=(int)writememd;
24385cae 2770#endif
2771 assert(ftable!=0);
57871462 2772 emit_writeword(rs,(int)&address);
2773 //emit_shrimm(rs,16,rs);
2774 //emit_movmem_indexedx4(ftable,rs,rs);
2775 if(type==STOREB_STUB)
2776 emit_writebyte(rt,(int)&byte);
2777 if(type==STOREH_STUB)
2778 emit_writehword(rt,(int)&hword);
2779 if(type==STOREW_STUB)
2780 emit_writeword(rt,(int)&word);
2781 if(type==STORED_STUB) {
3d624f89 2782#ifndef FORCE32
57871462 2783 emit_writeword(rt,(int)&dword);
// When r is 0 the low-half register is stored for the upper word as well.
2784 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2785#else
2786 printf("STORED_STUB\n");
2787#endif
57871462 2788 }
2789 //emit_pusha();
2790 save_regs(reglist);
// ds is nonzero when i_regs is not the regstat of instruction i itself
// (presumably a delay-slot instruction - confirm against the caller).
2791 ds=i_regs!=&regs[i];
2792 int real_rs=get_reg(i_regmap,rs1[i]);
2793 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2794 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2795 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2796 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
// Call setup: r1 = address>>16, r0 = handler table, r2 = cycle count plus
// adjustment, r3 = instruction address with extra low bits (was32 flag, ds).
2797 emit_shrimm(rs,16,1);
2798 int cc=get_reg(i_regmap,CCREG);
2799 if(cc<0) {
2800 emit_loadreg(CCREG,2);
2801 }
2802 emit_movimm(ftable,0);
2803 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2804 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2805 //emit_readword((int)&last_count,temp);
2806 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2807 //emit_add(cc,temp,cc);
2808 //emit_writeword(cc,(int)&Count);
2809 emit_call((int)&indirect_jump_indexed);
2810 //emit_callreg(rs);
// Recompute the cycle count register from Count and next_interupt after the
// call, and store next_interupt to last_count.
2811 emit_readword((int)&Count,HOST_TEMPREG);
2812 emit_readword((int)&next_interupt,2);
2813 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2814 emit_writeword(2,(int)&last_count);
2815 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2816 if(cc<0) {
2817 emit_storereg(CCREG,HOST_TEMPREG);
2818 }
2819 //emit_popa();
2820 restore_regs(reglist);
2821 //if((cc=get_reg(regmap,CCREG))>=0) {
2822 // emit_loadreg(CCREG,cc);
2823 //}
2824 emit_jmp(stubs[n][2]); // return address
2825}
2826
// Emit an inline call to the write handler for the constant address `addr`.
// The address register (the -1 temporary mapping) is written to `address`,
// the value register mapped to `target` is written to byte/hword/word/dword,
// and the handler resolved at code-generation time as
// ((u_int *)ftable)[addr>>16] is called through indirect_jump.
//   type    - STORE*_STUB kind, selects handler table and store width
//   i       - instruction index (used for the address passed in r3)
//   regmap  - host register map used for get_reg lookups
//   adj     - cycle adjustment, scaled by CLOCK_DIVIDER
//   reglist - register list for save_regs/restore_regs
// NOTE(review): defined with no return type (implicit int) and no return
// statement.
2827inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2828{
2829 int rs=get_reg(regmap,-1);
2830 int rth=get_reg(regmap,target|64);
2831 int rt=get_reg(regmap,target);
2832 assert(rs>=0);
2833 assert(rt>=0);
2834 int ftable=0;
2835 if(type==STOREB_STUB)
2836 ftable=(int)writememb;
2837 if(type==STOREH_STUB)
2838 ftable=(int)writememh;
2839 if(type==STOREW_STUB)
2840 ftable=(int)writemem;
24385cae 2841#ifndef FORCE32
57871462 2842 if(type==STORED_STUB)
2843 ftable=(int)writememd;
24385cae 2844#endif
2845 assert(ftable!=0);
57871462 2846 emit_writeword(rs,(int)&address);
2847 //emit_shrimm(rs,16,rs);
2848 //emit_movmem_indexedx4(ftable,rs,rs);
2849 if(type==STOREB_STUB)
2850 emit_writebyte(rt,(int)&byte);
2851 if(type==STOREH_STUB)
2852 emit_writehword(rt,(int)&hword);
2853 if(type==STOREW_STUB)
2854 emit_writeword(rt,(int)&word);
2855 if(type==STORED_STUB) {
3d624f89 2856#ifndef FORCE32
57871462 2857 emit_writeword(rt,(int)&dword);
// When target is 0 the low-half register is stored for the upper word too.
2858 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2859#else
2860 printf("STORED_STUB\n");
2861#endif
57871462 2862 }
2863 //emit_pusha();
2864 save_regs(reglist);
2865 //emit_shrimm(rs,16,1);
2866 int cc=get_reg(regmap,CCREG);
2867 if(cc<0) {
2868 emit_loadreg(CCREG,2);
2869 }
2870 //emit_movimm(ftable,0);
// r0 = handler address resolved now from the table; r2 = cycle count plus
// adjustment.
2871 emit_movimm(((u_int *)ftable)[addr>>16],0);
2872 //emit_readword((int)&last_count,12);
2873 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2874 if((signed int)addr>=(signed int)0xC0000000) {
2875 // Pagefault address
2876 int ds=regmap!=regs[i].regmap;
2877 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2878 }
2879 //emit_add(12,2,2);
2880 //emit_writeword(2,(int)&Count);
2881 //emit_call(((u_int *)ftable)[addr>>16]);
2882 emit_call((int)&indirect_jump);
// Recompute the cycle count register from Count and next_interupt after the
// call, and store next_interupt to last_count.
2883 emit_readword((int)&Count,HOST_TEMPREG);
2884 emit_readword((int)&next_interupt,2);
2885 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2886 emit_writeword(2,(int)&last_count);
2887 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2888 if(cc<0) {
2889 emit_storereg(CCREG,HOST_TEMPREG);
2890 }
2891 //emit_popa();
2892 restore_regs(reglist);
2893}
2894
// Emit the out-of-line slow path for SWL/SWR (opcode 0x2a / 0x2e, asserted).
// The generated code reads the word at the 4-byte-aligned address through
// the readmem table, merges the source register into it with a shift and
// mask derived from the low address bits, and writes the word back through
// the writemem table.
// stubs[n] fields as used below: [1] location passed to set_jump_target,
// [2] return address, [3] instruction index i, [4] struct regstat pointer,
// [5] host register holding the address, [6] cycle adjustment,
// [7] register list for save_regs/restore_regs.
// NOTE(review): temp2 (the FTEMP mapping) is used in a shift and as a
// register without a >=0 check - confirm FTEMP is always allocated here.
// NOTE(review): defined with no return type (implicit int) and no return
// statement.
2895do_unalignedwritestub(int n)
2896{
b7918751 2897 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2898 literal_pool(256);
57871462 2899 set_jump_target(stubs[n][1],(int)out);
b7918751 2900
2901 int i=stubs[n][3];
2902 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2903 int addr=stubs[n][5];
2904 u_int reglist=stubs[n][7];
2905 signed char *i_regmap=i_regs->regmap;
2906 int temp2=get_reg(i_regmap,FTEMP);
2907 int rt;
2908 int ds, real_rs;
2909 rt=get_reg(i_regmap,rs2[i]);
2910 assert(rt>=0);
2911 assert(addr>=0);
2912 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register is added to the save list and temp2 removed from it.
2913 reglist|=(1<<addr);
2914 reglist&=~(1<<temp2);
2915
// address = addr with the two low bits cleared.
2916 emit_andimm(addr,0xfffffffc,temp2);
2917 emit_writeword(temp2,(int)&address);
2918
2919 save_regs(reglist);
2920 ds=i_regs!=&regs[i];
2921 real_rs=get_reg(i_regmap,rs1[i]);
2922 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2923 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2924 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2925 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
// First call: read the aligned word (r0 = readmem table, r1 = addr>>16).
2926 emit_shrimm(addr,16,1);
2927 int cc=get_reg(i_regmap,CCREG);
2928 if(cc<0) {
2929 emit_loadreg(CCREG,2);
2930 }
2931 emit_movimm((u_int)readmem,0);
2932 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2933 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2934 emit_call((int)&indirect_jump_indexed);
2935 restore_regs(reglist);
2936
// temp (aliasing addr) becomes the bit shift: (addr<<3)&24, XOR-ed with 24
// for SWL (or for SWR when BIG_ENDIAN_MIPS is defined).
2937 emit_readword((int)&readmem_dword,temp2);
2938 int temp=addr; //hmh
2939 emit_shlimm(addr,3,temp);
2940 emit_andimm(temp,24,temp);
2941#ifdef BIG_ENDIAN_MIPS
2942 if (opcode[i]==0x2e) // SWR
2943#else
2944 if (opcode[i]==0x2a) // SWL
2945#endif
2946 emit_xorimm(temp,24,temp);
// Clear the bits being replaced using an all-ones mask shifted by temp, then
// OR in the source register shifted by the same amount.
2947 emit_movimm(-1,HOST_TEMPREG);
55439448 2948 if (opcode[i]==0x2a) { // SWL
b7918751 2949 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2950 emit_orrshr(rt,temp,temp2);
2951 }else{
2952 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshl(rt,temp,temp2);
2954 }
// addr was overwritten by the shift computation; reload it from `address`
// (the aligned address stored above).
2955 emit_readword((int)&address,addr);
2956 emit_writeword(temp2,(int)&word);
2957 //save_regs(reglist); // don't need to, no state changes
// Second call: write the merged word. lr is set from pc and the handler is
// loaded into pc from the writemem table indexed by addr>>16.
2958 emit_shrimm(addr,16,1);
2959 emit_movimm((u_int)writemem,0);
2960 //emit_call((int)&indirect_jump_indexed);
2961 emit_mov(15,14);
2962 emit_readword_dualindexedx4(0,1,15);
2963 emit_readword((int)&Count,HOST_TEMPREG);
2964 emit_readword((int)&next_interupt,2);
2965 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2966 emit_writeword(2,(int)&last_count);
2967 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2968 if(cc<0) {
2969 emit_storereg(CCREG,HOST_TEMPREG);
2970 }
2971 restore_regs(reglist);
57871462 2972 emit_jmp(stubs[n][2]); // return address
2973}
2974
// Debug helper: print the register values passed in, followed by the int
// located just below the first parameter in memory. The esp argument is
// accepted but not printed.
// NOTE(review): (&edi)[-1] reads outside the parameter object; what it
// yields depends on the calling convention and is not defined by C.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int below_first_arg=(&edi)[-1];
  (void)esp;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_first_arg);
}
2979
2980do_invstub(int n)
2981{
2982 literal_pool(20);
2983 u_int reglist=stubs[n][3];
2984 set_jump_target(stubs[n][1],(int)out);
2985 save_regs(reglist);
2986 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2987 emit_call((int)&invalidate_addr);
2988 restore_regs(reglist);
2989 emit_jmp(stubs[n][2]); // return address
2990}
2991
// Emit the dirty-check stub for instruction i.
// The generated code loads r1 = source address, r2 = copy, r3 = slen*4 and
// r0 = start+i*4, calls verify_code (or verify_code_vm when start is at or
// above 0xC0000000 as a signed value), then loads the entry registers and
// jumps to instr_addr[i].
// Returns the address following the call, or instr_addr[i] when
// load_regs_entry() emitted nothing.
2992int do_dirty_stub(int i)
2993{
2994 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2995 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2996 #ifdef PCSX
2997 addr=(u_int)source;
2998 #endif
57871462 2999 // Careful about the code output here, verify_dirty needs to parse it.
3000 #ifdef ARMv5_ONLY
ac545b3a 3001 emit_loadlp(addr,1);
57871462 3002 emit_loadlp((int)copy,2);
3003 emit_loadlp(slen*4,3);
3004 #else
ac545b3a 3005 emit_movw(addr&0x0000FFFF,1);
57871462 3006 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3007 emit_movt(addr&0xFFFF0000,1);
57871462 3008 emit_movt(((u_int)copy)&0xFFFF0000,2);
3009 emit_movw(slen*4,3);
3010 #endif
3011 emit_movimm(start+i*4,0);
3012 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
// entry marks where the register-loading code begins; if nothing was emitted
// there, the block's own address is used instead.
3013 int entry=(int)out;
3014 load_regs_entry(i);
3015 if(entry==(int)out) entry=instr_addr[i];
3016 emit_jmp(instr_addr[i]);
3017 return entry;
3018}
3019
// Emit the dirty-check sequence used with verify_code_ds.
// The generated code loads r1 = source (or start when start is at or above
// 0xC0000000 as a signed value), r2 = copy, r3 = slen*4 and r0 = start+1,
// then calls verify_code_ds. Nothing is emitted after the call.
3020void do_dirty_stub_ds()
3021{
3022 // Careful about the code output here, verify_dirty needs to parse it.
3023 #ifdef ARMv5_ONLY
3024 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3025 emit_loadlp((int)copy,2);
3026 emit_loadlp(slen*4,3);
3027 #else
3028 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3029 emit_movw(((u_int)copy)&0x0000FFFF,2);
3030 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3031 emit_movt(((u_int)copy)&0xFFFF0000,2);
3032 emit_movw(slen*4,3);
3033 #endif
3034 emit_movimm(start+1,0);
3035 emit_call((int)&verify_code_ds);
3036}
3037
3038do_cop1stub(int n)
3039{
3040 literal_pool(256);
3041 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3042 set_jump_target(stubs[n][1],(int)out);
3043 int i=stubs[n][3];
3d624f89 3044// int rs=stubs[n][4];
57871462 3045 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3046 int ds=stubs[n][6];
3047 if(!ds) {
3048 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3049 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3050 }
3051 //else {printf("fp exception in delay slot\n");}
3052 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3053 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3054 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3055 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3056 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3057}
3058
3059/* TLB */
3060
3061int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3062{
3063 if(c) {
3064 if((signed int)addr>=(signed int)0xC0000000) {
3065 // address_generation already loaded the const
3066 emit_readword_dualindexedx4(FP,map,map);
3067 }
3068 else
3069 return -1; // No mapping
3070 }
3071 else {
3072 assert(s!=map);
3073 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3074 emit_addsr12(map,s,map);
3075 // Schedule this while we wait on the load
3076 //if(x) emit_xorimm(s,x,ar);
3077 if(shift>=0) emit_shlimm(s,3,shift);
3078 if(~a) emit_andimm(s,a,ar);
3079 emit_readword_dualindexedx4(FP,map,map);
3080 }
3081 return map;
3082}
3083int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3084{
3085 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3086 emit_test(map,map);
3087 *jaddr=(int)out;
3088 emit_js(0);
3089 }
3090 return map;
3091}
3092
3093int gen_tlb_addr_r(int ar, int map) {
3094 if(map>=0) {
3095 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3096 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3097 }
3098}
3099
3100int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3101{
3102 if(c) {
3103 if(addr<0x80800000||addr>=0xC0000000) {
3104 // address_generation already loaded the const
3105 emit_readword_dualindexedx4(FP,map,map);
3106 }
3107 else
3108 return -1; // No mapping
3109 }
3110 else {
3111 assert(s!=map);
3112 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3113 emit_addsr12(map,s,map);
3114 // Schedule this while we wait on the load
3115 //if(x) emit_xorimm(s,x,ar);
3116 emit_readword_dualindexedx4(FP,map,map);
3117 }
3118 return map;
3119}
3120int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3121{
3122 if(!c||addr<0x80800000||addr>=0xC0000000) {
3123 emit_testimm(map,0x40000000);
3124 *jaddr=(int)out;
3125 emit_jne(0);
3126 }
3127}
3128
3129int gen_tlb_addr_w(int ar, int map) {
3130 if(map>=0) {
3131 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3132 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3133 }
3134}
3135
3136// Generate the address of the memory_map entry, relative to dynarec_local
3137generate_map_const(u_int addr,int reg) {
3138 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3139 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3140}
3141
3142/* Special assem */
3143
3144void shift_assemble_arm(int i,struct regstat *i_regs)
3145{
3146 if(rt1[i]) {
3147 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3148 {
3149 signed char s,t,shift;
3150 t=get_reg(i_regs->regmap,rt1[i]);
3151 s=get_reg(i_regs->regmap,rs1[i]);
3152 shift=get_reg(i_regs->regmap,rs2[i]);
3153 if(t>=0){
3154 if(rs1[i]==0)
3155 {
3156 emit_zeroreg(t);
3157 }
3158 else if(rs2[i]==0)
3159 {
3160 assert(s>=0);
3161 if(s!=t) emit_mov(s,t);
3162 }
3163 else
3164 {
3165 emit_andimm(shift,31,HOST_TEMPREG);
3166 if(opcode2[i]==4) // SLLV
3167 {
3168 emit_shl(s,HOST_TEMPREG,t);
3169 }
3170 if(opcode2[i]==6) // SRLV
3171 {
3172 emit_shr(s,HOST_TEMPREG,t);
3173 }
3174 if(opcode2[i]==7) // SRAV
3175 {
3176 emit_sar(s,HOST_TEMPREG,t);
3177 }
3178 }
3179 }
3180 } else { // DSLLV/DSRLV/DSRAV
3181 signed char sh,sl,th,tl,shift;
3182 th=get_reg(i_regs->regmap,rt1[i]|64);
3183 tl=get_reg(i_regs->regmap,rt1[i]);
3184 sh=get_reg(i_regs->regmap,rs1[i]|64);
3185 sl=get_reg(i_regs->regmap,rs1[i]);
3186 shift=get_reg(i_regs->regmap,rs2[i]);
3187 if(tl>=0){
3188 if(rs1[i]==0)
3189 {
3190 emit_zeroreg(tl);
3191 if(th>=0) emit_zeroreg(th);
3192 }
3193 else if(rs2[i]==0)
3194 {
3195 assert(sl>=0);
3196 if(sl!=tl) emit_mov(sl,tl);
3197 if(th>=0&&sh!=th) emit_mov(sh,th);
3198 }
3199 else
3200 {
3201 // FIXME: What if shift==tl ?
3202 assert(shift!=tl);
3203 int temp=get_reg(i_regs->regmap,-1);
3204 int real_th=th;
3205 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3206 assert(sl>=0);
3207 assert(sh>=0);
3208 emit_andimm(shift,31,HOST_TEMPREG);
3209 if(opcode2[i]==0x14) // DSLLV
3210 {
3211 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3212 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3213 emit_orrshr(sl,HOST_TEMPREG,th);
3214 emit_andimm(shift,31,HOST_TEMPREG);
3215 emit_testimm(shift,32);
3216 emit_shl(sl,HOST_TEMPREG,tl);
3217 if(th>=0) emit_cmovne_reg(tl,th);
3218 emit_cmovne_imm(0,tl);
3219 }
3220 if(opcode2[i]==0x16) // DSRLV
3221 {
3222 assert(th>=0);
3223 emit_shr(sl,HOST_TEMPREG,tl);
3224 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3225 emit_orrshl(sh,HOST_TEMPREG,tl);
3226 emit_andimm(shift,31,HOST_TEMPREG);
3227 emit_testimm(shift,32);
3228 emit_shr(sh,HOST_TEMPREG,th);
3229 emit_cmovne_reg(th,tl);
3230 if(real_th>=0) emit_cmovne_imm(0,th);
3231 }
3232 if(opcode2[i]==0x17) // DSRAV
3233 {
3234 assert(th>=0);
3235 emit_shr(sl,HOST_TEMPREG,tl);
3236 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3237 if(real_th>=0) {
3238 assert(temp>=0);
3239 emit_sarimm(th,31,temp);
3240 }
3241 emit_orrshl(sh,HOST_TEMPREG,tl);
3242 emit_andimm(shift,31,HOST_TEMPREG);
3243 emit_testimm(shift,32);
3244 emit_sar(sh,HOST_TEMPREG,th);
3245 emit_cmovne_reg(th,tl);
3246 if(real_th>=0) emit_cmovne_reg(temp,th);
3247 }
3248 }
3249 }
3250 }
3251 }
3252}
3253#define shift_assemble shift_assemble_arm
3254
/*
 * Emit ARM code for the unaligned-load instructions LWL/LWR
 * (opcode 0x22/0x26) and LDL/LDR (opcode 0x1A/0x1B).
 *
 * i:      index of the instruction being assembled.
 * i_regs: register state; host registers are looked up in its regmap.
 *
 * Outline of what is emitted below: the address is masked down to a
 * word (LWL/LWR) or doubleword (LDL/LDR) boundary, the bit offset
 * (address*8) is kept in `temp`, the aligned data is loaded through
 * FTEMP, and the loaded value is then shifted and merged into the
 * target register(s) with bic/or sequences.
 */
3255void loadlr_assemble_arm(int i,struct regstat *i_regs)
3256{
3257 int s,th,tl,temp,temp2,addr,map=-1;
3258 int offset;
3259 int jaddr=0;
3260 int memtarget,c=0;
3261 u_int hr,reglist=0;
3262 th=get_reg(i_regs->regmap,rt1[i]|64);
3263 tl=get_reg(i_regs->regmap,rt1[i]);
3264 s=get_reg(i_regs->regmap,rs1[i]);
3265 temp=get_reg(i_regs->regmap,-1);
3266 temp2=get_reg(i_regs->regmap,FTEMP);
3267 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3268 assert(addr<0);
3269 offset=imm[i];
// Build a bitmask of every host register that currently holds something.
3270 for(hr=0;hr<HOST_REGS;hr++) {
3271 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3272 }
// NOTE(review): temp is not checked for <0 before being used as a shift
// count here - confirm get_reg cannot fail for the temporary.
3273 reglist|=1<<temp;
// c is still 0 at this point (it is only derived from wasconst below),
// so the choice depends on offset and s alone.
3274 if(offset||s<0||c) addr=temp2;
3275 else addr=s;
// memtarget is assigned only when s>=0; its later uses are of the form
// (!c||memtarget) and c stays 0 when s<0.
3276 if(s>=0) {
3277 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3278 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3279 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3280 }
535d208a 3281 if(!using_tlb) {
3282 if(!c) {
3283 #ifdef RAM_OFFSET
3284 map=get_reg(i_regs->regmap,ROREG);
3285 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3286 #endif
// temp = address*8 (bit offset); temp2 = address rounded down to the
// access size.
3287 emit_shlimm(addr,3,temp);
3288 if (opcode[i]==0x22||opcode[i]==0x26) {
3289 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3290 }else{
535d208a 3291 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3292 }
// Compare against RAM_SIZE; the branch emitted here is recorded in
// jaddr and handed to add_stub further down.
535d208a 3293 emit_cmpimm(addr,RAM_SIZE);
3294 jaddr=(int)out;
3295 emit_jno(0);
3296 }
3297 else {
// Constant address: the bit offset is known at assembly time.
3298 if (opcode[i]==0x22||opcode[i]==0x26) {
3299 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3300 }else{
3301 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3302 }
57871462 3303 }
535d208a 3304 }else{ // using tlb
3305 int a;
// a is the alignment mask passed to do_tlb_r; -1 for a constant address.
3306 if(c) {
3307 a=-1;
3308 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3309 a=0xFFFFFFFC; // LWL/LWR
3310 }else{
3311 a=0xFFFFFFF8; // LDL/LDR
3312 }
3313 map=get_reg(i_regs->regmap,TLREG);
3314 assert(map>=0);
// NOTE(review): constmap[i][s] is evaluated here even when s<0 (c==0) -
// confirm the value is ignored by do_tlb_r in that case.
3315 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3316 if(c) {
3317 if (opcode[i]==0x22||opcode[i]==0x26) {
3318 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3319 }else{
3320 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3321 }
535d208a 3322 }
3323 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3324 }
3325 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Direct load when the address is not constant or is a known memory
// target; otherwise go through the inline read stub.
3326 if(!c||memtarget) {
3327 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3328 emit_readword_indexed_tlb(0,temp2,map,temp2);
3329 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3330 }
3331 else
3332 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3333 if(rt1[i]) {
3334 assert(tl>=0);
// Keep only the byte-offset part of the bit offset (0, 8, 16 or 24).
57871462 3335 emit_andimm(temp,24,temp);
// The shift amount is inverted for one of the two opcodes; which one
// depends on BIG_ENDIAN_MIPS.
2002a1db 3336#ifdef BIG_ENDIAN_MIPS
3337 if (opcode[i]==0x26) // LWR
3338#else
3339 if (opcode[i]==0x22) // LWL
3340#endif
3341 emit_xorimm(temp,24,temp);
// HOST_TEMPREG = all ones; shifted by temp it forms the mask of target
// bits that are cleared before the loaded bits are ORed in.
57871462 3342 emit_movimm(-1,HOST_TEMPREG);
3343 if (opcode[i]==0x26) {
3344 emit_shr(temp2,temp,temp2);
3345 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3346 }else{
3347 emit_shl(temp2,temp,temp2);
3348 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3349 }
3350 emit_or(temp2,tl,tl);
57871462 3351 }
535d208a 3352 //emit_storereg(rt1[i],tl); // DEBUG
3353 }
3354 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3355 // FIXME: little endian
3356 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3357 if(!c||memtarget) {
3358 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3359 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3360 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3361 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3362 }
3363 else
3364 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3365 if(rt1[i]) {
3366 assert(th>=0);
3367 assert(tl>=0);
// Test bit 5 of the bit offset; the eq/ne-conditional instructions
// below select between the low and high halves based on this test.
57871462 3368 emit_testimm(temp,32);
3369 emit_andimm(temp,24,temp);
3370 if (opcode[i]==0x1A) { // LDL
3371 emit_rsbimm(temp,32,HOST_TEMPREG);
3372 emit_shl(temp2h,temp,temp2h);
3373 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3374 emit_movimm(-1,HOST_TEMPREG);
3375 emit_shl(temp2,temp,temp2);
3376 emit_cmove_reg(temp2h,th);
3377 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3378 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3379 emit_orreq(temp2,tl,tl);
3380 emit_orrne(temp2,th,th);
3381 }
3382 if (opcode[i]==0x1B) { // LDR
3383 emit_xorimm(temp,24,temp);
3384 emit_rsbimm(temp,32,HOST_TEMPREG);
3385 emit_shr(temp2,temp,temp2);
3386 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3387 emit_movimm(-1,HOST_TEMPREG);
3388 emit_shr(temp2h,temp,temp2h);
3389 emit_cmovne_reg(temp2,tl);
3390 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3391 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3392 emit_orrne(temp2h,th,th);
3393 emit_orreq(temp2h,tl,tl);
3394 }
3395 }
3396 }
3397}
3398#define loadlr_assemble loadlr_assemble_arm
3399
3400void cop0_assemble(int i,struct regstat *i_regs)
3401{
3402 if(opcode2[i]==0) // MFC0
3403 {
3404 signed char t=get_reg(i_regs->regmap,rt1[i]);
3405 char copr=(source[i]>>11)&0x1f;
3406 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3407 if(t>=0&&rt1[i]!=0) {
7139f3c8 3408#ifdef MUPEN64
57871462 3409 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3410 emit_movimm((source[i]>>11)&0x1f,1);
3411 emit_writeword(0,(int)&PC);
3412 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3413 if(copr==9) {
3414 emit_readword((int)&last_count,ECX);
3415 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3416 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3417 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3418 emit_writeword(HOST_CCREG,(int)&Count);
3419 }
3420 emit_call((int)MFC0);
3421 emit_readword((int)&readmem_dword,t);
7139f3c8 3422#else
3423 emit_readword((int)&reg_cop0+copr*4,t);
3424#endif
57871462 3425 }
3426 }
3427 else if(opcode2[i]==4) // MTC0
3428 {
3429 signed char s=get_reg(i_regs->regmap,rs1[i]);
3430 char copr=(source[i]>>11)&0x1f;
3431 assert(s>=0);
3432 emit_writeword(s,(int)&readmem_dword);
3433 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3434#ifdef MUPEN64
57871462 3435 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3436 emit_movimm((source[i]>>11)&0x1f,1);
3437 emit_writeword(0,(int)&PC);
3438 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3439#endif
3440 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3441 emit_readword((int)&last_count,ECX);
3442 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3443 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3444 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3445 emit_writeword(HOST_CCREG,(int)&Count);
3446 }
3447 // What a mess. The status register (12) can enable interrupts,
3448 // so needs a special case to handle a pending interrupt.
3449 // The interrupt must be taken immediately, because a subsequent
3450 // instruction might disable interrupts again.
7139f3c8 3451 if(copr==12||copr==13) {
fca1aef2 3452#ifdef PCSX
3453 if (is_delayslot) {
3454 // burn cycles to cause cc_interrupt, which will
3455 // reschedule next_interupt. Relies on CCREG from above.
3456 assem_debug("MTC0 DS %d\n", copr);
3457 emit_writeword(HOST_CCREG,(int)&last_count);
3458 emit_movimm(0,HOST_CCREG);
3459 emit_storereg(CCREG,HOST_CCREG);
3460 emit_movimm(copr,0);
3461 emit_call((int)pcsx_mtc0_ds);
3462 return;
3463 }
3464#endif
57871462 3465 emit_movimm(start+i*4+4,0);
3466 emit_movimm(0,1);
3467 emit_writeword(0,(int)&pcaddr);
3468 emit_writeword(1,(int)&pending_exception);
3469 }
3470 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3471 //else