drc: merge Ari64's patch: 05_dont_write_r0
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
/* Declarations of objects that are defined outside this file. */
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
/* fake_pc is only declared when building with MUPEN64 defined. */
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
/* Offsets used by emit_loadreg()/emit_storereg() are taken relative to the
 * address of dynarec_local. */
57871462 30extern void *dynarec_local;
/* Indexed by (address >> 12) in verify_dirty() and get_bounds(). */
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
/* Routines implemented outside this file (presumably in assembly — TODO
 * confirm).  They are declared without a parameter list. */
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
/* One jump_vaddr entry point per host register; there is none for
 * r11 or r13-r15 (see the zero entries in jump_vaddr_reg below). */
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
/*
 * Map guest register reg onto a host register in cur->regmap.
 *
 *   cur - register state being built; regmap, dirty, isconst, u and uu
 *         are read and/or updated here
 *   i   - index of the instruction being processed; i-1 is consulted for
 *         the registers used by the previous instruction
 *   reg - guest register number to allocate
 *
 * Order of attempts: return if reg is flagged in cur->u or already mapped;
 * take the preferred host register if it is free or holds a register
 * flagged in cur->u/cur->uu; drop one such unneeded mapping; take any free
 * host register (first avoiding those used by instruction i-1); finally
 * evict a register chosen through the hsn[] scores filled in by lsn().
 * Every new mapping clears the host register's dirty and isconst bits.
 * If nothing can be evicted the process exits.
 */
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
// Mapped values below 64 are checked against cur->u, values of 64 and
// above against cur->uu (indexed by r&63).
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
// Note: the loop stops at the first mapping it clears (break).
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register and is then updated by lsn()
// (defined elsewhere; presumably larger means "needed later" — TODO confirm).
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10 down to 3 only, skipping registers used by
// instruction i-1.
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// The r+64 mapping is tried before the plain r mapping.
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
// Second pass: every score from 10 down to 0, with no exclusions.
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
/*
 * Allocate host registers for both halves of guest register reg.
 * The lower half is allocated by alloc_reg(); the upper half is then
 * mapped as reg|64 using the same sequence of attempts as alloc_reg(),
 * with a preferred host register of 8+(reg&1).  Nothing is allocated for
 * the upper half when reg is flagged in cur->uu or reg+64 is already
 * mapped.  If nothing can be evicted the process exits.
 *
 *   cur - register state being built
 *   i   - index of the instruction being processed
 *   reg - guest register number
 */
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
// Unlike alloc_reg(), this scan runs from the highest host register down;
// it also stops at the first mapping it clears.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register and is then updated by lsn()
// (defined elsewhere; presumably larger means "needed later" — TODO confirm).
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10 down to 3 only, skipping registers used by
// instruction i-1.
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
// Second pass: every score from 10 down to 0, with no exclusions.
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
/*
 *   cur - register state being built
 *   i   - index of the instruction being processed
 *   reg - temporary register id to map
 *
 * Order of attempts: return if reg is already mapped; take a free host
 * register (scanning from the highest number down); reuse a host register
 * whose occupant is flagged unneeded both in cur and for instruction i-1;
 * finally evict by hsn[] score.  No preferred host register is used.
 * If nothing can be evicted the process exits.
 */
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
// For i>0 the occupant must also be flagged in unneeded_reg[i-1]
// (or unneeded_reg_upper[i-1] for mapped values of 64 and above).
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10 down to 3, skipping registers used by instruction
// i-1.  Here HOST_CCREG is only eligible when hsn[CCREG]>2 (alloc_reg and
// alloc_reg64 test j<hsn[CCREG] instead).
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
// Second pass: every score from 10 down to 0, with no exclusions.
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
/* Printable name of each host register number, used in assem_debug output.
 * Registers 11, 13, 14 and 15 are shown as fp, sp, lr and pc. */
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
/* Build the register fields of an instruction word: rn in bits 16-19,
 * rd in bits 12-15 and rm in bits 0-3.  Each register must be below 16. */
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  const u_int rn_field=rn<<16;
  const u_int rd_field=rd<<12;
  return rn_field|rd_field|rm;
}
/* Build the operand fields for an immediate-form instruction: rn in bits
 * 16-19, rd in bits 12-15, an 8-bit immediate in bits 0-7 and, above it,
 * the field ((32-shift)&30)<<7 derived from the even left-shift amount. */
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
/*
 * Try to express imm as an 8-bit value plus an even rotation.
 * On success stores the 12-bit field (rotation in bits 8-11, value in
 * bits 0-7) in *encoded and returns 1; returns 0, leaving *encoded
 * untouched, when no rotation yields a value below 256.
 */
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation amount
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
57871462 832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
e80343e2 835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
57871462 842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
790ee18e 917void emit_loadlp(u_int imm,u_int rt)
918{
919 add_literal((int)out,imm);
920 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
921 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
922}
923void emit_movw(u_int imm,u_int rt)
924{
925 assert(imm<65536);
926 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
927 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
928}
929void emit_movt(u_int imm,u_int rt)
930{
931 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
932 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
933}
934void emit_movimm(u_int imm,u_int rt)
935{
936 u_int armval;
937 if(genimm(imm,&armval)) {
938 assem_debug("mov %s,#%d\n",regname[rt],imm);
939 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
940 }else if(genimm(~imm,&armval)) {
941 assem_debug("mvn %s,#%d\n",regname[rt],imm);
942 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
943 }else if(imm<65536) {
944 #ifdef ARMv5_ONLY
945 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
946 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
947 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
948 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
949 #else
950 emit_movw(imm,rt);
951 #endif
952 }else{
953 #ifdef ARMv5_ONLY
954 emit_loadlp(imm,rt);
955 #else
956 emit_movw(imm&0x0000FFFF,rt);
957 emit_movt(imm&0xFFFF0000,rt);
958 #endif
959 }
960}
961void emit_pcreladdr(u_int rt)
962{
963 assem_debug("add %s,pc,#?\n",regname[rt]);
964 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
965}
966
57871462 967void emit_loadreg(int r, int hr)
968{
3d624f89 969#ifdef FORCE32
970 if(r&64) {
971 printf("64bit load in 32bit mode!\n");
972 exit(1);
973 }
974#endif
57871462 975 if((r&63)==0)
976 emit_zeroreg(hr);
977 else {
3d624f89 978 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 979 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
980 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
981 if(r==CCREG) addr=(int)&cycle_count;
982 if(r==CSREG) addr=(int)&Status;
983 if(r==FSREG) addr=(int)&FCR31;
984 if(r==INVCP) addr=(int)&invc_ptr;
985 u_int offset = addr-(u_int)&dynarec_local;
986 assert(offset<4096);
987 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
988 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
989 }
990}
991void emit_storereg(int r, int hr)
992{
3d624f89 993#ifdef FORCE32
994 if(r&64) {
995 printf("64bit store in 32bit mode!\n");
996 exit(1);
997 }
998#endif
999 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1000 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1001 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1002 if(r==CCREG) addr=(int)&cycle_count;
1003 if(r==FSREG) addr=(int)&FCR31;
1004 u_int offset = addr-(u_int)&dynarec_local;
1005 assert(offset<4096);
1006 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1007 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1008}
1009
1010void emit_test(int rs, int rt)
1011{
1012 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1013 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1014}
1015
1016void emit_testimm(int rs,int imm)
1017{
1018 u_int armval;
1019 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1020 genimm_checked(imm,&armval);
57871462 1021 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1022}
1023
b9b61529 1024void emit_testeqimm(int rs,int imm)
1025{
1026 u_int armval;
1027 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1028 genimm_checked(imm,&armval);
b9b61529 1029 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1030}
1031
57871462 1032void emit_not(int rs,int rt)
1033{
1034 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1035 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1036}
1037
b9b61529 1038void emit_mvnmi(int rs,int rt)
1039{
1040 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1041 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1042}
1043
57871462 1044void emit_and(u_int rs1,u_int rs2,u_int rt)
1045{
1046 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1047 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1048}
1049
1050void emit_or(u_int rs1,u_int rs2,u_int rt)
1051{
1052 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1053 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1054}
1055void emit_or_and_set_flags(int rs1,int rs2,int rt)
1056{
1057 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1058 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1059}
1060
f70d384d 1061void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 assert(imm<32);
1066 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1067 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1068}
1069
576bbd8f 1070void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1071{
1072 assert(rs<16);
1073 assert(rt<16);
1074 assert(imm<32);
1075 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1076 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1077}
1078
57871462 1079void emit_xor(u_int rs1,u_int rs2,u_int rt)
1080{
1081 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
57871462 1085void emit_addimm(u_int rs,int imm,u_int rt)
1086{
1087 assert(rs<16);
1088 assert(rt<16);
1089 if(imm!=0) {
1090 assert(imm>-65536&&imm<65536);
1091 u_int armval;
1092 if(genimm(imm,&armval)) {
1093 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1095 }else if(genimm(-imm,&armval)) {
1096 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1097 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1098 }else if(imm<0) {
1099 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1101 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1103 }else{
1104 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1106 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1108 }
1109 }
1110 else if(rs!=rt) emit_mov(rs,rt);
1111}
1112
1113void emit_addimm_and_set_flags(int imm,int rt)
1114{
1115 assert(imm>-65536&&imm<65536);
1116 u_int armval;
1117 if(genimm(imm,&armval)) {
1118 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1119 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1120 }else if(genimm(-imm,&armval)) {
1121 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1122 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1123 }else if(imm<0) {
1124 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1125 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1126 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1127 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1128 }else{
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1130 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1131 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1132 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1133 }
1134}
1135void emit_addimm_no_flags(u_int imm,u_int rt)
1136{
1137 emit_addimm(rt,imm,rt);
1138}
1139
1140void emit_addnop(u_int r)
1141{
1142 assert(r<16);
1143 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1144 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1145}
1146
1147void emit_adcimm(u_int rs,int imm,u_int rt)
1148{
1149 u_int armval;
cfbd3c6e 1150 genimm_checked(imm,&armval);
57871462 1151 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1152 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1153}
1154/*void emit_sbcimm(int imm,u_int rt)
1155{
1156 u_int armval;
cfbd3c6e 1157 genimm_checked(imm,&armval);
57871462 1158 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1159 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1160}*/
1161void emit_sbbimm(int imm,u_int rt)
1162{
1163 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1164 assert(rt<8);
1165 if(imm<128&&imm>=-128) {
1166 output_byte(0x83);
1167 output_modrm(3,rt,3);
1168 output_byte(imm);
1169 }
1170 else
1171 {
1172 output_byte(0x81);
1173 output_modrm(3,rt,3);
1174 output_w32(imm);
1175 }
1176}
1177void emit_rscimm(int rs,int imm,u_int rt)
1178{
1179 assert(0);
1180 u_int armval;
cfbd3c6e 1181 genimm_checked(imm,&armval);
57871462 1182 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1184}
1185
1186void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1187{
1188 // TODO: if(genimm(imm,&armval)) ...
1189 // else
1190 emit_movimm(imm,HOST_TEMPREG);
1191 emit_adds(HOST_TEMPREG,rsl,rtl);
1192 emit_adcimm(rsh,0,rth);
1193}
1194
1195void emit_sbb(int rs1,int rs2)
1196{
1197 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1198 output_byte(0x19);
1199 output_modrm(3,rs1,rs2);
1200}
1201
1202void emit_andimm(int rs,int imm,int rt)
1203{
1204 u_int armval;
790ee18e 1205 if(imm==0) {
1206 emit_zeroreg(rt);
1207 }else if(genimm(imm,&armval)) {
57871462 1208 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1209 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1210 }else if(genimm(~imm,&armval)) {
1211 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1212 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1213 }else if(imm==65535) {
1214 #ifdef ARMv5_ONLY
1215 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1216 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1217 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1218 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1219 #else
1220 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1221 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1222 #endif
1223 }else{
1224 assert(imm>0&&imm<65535);
1225 #ifdef ARMv5_ONLY
1226 assem_debug("mov r14,#%d\n",imm&0xFF00);
1227 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1228 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1229 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1230 #else
1231 emit_movw(imm,HOST_TEMPREG);
1232 #endif
1233 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1234 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1235 }
1236}
1237
1238void emit_orimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
790ee18e 1241 if(imm==0) {
1242 if(rs!=rt) emit_mov(rs,rt);
1243 }else if(genimm(imm,&armval)) {
57871462 1244 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1246 }else{
1247 assert(imm>0&&imm<65536);
1248 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1249 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1250 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1251 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1252 }
1253}
1254
1255void emit_xorimm(int rs,int imm,int rt)
1256{
57871462 1257 u_int armval;
790ee18e 1258 if(imm==0) {
1259 if(rs!=rt) emit_mov(rs,rt);
1260 }else if(genimm(imm,&armval)) {
57871462 1261 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1262 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1263 }else{
514ed0d9 1264 assert(imm>0&&imm<65536);
57871462 1265 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1266 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1267 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1268 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1269 }
1270}
1271
1272void emit_shlimm(int rs,u_int imm,int rt)
1273{
1274 assert(imm>0);
1275 assert(imm<32);
1276 //if(imm==1) ...
1277 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1278 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1279}
1280
1281void emit_shrimm(int rs,u_int imm,int rt)
1282{
1283 assert(imm>0);
1284 assert(imm<32);
1285 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1287}
1288
1289void emit_sarimm(int rs,u_int imm,int rt)
1290{
1291 assert(imm>0);
1292 assert(imm<32);
1293 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1294 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1295}
1296
1297void emit_rorimm(int rs,u_int imm,int rt)
1298{
1299 assert(imm>0);
1300 assert(imm<32);
1301 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1302 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1303}
1304
1305void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1306{
1307 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1308 assert(imm>0);
1309 assert(imm<32);
1310 //if(imm==1) ...
1311 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1313 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1314 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1315}
1316
1317void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1318{
1319 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1320 assert(imm>0);
1321 assert(imm<32);
1322 //if(imm==1) ...
1323 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1325 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1326 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1327}
1328
b9b61529 1329void emit_signextend16(int rs,int rt)
1330{
1331 #ifdef ARMv5_ONLY
1332 emit_shlimm(rs,16,rt);
1333 emit_sarimm(rt,16,rt);
1334 #else
1335 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1336 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1337 #endif
1338}
1339
57871462 1340void emit_shl(u_int rs,u_int shift,u_int rt)
1341{
1342 assert(rs<16);
1343 assert(rt<16);
1344 assert(shift<16);
1345 //if(imm==1) ...
1346 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1347 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1348}
1349void emit_shr(u_int rs,u_int shift,u_int rt)
1350{
1351 assert(rs<16);
1352 assert(rt<16);
1353 assert(shift<16);
1354 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1355 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1356}
1357void emit_sar(u_int rs,u_int shift,u_int rt)
1358{
1359 assert(rs<16);
1360 assert(rt<16);
1361 assert(shift<16);
1362 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1363 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1364}
1365void emit_shlcl(int r)
1366{
1367 assem_debug("shl %%%s,%%cl\n",regname[r]);
1368 assert(0);
1369}
1370void emit_shrcl(int r)
1371{
1372 assem_debug("shr %%%s,%%cl\n",regname[r]);
1373 assert(0);
1374}
1375void emit_sarcl(int r)
1376{
1377 assem_debug("sar %%%s,%%cl\n",regname[r]);
1378 assert(0);
1379}
1380
1381void emit_shldcl(int r1,int r2)
1382{
1383 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1384 assert(0);
1385}
1386void emit_shrdcl(int r1,int r2)
1387{
1388 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1389 assert(0);
1390}
1391void emit_orrshl(u_int rs,u_int shift,u_int rt)
1392{
1393 assert(rs<16);
1394 assert(rt<16);
1395 assert(shift<16);
1396 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1397 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1398}
1399void emit_orrshr(u_int rs,u_int shift,u_int rt)
1400{
1401 assert(rs<16);
1402 assert(rt<16);
1403 assert(shift<16);
1404 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1405 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1406}
1407
1408void emit_cmpimm(int rs,int imm)
1409{
1410 u_int armval;
1411 if(genimm(imm,&armval)) {
1412 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1413 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1414 }else if(genimm(-imm,&armval)) {
1415 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1416 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1417 }else if(imm>0) {
1418 assert(imm<65536);
1419 #ifdef ARMv5_ONLY
1420 emit_movimm(imm,HOST_TEMPREG);
1421 #else
1422 emit_movw(imm,HOST_TEMPREG);
1423 #endif
1424 assem_debug("cmp %s,r14\n",regname[rs]);
1425 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1426 }else{
1427 assert(imm>-65536);
1428 #ifdef ARMv5_ONLY
1429 emit_movimm(-imm,HOST_TEMPREG);
1430 #else
1431 emit_movw(-imm,HOST_TEMPREG);
1432 #endif
1433 assem_debug("cmn %s,r14\n",regname[rs]);
1434 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1435 }
1436}
1437
1438void emit_cmovne(u_int *addr,int rt)
1439{
1440 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1441 assert(0);
1442}
1443void emit_cmovl(u_int *addr,int rt)
1444{
1445 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1446 assert(0);
1447}
1448void emit_cmovs(u_int *addr,int rt)
1449{
1450 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1451 assert(0);
1452}
1453void emit_cmovne_imm(int imm,int rt)
1454{
1455 assem_debug("movne %s,#%d\n",regname[rt],imm);
1456 u_int armval;
cfbd3c6e 1457 genimm_checked(imm,&armval);
57871462 1458 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1459}
1460void emit_cmovl_imm(int imm,int rt)
1461{
1462 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1463 u_int armval;
cfbd3c6e 1464 genimm_checked(imm,&armval);
57871462 1465 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1466}
1467void emit_cmovb_imm(int imm,int rt)
1468{
1469 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1470 u_int armval;
cfbd3c6e 1471 genimm_checked(imm,&armval);
57871462 1472 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1473}
1474void emit_cmovs_imm(int imm,int rt)
1475{
1476 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1477 u_int armval;
cfbd3c6e 1478 genimm_checked(imm,&armval);
57871462 1479 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1480}
1481void emit_cmove_reg(int rs,int rt)
1482{
1483 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1484 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1485}
1486void emit_cmovne_reg(int rs,int rt)
1487{
1488 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1489 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1490}
1491void emit_cmovl_reg(int rs,int rt)
1492{
1493 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1494 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1495}
1496void emit_cmovs_reg(int rs,int rt)
1497{
1498 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1499 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1500}
1501
// Set rt to 1 if rs < imm (LT condition after the compare), else 0.
// rt is cleared before the compare unless it is the same register as rs, in
// which case it is cleared afterwards so the compare still sees the source.
void emit_slti32(int rs, int imm, int rt)
{
  const int dest_is_src = (rs == rt);
  if (!dest_is_src)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (dest_is_src)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if the compare of rs with imm leaves the CC (carry clear)
// condition true, else 0.
// rt is cleared before the compare unless it is the same register as rs, in
// which case it is cleared afterwards so the compare still sees the source.
void emit_sltiu32(int rs, int imm, int rt)
{
  const int dest_is_src = (rs == rt);
  if (!dest_is_src)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (dest_is_src)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// "Set if less than immediate" for a value held in the pair rsh:rsl.
// The low word is compared first with emit_slti32(); the result in rt is
// then corrected from the high word:
//   imm >= 0 : test rsh; rt=0 if non-zero, rt=1 if negative.
//   imm <  0 : compare rsh with -1; rt=0 if different, rt=1 if less.
// rsh must not be the same register as rt.
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// Unsigned "set if less than immediate" for a value held in the pair
// rsh:rsl. The low word is compared first with emit_sltiu32(); the result in
// rt is then corrected from the high word:
//   imm >= 0 : test rsh; rt=0 if it is non-zero.
//   imm <  0 : compare rsh with -1; rt=1 if it differs.
// rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1548
1549void emit_cmp(int rs,int rt)
1550{
1551 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1552 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1553}
// Set rt to 1 if rs > 0, else 0: compare rs with 1, preload rt with 1, then
// overwrite it with 0 when the LT condition holds.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Turn rs into a 0/1 "non-zero" flag in rt: flags are set from rs (via a
// flag-setting move into rt, or a test when rs is already rt), then rt is
// replaced by 1 when the NE condition holds.
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// "Greater than zero" flag for the pair rsh:rsl, written to rt.
// Starts from the 32-bit result for the low word, then tests the high word:
// rt=1 if rsh is non-zero, and rt=0 if rsh is negative.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// "Non-zero" flag for the pair rsh:rsl: OR both halves into rt while
// setting flags, then replace rt with 1 when the NE condition holds.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (LT condition after the compare), else 0.
// rt is cleared before the compare unless it is one of the sources, in
// which case it is cleared afterwards so the compare sees the original
// value.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int dest_is_src = (rs1 == rt || rs2 == rt);
  if (!dest_is_src)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (dest_is_src)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if comparing rs1 with rs2 leaves the CC (carry clear)
// condition true, else 0.
// rt is cleared before the compare unless it is one of the sources, in
// which case it is cleared afterwards so the compare sees the original
// value.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int dest_is_src = (rs1 == rt || rs2 == rt);
  if (!dest_is_src)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (dest_is_src)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1598void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1599{
1600 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1601 assert(u1!=rt);
1602 assert(u2!=rt);
1603 emit_cmp(l1,l2);
1604 emit_movimm(0,rt);
1605 emit_sbcs(u1,u2,HOST_TEMPREG);
1606 emit_cmovl_imm(1,rt);
1607}
1608void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1609{
1610 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1611 assert(u1!=rt);
1612 assert(u2!=rt);
1613 emit_cmp(l1,l2);
1614 emit_movimm(0,rt);
1615 emit_sbcs(u1,u2,HOST_TEMPREG);
1616 emit_cmovb_imm(1,rt);
1617}
1618
1619void emit_call(int a)
1620{
1621 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1622 u_int offset=genjmp(a);
1623 output_w32(0xeb000000|offset);
1624}
1625void emit_jmp(int a)
1626{
1627 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1628 u_int offset=genjmp(a);
1629 output_w32(0xea000000|offset);
1630}
1631void emit_jne(int a)
1632{
1633 assem_debug("bne %x\n",a);
1634 u_int offset=genjmp(a);
1635 output_w32(0x1a000000|offset);
1636}
1637void emit_jeq(int a)
1638{
1639 assem_debug("beq %x\n",a);
1640 u_int offset=genjmp(a);
1641 output_w32(0x0a000000|offset);
1642}
1643void emit_js(int a)
1644{
1645 assem_debug("bmi %x\n",a);
1646 u_int offset=genjmp(a);
1647 output_w32(0x4a000000|offset);
1648}
1649void emit_jns(int a)
1650{
1651 assem_debug("bpl %x\n",a);
1652 u_int offset=genjmp(a);
1653 output_w32(0x5a000000|offset);
1654}
1655void emit_jl(int a)
1656{
1657 assem_debug("blt %x\n",a);
1658 u_int offset=genjmp(a);
1659 output_w32(0xba000000|offset);
1660}
1661void emit_jge(int a)
1662{
1663 assem_debug("bge %x\n",a);
1664 u_int offset=genjmp(a);
1665 output_w32(0xaa000000|offset);
1666}
1667void emit_jno(int a)
1668{
1669 assem_debug("bvc %x\n",a);
1670 u_int offset=genjmp(a);
1671 output_w32(0x7a000000|offset);
1672}
1673void emit_jc(int a)
1674{
1675 assem_debug("bcs %x\n",a);
1676 u_int offset=genjmp(a);
1677 output_w32(0x2a000000|offset);
1678}
1679void emit_jcc(int a)
1680{
1681 assem_debug("bcc %x\n",a);
1682 u_int offset=genjmp(a);
1683 output_w32(0x3a000000|offset);
1684}
1685
// Not implemented here: traces an x86-style "push $imm" and then hits
// assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Not implemented here: traces "pusha" and then hits assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Not implemented here: traces "popa" and then hits assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1701void emit_pushreg(u_int r)
1702{
1703 assem_debug("push %%%s\n",regname[r]);
1704 assert(0);
1705}
1706void emit_popreg(u_int r)
1707{
1708 assem_debug("pop %%%s\n",regname[r]);
1709 assert(0);
1710}
1711void emit_callreg(u_int r)
1712{
1713 assem_debug("call *%%%s\n",regname[r]);
1714 assert(0);
1715}
1716void emit_jmpreg(u_int r)
1717{
1718 assem_debug("mov pc,%s\n",regname[r]);
1719 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1720}
1721
1722void emit_readword_indexed(int offset, int rs, int rt)
1723{
1724 assert(offset>-4096&&offset<4096);
1725 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1726 if(offset>=0) {
1727 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1728 }else{
1729 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1730 }
1731}
1732void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1733{
1734 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1735 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1736}
// Word load that optionally goes through a mapping register.
// map < 0 : plain load from rs+addr.
// else    : load from rs + map*4; addr must be 0 in that case.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words: rh from addr and rl from addr+4.
// The rh load is skipped when rh is negative.
// map < 0 : both loads are plain rs-relative loads.
// else    : rh is loaded through map, then map is incremented by one (in
//           place) and rl is loaded through the updated map. rh must not be
//           rs in this case, since rs is still needed for the second load.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map < 0) {
    if (want_high)
      emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }
  assert(rh != rs);
  if (want_high)
    emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1757void emit_movsbl_indexed(int offset, int rs, int rt)
1758{
1759 assert(offset>-256&&offset<256);
1760 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1761 if(offset>=0) {
1762 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1763 }else{
1764 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1765 }
1766}
1767void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1768{
1769 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1770 else {
1771 if(addr==0) {
1772 emit_shlimm(map,2,map);
1773 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1774 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1775 }else{
1776 assert(addr>-256&&addr<256);
1777 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1778 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1779 emit_movsbl_indexed(addr, rt, rt);
1780 }
1781 }
1782}
1783void emit_movswl_indexed(int offset, int rs, int rt)
1784{
1785 assert(offset>-256&&offset<256);
1786 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1787 if(offset>=0) {
1788 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1789 }else{
1790 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1791 }
1792}
1793void emit_movzbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-4096&&offset<4096);
1796 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1799 }else{
1800 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1801 }
1802}
1803void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1804{
1805 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1806 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1807}
// Zero-extending byte load that optionally goes through a mapping register.
// map < 0   : plain load from rs+addr.
// addr == 0 : load from rs + map*4.
// otherwise : rt = rs + addr is computed first, then the byte is loaded
//             from rt + map*4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1820void emit_movzwl_indexed(int offset, int rs, int rt)
1821{
1822 assert(offset>-256&&offset<256);
1823 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1824 if(offset>=0) {
1825 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1826 }else{
1827 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1828 }
1829}
1830void emit_readword(int addr, int rt)
1831{
1832 u_int offset = addr-(u_int)&dynarec_local;
1833 assert(offset<4096);
1834 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1835 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1836}
1837void emit_movsbl(int addr, int rt)
1838{
1839 u_int offset = addr-(u_int)&dynarec_local;
1840 assert(offset<256);
1841 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1842 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1843}
1844void emit_movswl(int addr, int rt)
1845{
1846 u_int offset = addr-(u_int)&dynarec_local;
1847 assert(offset<256);
1848 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1849 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1850}
1851void emit_movzbl(int addr, int rt)
1852{
1853 u_int offset = addr-(u_int)&dynarec_local;
1854 assert(offset<4096);
1855 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1856 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1857}
1858void emit_movzwl(int addr, int rt)
1859{
1860 u_int offset = addr-(u_int)&dynarec_local;
1861 assert(offset<256);
1862 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1863 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1864}
1865void emit_movzwl_reg(int rs, int rt)
1866{
1867 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1868 assert(0);
1869}
1870
1871void emit_xchg(int rs, int rt)
1872{
1873 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1874 assert(0);
1875}
1876void emit_writeword_indexed(int rt, int offset, int rs)
1877{
1878 assert(offset>-4096&&offset<4096);
1879 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1882 }else{
1883 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1884 }
1885}
1886void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1887{
1888 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1889 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1890}
// Word store that optionally goes through a mapping register.
// map < 0 : plain store to rs+addr.
// else    : store to rs + map*4; addr must be 0 in that case.
// The temp parameter is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value as two words: rh at addr and rl at addr+4.
// map < 0 : two plain rs-relative stores (the rh store is skipped when rh
//           is negative).
// else    : rh is required. When temp differs from rs, temp = map+1 is
//           prepared first and used as the mapping for the rl store;
//           otherwise rs itself is advanced by 4 (in place) before the rl
//           store through the original map.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0)
      emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh >= 0);
  const int temp_usable = (temp != rs);
  if (temp_usable)
    emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1915void emit_writehword_indexed(int rt, int offset, int rs)
1916{
1917 assert(offset>-256&&offset<256);
1918 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1919 if(offset>=0) {
1920 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1921 }else{
1922 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1923 }
1924}
1925void emit_writebyte_indexed(int rt, int offset, int rs)
1926{
1927 assert(offset>-4096&&offset<4096);
1928 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1929 if(offset>=0) {
1930 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1931 }else{
1932 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1933 }
1934}
1935void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1936{
1937 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1938 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1939}
// Byte store that optionally goes through a mapping register.
// map < 0   : plain store to rs+addr.
// addr == 0 : store to rs + map*4.
// otherwise : temp = rs + addr is computed first, then the byte is stored
//             to temp + map*4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1952void emit_writeword(int rt, int addr)
1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1958}
1959void emit_writehword(int rt, int addr)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
1966void emit_writebyte(int rt, int addr)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<4096);
74426039 1970 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1971 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1972}
// Not implemented here: traces an x86-style "movl $imm,addr" and then hits
// assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Not implemented here: traces an x86-style "movb $imm,addr" and then hits
// assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1983
1984void emit_mul(int rs)
1985{
1986 assem_debug("mul %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_imul(int rs)
1990{
1991 assem_debug("imul %%%s\n",regname[rs]);
1992 assert(0);
1993}
1994void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1995{
1996 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1997 assert(rs1<16);
1998 assert(rs2<16);
1999 assert(hi<16);
2000 assert(lo<16);
2001 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2002}
2003void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2004{
2005 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2006 assert(rs1<16);
2007 assert(rs2<16);
2008 assert(hi<16);
2009 assert(lo<16);
2010 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2011}
2012
2013void emit_div(int rs)
2014{
2015 assem_debug("div %%%s\n",regname[rs]);
2016 assert(0);
2017}
2018void emit_idiv(int rs)
2019{
2020 assem_debug("idiv %%%s\n",regname[rs]);
2021 assert(0);
2022}
// Not implemented here: traces "cdq" and then hits assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2028
2029void emit_clz(int rs,int rt)
2030{
2031 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2032 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2033}
2034
2035void emit_subcs(int rs1,int rs2,int rt)
2036{
2037 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2038 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2039}
2040
2041void emit_shrcc_imm(int rs,u_int imm,int rt)
2042{
2043 assert(imm>0);
2044 assert(imm<32);
2045 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2046 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2047}
2048
2049void emit_negmi(int rs, int rt)
2050{
2051 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2052 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2053}
2054
2055void emit_negsmi(int rs, int rt)
2056{
2057 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2058 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2059}
2060
2061void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2062{
2063 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2064 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2065}
2066
2067void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2068{
2069 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2070 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2071}
2072
2073void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2077}
2078
2079void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2080{
2081 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2082 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2083}
2084
2085void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2086{
2087 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2088 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2089}
2090
2091void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2092{
2093 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2094 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2095}
2096
2097void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2098{
2099 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2100 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2101}
2102
2103void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2104{
2105 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2106 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2107}
2108
2109void emit_teq(int rs, int rt)
2110{
2111 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2112 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2113}
2114
2115void emit_rsbimm(int rs, int imm, int rt)
2116{
2117 u_int armval;
cfbd3c6e 2118 genimm_checked(imm,&armval);
57871462 2119 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2120 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2121}
2122
2123// Load 2 immediates optimizing for small code size
2124void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2125{
2126 emit_movimm(imm1,rt1);
2127 u_int armval;
2128 if(genimm(imm2-imm1,&armval)) {
2129 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2130 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2131 }else if(genimm(imm1-imm2,&armval)) {
2132 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2133 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2134 }
2135 else emit_movimm(imm2,rt2);
2136}
2137
2138// Conditionally select one of two immediates, optimizing for small code size
2139// This will only be called if HAVE_CMOV_IMM is defined
2140void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2141{
2142 u_int armval;
2143 if(genimm(imm2-imm1,&armval)) {
2144 emit_movimm(imm1,rt);
2145 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2146 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2147 }else if(genimm(imm1-imm2,&armval)) {
2148 emit_movimm(imm1,rt);
2149 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2150 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2151 }
2152 else {
2153 #ifdef ARMv5_ONLY
2154 emit_movimm(imm1,rt);
2155 add_literal((int)out,imm2);
2156 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2157 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2158 #else
2159 emit_movw(imm1&0x0000FFFF,rt);
2160 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2161 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2162 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2163 }
2164 emit_movt(imm1&0xFFFF0000,rt);
2165 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2166 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2167 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2168 }
2169 #endif
2170 }
2171}
2172
// special case for checking invalid_code
// Not implemented here: the body is only assert(0), so any call trips the
// assertion (when assertions are enabled).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2178
2179// special case for checking invalid_code
2180void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2181{
2182 assert(imm<128&&imm>=0);
2183 assert(r>=0&&r<16);
2184 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2185 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2186 emit_cmpimm(HOST_TEMPREG,imm);
2187}
2188
2189// special case for tlb mapping
2190void emit_addsr12(int rs1,int rs2,int rt)
2191{
2192 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2193 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2194}
2195
// Used to preload hash table entries
// NOTE(review): this writes two single bytes, a ModRM byte and a 32-bit
// address, which looks like an x86 encoding carried over into this file
// rather than an ARM instruction -- confirm whether it is ever called.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2205void emit_prefetchreg(int r)
2206{
2207 assem_debug("pld %s\n",regname[r]);
2208 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2209}
2210
2211// Special case for mini_ht
2212void emit_ldreq_indexed(int rs, u_int offset, int rt)
2213{
2214 assert(offset<4096);
2215 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2216 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2217}
2218
2219void emit_flds(int r,int sr)
2220{
2221 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2222 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2223}
2224
2225void emit_vldr(int r,int vr)
2226{
2227 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2228 output_w32(0xed900b00|(vr<<12)|(r<<16));
2229}
2230
2231void emit_fsts(int sr,int r)
2232{
2233 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2234 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2235}
2236
2237void emit_vstr(int vr,int r)
2238{
2239 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2240 output_w32(0xed800b00|(vr<<12)|(r<<16));
2241}
2242
2243void emit_ftosizs(int s,int d)
2244{
2245 assem_debug("ftosizs s%d,s%d\n",d,s);
2246 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2247}
2248
2249void emit_ftosizd(int s,int d)
2250{
2251 assem_debug("ftosizd s%d,d%d\n",d,s);
2252 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2253}
2254
2255void emit_fsitos(int s,int d)
2256{
2257 assem_debug("fsitos s%d,s%d\n",d,s);
2258 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2259}
2260
2261void emit_fsitod(int s,int d)
2262{
2263 assem_debug("fsitod d%d,s%d\n",d,s);
2264 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2265}
2266
2267void emit_fcvtds(int s,int d)
2268{
2269 assem_debug("fcvtds d%d,s%d\n",d,s);
2270 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2271}
2272
2273void emit_fcvtsd(int s,int d)
2274{
2275 assem_debug("fcvtsd s%d,d%d\n",d,s);
2276 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2277}
2278
// Emit "fsqrts s<d>,s<s>".
// Both operands use the split single-precision register encoding, so the
// debug trace labels both as s-registers (it previously printed the
// destination as a d-register).
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2284
// Emit "fsqrtd d<d>,d<s>".
// Both operands use the 3-bit double-precision register encoding, so the
// debug trace labels both as d-registers (it previously printed the
// destination as an s-register).
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2290
// Emit "fabss s<d>,s<s>".
// Both operands use the split single-precision register encoding, so the
// debug trace labels both as s-registers (it previously printed the
// destination as a d-register).
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2296
// Emit "fabsd d<d>,d<s>".
// Both operands use the 3-bit double-precision register encoding, so the
// debug trace labels both as d-registers (it previously printed the
// destination as an s-register).
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2302
// Emit "fnegs s<d>,s<s>".
// Both operands use the split single-precision register encoding, so the
// debug trace labels both as s-registers (it previously printed the
// destination as a d-register).
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2308
// Emit "fnegd d<d>,d<s>".
// Both operands use the 3-bit double-precision register encoding, so the
// debug trace labels both as d-registers (it previously printed the
// destination as an s-register).
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2314
2315void emit_fadds(int s1,int s2,int d)
2316{
2317 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2318 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2319}
2320
2321void emit_faddd(int s1,int s2,int d)
2322{
2323 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2324 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2325}
2326
2327void emit_fsubs(int s1,int s2,int d)
2328{
2329 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2330 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2331}
2332
2333void emit_fsubd(int s1,int s2,int d)
2334{
2335 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2336 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2337}
2338
2339void emit_fmuls(int s1,int s2,int d)
2340{
2341 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2342 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2343}
2344
2345void emit_fmuld(int s1,int s2,int d)
2346{
2347 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2348 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2349}
2350
2351void emit_fdivs(int s1,int s2,int d)
2352{
2353 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2354 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2355}
2356
2357void emit_fdivd(int s1,int s2,int d)
2358{
2359 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2360 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2361}
2362
2363void emit_fcmps(int x,int y)
2364{
2365 assem_debug("fcmps s14, s15\n");
2366 output_w32(0xeeb47a67);
2367}
2368
2369void emit_fcmpd(int x,int y)
2370{
2371 assem_debug("fcmpd d6, d7\n");
2372 output_w32(0xeeb46b47);
2373}
2374
2375void emit_fmstat()
2376{
2377 assem_debug("fmstat\n");
2378 output_w32(0xeef1fa10);
2379}
2380
2381void emit_bicne_imm(int rs,int imm,int rt)
2382{
2383 u_int armval;
cfbd3c6e 2384 genimm_checked(imm,&armval);
57871462 2385 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2386 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2387}
2388
2389void emit_biccs_imm(int rs,int imm,int rt)
2390{
2391 u_int armval;
cfbd3c6e 2392 genimm_checked(imm,&armval);
57871462 2393 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2394 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2395}
2396
2397void emit_bicvc_imm(int rs,int imm,int rt)
2398{
2399 u_int armval;
cfbd3c6e 2400 genimm_checked(imm,&armval);
57871462 2401 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2402 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2403}
2404
2405void emit_bichi_imm(int rs,int imm,int rt)
2406{
2407 u_int armval;
cfbd3c6e 2408 genimm_checked(imm,&armval);
57871462 2409 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2410 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2411}
2412
2413void emit_orrvs_imm(int rs,int imm,int rt)
2414{
2415 u_int armval;
cfbd3c6e 2416 genimm_checked(imm,&armval);
57871462 2417 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2418 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2419}
2420
b9b61529 2421void emit_orrne_imm(int rs,int imm,int rt)
2422{
2423 u_int armval;
cfbd3c6e 2424 genimm_checked(imm,&armval);
b9b61529 2425 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2426 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2427}
2428
2429void emit_andne_imm(int rs,int imm,int rt)
2430{
2431 u_int armval;
cfbd3c6e 2432 genimm_checked(imm,&armval);
b9b61529 2433 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2434 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
57871462 2437void emit_jno_unlikely(int a)
2438{
2439 //emit_jno(a);
2440 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2441 output_w32(0x72800000|rd_rn_rm(15,15,0));
2442}
2443
2444// Save registers before function call
2445void save_regs(u_int reglist)
2446{
2447 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2448 if(!reglist) return;
2449 assem_debug("stmia fp,{");
2450 if(reglist&1) assem_debug("r0, ");
2451 if(reglist&2) assem_debug("r1, ");
2452 if(reglist&4) assem_debug("r2, ");
2453 if(reglist&8) assem_debug("r3, ");
2454 if(reglist&0x1000) assem_debug("r12");
2455 assem_debug("}\n");
2456 output_w32(0xe88b0000|reglist);
2457}
2458// Restore registers after function call
2459void restore_regs(u_int reglist)
2460{
2461 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2462 if(!reglist) return;
2463 assem_debug("ldmia fp,{");
2464 if(reglist&1) assem_debug("r0, ");
2465 if(reglist&2) assem_debug("r1, ");
2466 if(reglist&4) assem_debug("r2, ");
2467 if(reglist&8) assem_debug("r3, ");
2468 if(reglist&0x1000) assem_debug("r12");
2469 assem_debug("}\n");
2470 output_w32(0xe89b0000|reglist);
2471}
2472
2473// Write back consts using r14 so we don't disturb the other registers
2474void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2475{
2476 int hr;
2477 for(hr=0;hr<HOST_REGS;hr++) {
2478 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2479 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2480 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2481 int value=constmap[i][hr];
2482 if(value==0) {
2483 emit_zeroreg(HOST_TEMPREG);
2484 }
2485 else {
2486 emit_movimm(value,HOST_TEMPREG);
2487 }
2488 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2489#ifndef FORCE32
57871462 2490 if((i_is32>>i_regmap[hr])&1) {
2491 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2492 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2493 }
24385cae 2494#endif
57871462 2495 }
2496 }
2497 }
2498 }
2499}
2500
2501/* Stubs/epilogue */
2502
2503void literal_pool(int n)
2504{
2505 if(!literalcount) return;
2506 if(n) {
2507 if((int)out-literals[0][0]<4096-n) return;
2508 }
2509 u_int *ptr;
2510 int i;
2511 for(i=0;i<literalcount;i++)
2512 {
2513 ptr=(u_int *)literals[i][0];
2514 u_int offset=(u_int)out-(u_int)ptr-8;
2515 assert(offset<4096);
2516 assert(!(offset&3));
2517 *ptr|=offset;
2518 output_w32(literals[i][1]);
2519 }
2520 literalcount=0;
2521}
2522
2523void literal_pool_jumpover(int n)
2524{
2525 if(!literalcount) return;
2526 if(n) {
2527 if((int)out-literals[0][0]<4096-n) return;
2528 }
2529 int jaddr=(int)out;
2530 emit_jmp(0);
2531 literal_pool(0);
2532 set_jump_target(jaddr,(int)out);
2533}
2534
2535emit_extjump2(int addr, int target, int linker)
2536{
2537 u_char *ptr=(u_char *)addr;
2538 assert((ptr[3]&0x0e)==0xa);
2539 emit_loadlp(target,0);
2540 emit_loadlp(addr,1);
24385cae 2541 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2542 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2543//DEBUG >
2544#ifdef DEBUG_CYCLE_COUNT
2545 emit_readword((int)&last_count,ECX);
2546 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2547 emit_readword((int)&next_interupt,ECX);
2548 emit_writeword(HOST_CCREG,(int)&Count);
2549 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2550 emit_writeword(ECX,(int)&last_count);
2551#endif
2552//DEBUG <
2553 emit_jmp(linker);
2554}
2555
2556emit_extjump(int addr, int target)
2557{
2558 emit_extjump2(addr, target, (int)dyna_linker);
2559}
2560emit_extjump_ds(int addr, int target)
2561{
2562 emit_extjump2(addr, target, (int)dyna_linker_ds);
2563}
2564
2565do_readstub(int n)
2566{
2567 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2568 literal_pool(256);
2569 set_jump_target(stubs[n][1],(int)out);
2570 int type=stubs[n][0];
2571 int i=stubs[n][3];
2572 int rs=stubs[n][4];
2573 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2574 u_int reglist=stubs[n][7];
2575 signed char *i_regmap=i_regs->regmap;
2576 int addr=get_reg(i_regmap,AGEN1+(i&1));
2577 int rth,rt;
2578 int ds;
b9b61529 2579 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2580 rth=get_reg(i_regmap,FTEMP|64);
2581 rt=get_reg(i_regmap,FTEMP);
2582 }else{
2583 rth=get_reg(i_regmap,rt1[i]|64);
2584 rt=get_reg(i_regmap,rt1[i]);
2585 }
2586 assert(rs>=0);
57871462 2587 if(addr<0) addr=rt;
535d208a 2588 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2589 assert(addr>=0);
2590 int ftable=0;
2591 if(type==LOADB_STUB||type==LOADBU_STUB)
2592 ftable=(int)readmemb;
2593 if(type==LOADH_STUB||type==LOADHU_STUB)
2594 ftable=(int)readmemh;
2595 if(type==LOADW_STUB)
2596 ftable=(int)readmem;
24385cae 2597#ifndef FORCE32
57871462 2598 if(type==LOADD_STUB)
2599 ftable=(int)readmemd;
24385cae 2600#endif
2601 assert(ftable!=0);
57871462 2602 emit_writeword(rs,(int)&address);
2603 //emit_pusha();
2604 save_regs(reglist);
2605 ds=i_regs!=&regs[i];
2606 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2607 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2608 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2609 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2610 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2611 emit_shrimm(rs,16,1);
2612 int cc=get_reg(i_regmap,CCREG);
2613 if(cc<0) {
2614 emit_loadreg(CCREG,2);
2615 }
2616 emit_movimm(ftable,0);
2617 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2618 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2619 //emit_readword((int)&last_count,temp);
2620 //emit_add(cc,temp,cc);
2621 //emit_writeword(cc,(int)&Count);
2622 //emit_mov(15,14);
2623 emit_call((int)&indirect_jump_indexed);
2624 //emit_callreg(rs);
2625 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2626 // We really shouldn't need to update the count here,
2627 // but not doing so causes random crashes...
2628 emit_readword((int)&Count,HOST_TEMPREG);
2629 emit_readword((int)&next_interupt,2);
2630 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2631 emit_writeword(2,(int)&last_count);
2632 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2633 if(cc<0) {
2634 emit_storereg(CCREG,HOST_TEMPREG);
2635 }
2636 //emit_popa();
2637 restore_regs(reglist);
2638 //if((cc=get_reg(regmap,CCREG))>=0) {
2639 // emit_loadreg(CCREG,cc);
2640 //}
f18c0f46 2641 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2642 assert(rt>=0);
2643 if(type==LOADB_STUB)
2644 emit_movsbl((int)&readmem_dword,rt);
2645 if(type==LOADBU_STUB)
2646 emit_movzbl((int)&readmem_dword,rt);
2647 if(type==LOADH_STUB)
2648 emit_movswl((int)&readmem_dword,rt);
2649 if(type==LOADHU_STUB)
2650 emit_movzwl((int)&readmem_dword,rt);
2651 if(type==LOADW_STUB)
2652 emit_readword((int)&readmem_dword,rt);
2653 if(type==LOADD_STUB) {
2654 emit_readword((int)&readmem_dword,rt);
2655 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2656 }
57871462 2657 }
2658 emit_jmp(stubs[n][2]); // return address
2659}
2660
2661inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2662{
2663 int rs=get_reg(regmap,target);
2664 int rth=get_reg(regmap,target|64);
2665 int rt=get_reg(regmap,target);
535d208a 2666 if(rs<0) rs=get_reg(regmap,-1);
57871462 2667 assert(rs>=0);
57871462 2668 int ftable=0;
2669 if(type==LOADB_STUB||type==LOADBU_STUB)
2670 ftable=(int)readmemb;
2671 if(type==LOADH_STUB||type==LOADHU_STUB)
2672 ftable=(int)readmemh;
2673 if(type==LOADW_STUB)
2674 ftable=(int)readmem;
24385cae 2675#ifndef FORCE32
57871462 2676 if(type==LOADD_STUB)
2677 ftable=(int)readmemd;
24385cae 2678#endif
2679 assert(ftable!=0);
fd99c415 2680 if(target==0)
2681 emit_movimm(addr,rs);
57871462 2682 emit_writeword(rs,(int)&address);
2683 //emit_pusha();
2684 save_regs(reglist);
2685 //emit_shrimm(rs,16,1);
2686 int cc=get_reg(regmap,CCREG);
2687 if(cc<0) {
2688 emit_loadreg(CCREG,2);
2689 }
2690 //emit_movimm(ftable,0);
2691 emit_movimm(((u_int *)ftable)[addr>>16],0);
2692 //emit_readword((int)&last_count,12);
2693 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2694 if((signed int)addr>=(signed int)0xC0000000) {
2695 // Pagefault address
2696 int ds=regmap!=regs[i].regmap;
2697 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2698 }
2699 //emit_add(12,2,2);
2700 //emit_writeword(2,(int)&Count);
2701 //emit_call(((u_int *)ftable)[addr>>16]);
2702 emit_call((int)&indirect_jump);
2703 // We really shouldn't need to update the count here,
2704 // but not doing so causes random crashes...
2705 emit_readword((int)&Count,HOST_TEMPREG);
2706 emit_readword((int)&next_interupt,2);
2707 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2708 emit_writeword(2,(int)&last_count);
2709 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2710 if(cc<0) {
2711 emit_storereg(CCREG,HOST_TEMPREG);
2712 }
2713 //emit_popa();
2714 restore_regs(reglist);
fd99c415 2715 if(rt>=0) {
2716 if(type==LOADB_STUB)
2717 emit_movsbl((int)&readmem_dword,rt);
2718 if(type==LOADBU_STUB)
2719 emit_movzbl((int)&readmem_dword,rt);
2720 if(type==LOADH_STUB)
2721 emit_movswl((int)&readmem_dword,rt);
2722 if(type==LOADHU_STUB)
2723 emit_movzwl((int)&readmem_dword,rt);
2724 if(type==LOADW_STUB)
2725 emit_readword((int)&readmem_dword,rt);
2726 if(type==LOADD_STUB) {
2727 emit_readword((int)&readmem_dword,rt);
2728 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2729 }
57871462 2730 }
2731}
2732
2733do_writestub(int n)
2734{
2735 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2736 literal_pool(256);
2737 set_jump_target(stubs[n][1],(int)out);
2738 int type=stubs[n][0];
2739 int i=stubs[n][3];
2740 int rs=stubs[n][4];
2741 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2742 u_int reglist=stubs[n][7];
2743 signed char *i_regmap=i_regs->regmap;
2744 int addr=get_reg(i_regmap,AGEN1+(i&1));
2745 int rth,rt,r;
2746 int ds;
b9b61529 2747 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2748 rth=get_reg(i_regmap,FTEMP|64);
2749 rt=get_reg(i_regmap,r=FTEMP);
2750 }else{
2751 rth=get_reg(i_regmap,rs2[i]|64);
2752 rt=get_reg(i_regmap,r=rs2[i]);
2753 }
2754 assert(rs>=0);
2755 assert(rt>=0);
2756 if(addr<0) addr=get_reg(i_regmap,-1);
2757 assert(addr>=0);
2758 int ftable=0;
2759 if(type==STOREB_STUB)
2760 ftable=(int)writememb;
2761 if(type==STOREH_STUB)
2762 ftable=(int)writememh;
2763 if(type==STOREW_STUB)
2764 ftable=(int)writemem;
24385cae 2765#ifndef FORCE32
57871462 2766 if(type==STORED_STUB)
2767 ftable=(int)writememd;
24385cae 2768#endif
2769 assert(ftable!=0);
57871462 2770 emit_writeword(rs,(int)&address);
2771 //emit_shrimm(rs,16,rs);
2772 //emit_movmem_indexedx4(ftable,rs,rs);
2773 if(type==STOREB_STUB)
2774 emit_writebyte(rt,(int)&byte);
2775 if(type==STOREH_STUB)
2776 emit_writehword(rt,(int)&hword);
2777 if(type==STOREW_STUB)
2778 emit_writeword(rt,(int)&word);
2779 if(type==STORED_STUB) {
3d624f89 2780#ifndef FORCE32
57871462 2781 emit_writeword(rt,(int)&dword);
2782 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2783#else
2784 printf("STORED_STUB\n");
2785#endif
57871462 2786 }
2787 //emit_pusha();
2788 save_regs(reglist);
2789 ds=i_regs!=&regs[i];
2790 int real_rs=get_reg(i_regmap,rs1[i]);
2791 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2792 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2793 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2794 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2795 emit_shrimm(rs,16,1);
2796 int cc=get_reg(i_regmap,CCREG);
2797 if(cc<0) {
2798 emit_loadreg(CCREG,2);
2799 }
2800 emit_movimm(ftable,0);
2801 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2802 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2803 //emit_readword((int)&last_count,temp);
2804 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2805 //emit_add(cc,temp,cc);
2806 //emit_writeword(cc,(int)&Count);
2807 emit_call((int)&indirect_jump_indexed);
2808 //emit_callreg(rs);
2809 emit_readword((int)&Count,HOST_TEMPREG);
2810 emit_readword((int)&next_interupt,2);
2811 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2812 emit_writeword(2,(int)&last_count);
2813 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2814 if(cc<0) {
2815 emit_storereg(CCREG,HOST_TEMPREG);
2816 }
2817 //emit_popa();
2818 restore_regs(reglist);
2819 //if((cc=get_reg(regmap,CCREG))>=0) {
2820 // emit_loadreg(CCREG,cc);
2821 //}
2822 emit_jmp(stubs[n][2]); // return address
2823}
2824
2825inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2826{
2827 int rs=get_reg(regmap,-1);
2828 int rth=get_reg(regmap,target|64);
2829 int rt=get_reg(regmap,target);
2830 assert(rs>=0);
2831 assert(rt>=0);
2832 int ftable=0;
2833 if(type==STOREB_STUB)
2834 ftable=(int)writememb;
2835 if(type==STOREH_STUB)
2836 ftable=(int)writememh;
2837 if(type==STOREW_STUB)
2838 ftable=(int)writemem;
24385cae 2839#ifndef FORCE32
57871462 2840 if(type==STORED_STUB)
2841 ftable=(int)writememd;
24385cae 2842#endif
2843 assert(ftable!=0);
57871462 2844 emit_writeword(rs,(int)&address);
2845 //emit_shrimm(rs,16,rs);
2846 //emit_movmem_indexedx4(ftable,rs,rs);
2847 if(type==STOREB_STUB)
2848 emit_writebyte(rt,(int)&byte);
2849 if(type==STOREH_STUB)
2850 emit_writehword(rt,(int)&hword);
2851 if(type==STOREW_STUB)
2852 emit_writeword(rt,(int)&word);
2853 if(type==STORED_STUB) {
3d624f89 2854#ifndef FORCE32
57871462 2855 emit_writeword(rt,(int)&dword);
2856 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2857#else
2858 printf("STORED_STUB\n");
2859#endif
57871462 2860 }
2861 //emit_pusha();
2862 save_regs(reglist);
2863 //emit_shrimm(rs,16,1);
2864 int cc=get_reg(regmap,CCREG);
2865 if(cc<0) {
2866 emit_loadreg(CCREG,2);
2867 }
2868 //emit_movimm(ftable,0);
2869 emit_movimm(((u_int *)ftable)[addr>>16],0);
2870 //emit_readword((int)&last_count,12);
2871 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2872 if((signed int)addr>=(signed int)0xC0000000) {
2873 // Pagefault address
2874 int ds=regmap!=regs[i].regmap;
2875 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2876 }
2877 //emit_add(12,2,2);
2878 //emit_writeword(2,(int)&Count);
2879 //emit_call(((u_int *)ftable)[addr>>16]);
2880 emit_call((int)&indirect_jump);
2881 emit_readword((int)&Count,HOST_TEMPREG);
2882 emit_readword((int)&next_interupt,2);
2883 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2884 emit_writeword(2,(int)&last_count);
2885 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2886 if(cc<0) {
2887 emit_storereg(CCREG,HOST_TEMPREG);
2888 }
2889 //emit_popa();
2890 restore_regs(reglist);
2891}
2892
2893do_unalignedwritestub(int n)
2894{
b7918751 2895 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2896 literal_pool(256);
57871462 2897 set_jump_target(stubs[n][1],(int)out);
b7918751 2898
2899 int i=stubs[n][3];
2900 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2901 int addr=stubs[n][5];
2902 u_int reglist=stubs[n][7];
2903 signed char *i_regmap=i_regs->regmap;
2904 int temp2=get_reg(i_regmap,FTEMP);
2905 int rt;
2906 int ds, real_rs;
2907 rt=get_reg(i_regmap,rs2[i]);
2908 assert(rt>=0);
2909 assert(addr>=0);
2910 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2911 reglist|=(1<<addr);
2912 reglist&=~(1<<temp2);
2913
2914 emit_andimm(addr,0xfffffffc,temp2);
2915 emit_writeword(temp2,(int)&address);
2916
2917 save_regs(reglist);
2918 ds=i_regs!=&regs[i];
2919 real_rs=get_reg(i_regmap,rs1[i]);
2920 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2921 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2922 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2923 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2924 emit_shrimm(addr,16,1);
2925 int cc=get_reg(i_regmap,CCREG);
2926 if(cc<0) {
2927 emit_loadreg(CCREG,2);
2928 }
2929 emit_movimm((u_int)readmem,0);
2930 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2931 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2932 emit_call((int)&indirect_jump_indexed);
2933 restore_regs(reglist);
2934
2935 emit_readword((int)&readmem_dword,temp2);
2936 int temp=addr; //hmh
2937 emit_shlimm(addr,3,temp);
2938 emit_andimm(temp,24,temp);
2939#ifdef BIG_ENDIAN_MIPS
2940 if (opcode[i]==0x2e) // SWR
2941#else
2942 if (opcode[i]==0x2a) // SWL
2943#endif
2944 emit_xorimm(temp,24,temp);
2945 emit_movimm(-1,HOST_TEMPREG);
55439448 2946 if (opcode[i]==0x2a) { // SWL
b7918751 2947 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2948 emit_orrshr(rt,temp,temp2);
2949 }else{
2950 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2951 emit_orrshl(rt,temp,temp2);
2952 }
2953 emit_readword((int)&address,addr);
2954 emit_writeword(temp2,(int)&word);
2955 //save_regs(reglist); // don't need to, no state changes
2956 emit_shrimm(addr,16,1);
2957 emit_movimm((u_int)writemem,0);
2958 //emit_call((int)&indirect_jump_indexed);
2959 emit_mov(15,14);
2960 emit_readword_dualindexedx4(0,1,15);
2961 emit_readword((int)&Count,HOST_TEMPREG);
2962 emit_readword((int)&next_interupt,2);
2963 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2964 emit_writeword(2,(int)&last_count);
2965 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2966 if(cc<0) {
2967 emit_storereg(CCREG,HOST_TEMPREG);
2968 }
2969 restore_regs(reglist);
57871462 2970 emit_jmp(stubs[n][2]); // return address
2971}
2972
// Debug helper: prints its eight integer arguments in hex in the order
// a,b,c,d,ebp,esi,edi, followed in parentheses by the word located just
// below the first parameter in memory ((&edi)[-1]).
// NOTE(review): the parameter names and the (&edi)[-1] read look like a
// leftover from an x86 stack layout; what that word contains on ARM depends
// on how the compiler places the parameter -- confirm before relying on it.
2973void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
2974{
2975 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
2976}
2977
2978do_invstub(int n)
2979{
2980 literal_pool(20);
2981 u_int reglist=stubs[n][3];
2982 set_jump_target(stubs[n][1],(int)out);
2983 save_regs(reglist);
2984 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2985 emit_call((int)&invalidate_addr);
2986 restore_regs(reglist);
2987 emit_jmp(stubs[n][2]); // return address
2988}
2989
2990int do_dirty_stub(int i)
2991{
2992 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2993 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2994 #ifdef PCSX
2995 addr=(u_int)source;
2996 #endif
57871462 2997 // Careful about the code output here, verify_dirty needs to parse it.
2998 #ifdef ARMv5_ONLY
ac545b3a 2999 emit_loadlp(addr,1);
57871462 3000 emit_loadlp((int)copy,2);
3001 emit_loadlp(slen*4,3);
3002 #else
ac545b3a 3003 emit_movw(addr&0x0000FFFF,1);
57871462 3004 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3005 emit_movt(addr&0xFFFF0000,1);
57871462 3006 emit_movt(((u_int)copy)&0xFFFF0000,2);
3007 emit_movw(slen*4,3);
3008 #endif
3009 emit_movimm(start+i*4,0);
3010 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3011 int entry=(int)out;
3012 load_regs_entry(i);
3013 if(entry==(int)out) entry=instr_addr[i];
3014 emit_jmp(instr_addr[i]);
3015 return entry;
3016}
3017
3018void do_dirty_stub_ds()
3019{
3020 // Careful about the code output here, verify_dirty needs to parse it.
3021 #ifdef ARMv5_ONLY
3022 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3023 emit_loadlp((int)copy,2);
3024 emit_loadlp(slen*4,3);
3025 #else
3026 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3027 emit_movw(((u_int)copy)&0x0000FFFF,2);
3028 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3029 emit_movt(((u_int)copy)&0xFFFF0000,2);
3030 emit_movw(slen*4,3);
3031 #endif
3032 emit_movimm(start+1,0);
3033 emit_call((int)&verify_code_ds);
3034}
3035
3036do_cop1stub(int n)
3037{
3038 literal_pool(256);
3039 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3040 set_jump_target(stubs[n][1],(int)out);
3041 int i=stubs[n][3];
3d624f89 3042// int rs=stubs[n][4];
57871462 3043 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3044 int ds=stubs[n][6];
3045 if(!ds) {
3046 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3047 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3048 }
3049 //else {printf("fp exception in delay slot\n");}
3050 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3051 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3052 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3053 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3054 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3055}
3056
3057/* TLB */
3058
3059int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3060{
3061 if(c) {
3062 if((signed int)addr>=(signed int)0xC0000000) {
3063 // address_generation already loaded the const
3064 emit_readword_dualindexedx4(FP,map,map);
3065 }
3066 else
3067 return -1; // No mapping
3068 }
3069 else {
3070 assert(s!=map);
3071 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3072 emit_addsr12(map,s,map);
3073 // Schedule this while we wait on the load
3074 //if(x) emit_xorimm(s,x,ar);
3075 if(shift>=0) emit_shlimm(s,3,shift);
3076 if(~a) emit_andimm(s,a,ar);
3077 emit_readword_dualindexedx4(FP,map,map);
3078 }
3079 return map;
3080}
3081int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3082{
3083 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3084 emit_test(map,map);
3085 *jaddr=(int)out;
3086 emit_js(0);
3087 }
3088 return map;
3089}
3090
3091int gen_tlb_addr_r(int ar, int map) {
3092 if(map>=0) {
3093 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3094 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3095 }
3096}
3097
3098int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3099{
3100 if(c) {
3101 if(addr<0x80800000||addr>=0xC0000000) {
3102 // address_generation already loaded the const
3103 emit_readword_dualindexedx4(FP,map,map);
3104 }
3105 else
3106 return -1; // No mapping
3107 }
3108 else {
3109 assert(s!=map);
3110 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3111 emit_addsr12(map,s,map);
3112 // Schedule this while we wait on the load
3113 //if(x) emit_xorimm(s,x,ar);
3114 emit_readword_dualindexedx4(FP,map,map);
3115 }
3116 return map;
3117}
3118int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3119{
3120 if(!c||addr<0x80800000||addr>=0xC0000000) {
3121 emit_testimm(map,0x40000000);
3122 *jaddr=(int)out;
3123 emit_jne(0);
3124 }
3125}
3126
3127int gen_tlb_addr_w(int ar, int map) {
3128 if(map>=0) {
3129 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3130 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3131 }
3132}
3133
3134// Generate the address of the memory_map entry, relative to dynarec_local
3135generate_map_const(u_int addr,int reg) {
3136 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3137 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3138}
3139
3140/* Special assem */
3141
3142void shift_assemble_arm(int i,struct regstat *i_regs)
3143{
3144 if(rt1[i]) {
3145 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3146 {
3147 signed char s,t,shift;
3148 t=get_reg(i_regs->regmap,rt1[i]);
3149 s=get_reg(i_regs->regmap,rs1[i]);
3150 shift=get_reg(i_regs->regmap,rs2[i]);
3151 if(t>=0){
3152 if(rs1[i]==0)
3153 {
3154 emit_zeroreg(t);
3155 }
3156 else if(rs2[i]==0)
3157 {
3158 assert(s>=0);
3159 if(s!=t) emit_mov(s,t);
3160 }
3161 else
3162 {
3163 emit_andimm(shift,31,HOST_TEMPREG);
3164 if(opcode2[i]==4) // SLLV
3165 {
3166 emit_shl(s,HOST_TEMPREG,t);
3167 }
3168 if(opcode2[i]==6) // SRLV
3169 {
3170 emit_shr(s,HOST_TEMPREG,t);
3171 }
3172 if(opcode2[i]==7) // SRAV
3173 {
3174 emit_sar(s,HOST_TEMPREG,t);
3175 }
3176 }
3177 }
3178 } else { // DSLLV/DSRLV/DSRAV
3179 signed char sh,sl,th,tl,shift;
3180 th=get_reg(i_regs->regmap,rt1[i]|64);
3181 tl=get_reg(i_regs->regmap,rt1[i]);
3182 sh=get_reg(i_regs->regmap,rs1[i]|64);
3183 sl=get_reg(i_regs->regmap,rs1[i]);
3184 shift=get_reg(i_regs->regmap,rs2[i]);
3185 if(tl>=0){
3186 if(rs1[i]==0)
3187 {
3188 emit_zeroreg(tl);
3189 if(th>=0) emit_zeroreg(th);
3190 }
3191 else if(rs2[i]==0)
3192 {
3193 assert(sl>=0);
3194 if(sl!=tl) emit_mov(sl,tl);
3195 if(th>=0&&sh!=th) emit_mov(sh,th);
3196 }
3197 else
3198 {
3199 // FIXME: What if shift==tl ?
3200 assert(shift!=tl);
3201 int temp=get_reg(i_regs->regmap,-1);
3202 int real_th=th;
3203 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3204 assert(sl>=0);
3205 assert(sh>=0);
3206 emit_andimm(shift,31,HOST_TEMPREG);
3207 if(opcode2[i]==0x14) // DSLLV
3208 {
3209 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3210 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3211 emit_orrshr(sl,HOST_TEMPREG,th);
3212 emit_andimm(shift,31,HOST_TEMPREG);
3213 emit_testimm(shift,32);
3214 emit_shl(sl,HOST_TEMPREG,tl);
3215 if(th>=0) emit_cmovne_reg(tl,th);
3216 emit_cmovne_imm(0,tl);
3217 }
3218 if(opcode2[i]==0x16) // DSRLV
3219 {
3220 assert(th>=0);
3221 emit_shr(sl,HOST_TEMPREG,tl);
3222 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3223 emit_orrshl(sh,HOST_TEMPREG,tl);
3224 emit_andimm(shift,31,HOST_TEMPREG);
3225 emit_testimm(shift,32);
3226 emit_shr(sh,HOST_TEMPREG,th);
3227 emit_cmovne_reg(th,tl);
3228 if(real_th>=0) emit_cmovne_imm(0,th);
3229 }
3230 if(opcode2[i]==0x17) // DSRAV
3231 {
3232 assert(th>=0);
3233 emit_shr(sl,HOST_TEMPREG,tl);
3234 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3235 if(real_th>=0) {
3236 assert(temp>=0);
3237 emit_sarimm(th,31,temp);
3238 }
3239 emit_orrshl(sh,HOST_TEMPREG,tl);
3240 emit_andimm(shift,31,HOST_TEMPREG);
3241 emit_testimm(shift,32);
3242 emit_sar(sh,HOST_TEMPREG,th);
3243 emit_cmovne_reg(th,tl);
3244 if(real_th>=0) emit_cmovne_reg(temp,th);
3245 }
3246 }
3247 }
3248 }
3249 }
3250}
3251#define shift_assemble shift_assemble_arm
3252
3253void loadlr_assemble_arm(int i,struct regstat *i_regs)
3254{
3255 int s,th,tl,temp,temp2,addr,map=-1;
3256 int offset;
3257 int jaddr=0;
3258 int memtarget,c=0;
3259 u_int hr,reglist=0;
3260 th=get_reg(i_regs->regmap,rt1[i]|64);
3261 tl=get_reg(i_regs->regmap,rt1[i]);
3262 s=get_reg(i_regs->regmap,rs1[i]);
3263 temp=get_reg(i_regs->regmap,-1);
3264 temp2=get_reg(i_regs->regmap,FTEMP);
3265 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3266 assert(addr<0);
3267 offset=imm[i];
3268 for(hr=0;hr<HOST_REGS;hr++) {
3269 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3270 }
3271 reglist|=1<<temp;
3272 if(offset||s<0||c) addr=temp2;
3273 else addr=s;
3274 if(s>=0) {
3275 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3276 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3277 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3278 }
535d208a 3279 if(!using_tlb) {
3280 if(!c) {
3281 #ifdef RAM_OFFSET
3282 map=get_reg(i_regs->regmap,ROREG);
3283 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3284 #endif
3285 emit_shlimm(addr,3,temp);
3286 if (opcode[i]==0x22||opcode[i]==0x26) {
3287 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3288 }else{
535d208a 3289 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3290 }
535d208a 3291 emit_cmpimm(addr,RAM_SIZE);
3292 jaddr=(int)out;
3293 emit_jno(0);
3294 }
3295 else {
3296 if (opcode[i]==0x22||opcode[i]==0x26) {
3297 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3298 }else{
3299 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3300 }
57871462 3301 }
535d208a 3302 }else{ // using tlb
3303 int a;
3304 if(c) {
3305 a=-1;
3306 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3307 a=0xFFFFFFFC; // LWL/LWR
3308 }else{
3309 a=0xFFFFFFF8; // LDL/LDR
3310 }
3311 map=get_reg(i_regs->regmap,TLREG);
3312 assert(map>=0);
3313 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3314 if(c) {
3315 if (opcode[i]==0x22||opcode[i]==0x26) {
3316 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3317 }else{
3318 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3319 }
535d208a 3320 }
3321 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3322 }
3323 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3324 if(!c||memtarget) {
3325 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3326 emit_readword_indexed_tlb(0,temp2,map,temp2);
3327 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3328 }
3329 else
3330 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3331 if(rt1[i]) {
3332 assert(tl>=0);
57871462 3333 emit_andimm(temp,24,temp);
2002a1db 3334#ifdef BIG_ENDIAN_MIPS
3335 if (opcode[i]==0x26) // LWR
3336#else
3337 if (opcode[i]==0x22) // LWL
3338#endif
3339 emit_xorimm(temp,24,temp);
57871462 3340 emit_movimm(-1,HOST_TEMPREG);
3341 if (opcode[i]==0x26) {
3342 emit_shr(temp2,temp,temp2);
3343 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3344 }else{
3345 emit_shl(temp2,temp,temp2);
3346 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3347 }
3348 emit_or(temp2,tl,tl);
57871462 3349 }
535d208a 3350 //emit_storereg(rt1[i],tl); // DEBUG
3351 }
3352 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3353 // FIXME: little endian
3354 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3355 if(!c||memtarget) {
3356 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3357 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3358 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3359 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3360 }
3361 else
3362 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3363 if(rt1[i]) {
3364 assert(th>=0);
3365 assert(tl>=0);
57871462 3366 emit_testimm(temp,32);
3367 emit_andimm(temp,24,temp);
3368 if (opcode[i]==0x1A) { // LDL
3369 emit_rsbimm(temp,32,HOST_TEMPREG);
3370 emit_shl(temp2h,temp,temp2h);
3371 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3372 emit_movimm(-1,HOST_TEMPREG);
3373 emit_shl(temp2,temp,temp2);
3374 emit_cmove_reg(temp2h,th);
3375 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3376 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3377 emit_orreq(temp2,tl,tl);
3378 emit_orrne(temp2,th,th);
3379 }
3380 if (opcode[i]==0x1B) { // LDR
3381 emit_xorimm(temp,24,temp);
3382 emit_rsbimm(temp,32,HOST_TEMPREG);
3383 emit_shr(temp2,temp,temp2);
3384 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3385 emit_movimm(-1,HOST_TEMPREG);
3386 emit_shr(temp2h,temp,temp2h);
3387 emit_cmovne_reg(temp2,tl);
3388 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3389 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3390 emit_orrne(temp2h,th,th);
3391 emit_orreq(temp2h,tl,tl);
3392 }
3393 }
3394 }
3395}
3396#define loadlr_assemble loadlr_assemble_arm
3397
3398void cop0_assemble(int i,struct regstat *i_regs)
3399{
3400 if(opcode2[i]==0) // MFC0
3401 {
3402 signed char t=get_reg(i_regs->regmap,rt1[i]);
3403 char copr=(source[i]>>11)&0x1f;
3404 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3405 if(t>=0&&rt1[i]!=0) {
7139f3c8 3406#ifdef MUPEN64
57871462 3407 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3408 emit_movimm((source[i]>>11)&0x1f,1);
3409 emit_writeword(0,(int)&PC);
3410 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3411 if(copr==9) {
3412 emit_readword((int)&last_count,ECX);
3413 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3414 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3415 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3416 emit_writeword(HOST_CCREG,(int)&Count);
3417 }
3418 emit_call((int)MFC0);
3419 emit_readword((int)&readmem_dword,t);
7139f3c8 3420#else
3421 emit_readword((int)&reg_cop0+copr*4,t);
3422#endif
57871462 3423 }
3424 }
3425 else if(opcode2[i]==4) // MTC0
3426 {
3427 signed char s=get_reg(i_regs->regmap,rs1[i]);
3428 char copr=(source[i]>>11)&0x1f;
3429 assert(s>=0);
3430 emit_writeword(s,(int)&readmem_dword);
3431 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3432#ifdef MUPEN64
57871462 3433 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3434 emit_movimm((source[i]>>11)&0x1f,1);
3435 emit_writeword(0,(int)&PC);
3436 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3437#endif
3438 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3439 emit_readword((int)&last_count,ECX);
3440 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3441 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3442 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3443 emit_writeword(HOST_CCREG,(int)&Count);
3444 }
3445 // What a mess. The status register (12) can enable interrupts,
3446 // so needs a special case to handle a pending interrupt.
3447 // The interrupt must be taken immediately, because a subsequent
3448 // instruction might disable interrupts again.
7139f3c8 3449 if(copr==12||copr==13) {
fca1aef2 3450#ifdef PCSX
3451 if (is_delayslot) {
3452 // burn cycles to cause cc_interrupt, which will
3453 // reschedule next_interupt. Relies on CCREG from above.
3454 assem_debug("MTC0 DS %d\n", copr);
3455 emit_writeword(HOST_CCREG,(int)&last_count);
3456 emit_movimm(0,HOST_CCREG);
3457 emit_storereg(CCREG,HOST_CCREG);
3458 emit_movimm(copr,0);
3459 emit_call((int)pcsx_mtc0_ds);
3460 return;
3461 }
3462#endif
57871462 3463 emit_movimm(start+i*4+4,0);
3464 emit_movimm(0,1);
3465 emit_writeword(0,(int)&pcaddr);
3466 emit_writeword(1,(int)&pending_exception);
3467 }
3468 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3469 //else
fca1aef2 3470#ifdef PCSX
3471 emit_movimm(copr,0);
3472 emit_call((int)pcsx_mtc0);