drc: don't read readmem_dword to r0 or on dummy reads
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// State declared here but defined in other translation units.
extern int cycle_count;
extern int last_count;
extern int pcaddr;
extern int pending_exception;
extern int branch_target;
extern uint64_t readmem_dword;
#ifdef MUPEN64
extern precomp_instr fake_pc;
#endif
// emit_loadreg/emit_storereg encode addresses as offsets from &dynarec_local
extern void *dynarec_local;
// Indexed by (address>>12) in verify_dirty and get_bounds
extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
extern u_int rounding_modes[4];
34
// Routines that are only declared (not defined) in the visible part of this
// file. There is one jump_vaddr_rN per register number listed; note that no
// r11 variant is declared.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
50
// Addresses of the jump_vaddr_rN routines, indexed by register number N.
// Entries 11, 13, 14 and 15 are 0 because no routine is declared for them.
const u_int jump_vaddr_reg[16] = {
  (int)jump_vaddr_r0,
  (int)jump_vaddr_r1,
  (int)jump_vaddr_r2,
  (int)jump_vaddr_r3,
  (int)jump_vaddr_r4,
  (int)jump_vaddr_r5,
  (int)jump_vaddr_r6,
  (int)jump_vaddr_r7,
  (int)jump_vaddr_r8,
  (int)jump_vaddr_r9,
  (int)jump_vaddr_r10,
  0,
  (int)jump_vaddr_r12,
  0,
  0,
  0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
// Decide whether the block whose dirty entry stub starts at addr still
// matches its source. The stub supplies three values: source address, copy
// address and length. Returns nonzero when the len bytes at source equal the
// bytes at copy; returns 0 on mismatch, or when the verifier is
// verify_code_vm/verify_code_ds and the source pages cannot be used
// (see the memory_map checks below).
int verify_dirty(int addr)
{
  u_int *ptr=(u_int *)addr;
  #ifdef ARMv5_ONLY
  // get from literal pool
  // First word must be an ldr with immediate offset; the three values are
  // read from consecutive words at (ptr + offset + 8).
  assert((*ptr&0xFFF00000)==0xe5900000);
  u_int offset=*ptr&0xfff;
  u_int *l_ptr=(void *)ptr+offset+8;
  u_int source=l_ptr[0];
  u_int copy=l_ptr[1];
  u_int len=l_ptr[2];
  ptr+=4;
  #else
  // ARMv7 movw/movt
  // Each 16-bit half is split in the instruction word as a 12-bit field
  // (bits 0-11) plus a 4-bit field (bits 16-19). Words 0/2 give the low/high
  // halves of source, words 1/3 those of copy, word 4 a 16-bit len.
  assert((*ptr&0xFFF00000)==0xe3000000);
  u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #endif
  // The bl to the verifier is at this word or the next one
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
  if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
    // For these verifiers source is translated through memory_map first.
    unsigned int page=source>>12;
    unsigned int map_value=memory_map[page];
    // NOTE(review): a map entry with the top bit set is treated as unusable
    // here - presumably it marks an unmapped page; confirm against the
    // code that fills memory_map.
    if(map_value>=0x80000000) return 0;
    // Every page covered by [source, source+len) must have the same map
    // value (compared after <<2, so the top two bits are ignored).
    while(page<((source+len-1)>>12)) {
      if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
    }
    source = source+(map_value<<2);
  }
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return !memcmp((void *)source,(void *)copy,len);
}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
// Extract the source range covered by the block whose dirty entry stub
// starts at addr. The stub is decoded the same way as in verify_dirty.
// On return *start is the source address and *end is start + len. When the
// verifier is verify_code_vm/verify_code_ds the source is first translated
// through memory_map, or set to 0 if the map entry has its top bit set.
void get_bounds(int addr,u_int *start,u_int *end)
{
  u_int *ptr=(u_int *)addr;
  #ifdef ARMv5_ONLY
  // get from literal pool
  // First word must be an ldr with immediate offset; values are read from
  // consecutive words at (ptr + offset + 8).
  assert((*ptr&0xFFF00000)==0xe5900000);
  u_int offset=*ptr&0xfff;
  u_int *l_ptr=(void *)ptr+offset+8;
  u_int source=l_ptr[0];
  //u_int copy=l_ptr[1];
  u_int len=l_ptr[2];
  ptr+=4;
  #else
  // ARMv7 movw/movt
  // Words 0/2 carry the low/high halves of source, word 4 a 16-bit len;
  // each half is stored as a 12-bit field plus a 4-bit field at bits 16-19.
  assert((*ptr&0xFFF00000)==0xe3000000);
  u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #endif
  // The bl to the verifier is at this word or the next one
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
  if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
    if(memory_map[source>>12]>=0x80000000) source = 0;
    else source = source+(memory_map[source>>12]<<2);
  }
  *start=source;
  *end=source+len;
}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 ARM registers, indexed by register number.
// Used by the assem_debug() traces in the emit_* functions. Register 11 is
// printed as "fp", 13 as "sp", 14 as "lr" and 15 as "pc".
char regname[16][4] = {
 "r0",
 "r1",
 "r2",
 "r3",
 "r4",
 "r5",
 "r6",
 "r7",
 "r8",
 "r9",
 "r10",
 "fp",
 "r12",
 "sp",
 "lr",
 "pc"};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Pack three register numbers into the operand fields of an instruction
// word: rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rn<<16;
  fields|=rd<<12;
  fields|=rm;
  return fields;
}
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// bits (shift must be even). The shift is stored as ((32-shift)&30) in the
// field starting at bit 7, i.e. as the matching rotate-right amount.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rot=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rot<<7)|imm;
}
// Try to express imm as an 8-bit value plus an even rotation.
// On success the 12-bit operand (rotation field at bits 7-11 as written
// here, value in bits 0-7) is stored in *encoded and 1 is returned.
// Returns 0, leaving *encoded untouched, when no even rotation of imm fits
// in 8 bits.
u_int genimm(u_int imm,u_int *encoded)
{
  u_int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate imm right two bits at a time until it fits in a byte
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
827u_int genjmp(u_int addr)
828{
829 int offset=addr-(int)out-8;
e80343e2 830 if(offset<-33554432||offset>=33554432) {
831 if (addr>2) {
832 printf("genjmp: out of range: %08x\n", offset);
833 exit(1);
834 }
835 return 0;
836 }
57871462 837 return ((u_int)offset>>2)&0xffffff;
838}
839
840void emit_mov(int rs,int rt)
841{
842 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
843 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
844}
845
846void emit_movs(int rs,int rt)
847{
848 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
849 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
850}
851
852void emit_add(int rs1,int rs2,int rt)
853{
854 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
855 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
856}
857
858void emit_adds(int rs1,int rs2,int rt)
859{
860 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
861 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
862}
863
864void emit_adcs(int rs1,int rs2,int rt)
865{
866 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
867 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
868}
869
870void emit_sbc(int rs1,int rs2,int rt)
871{
872 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
873 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
874}
875
876void emit_sbcs(int rs1,int rs2,int rt)
877{
878 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
879 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
880}
881
882void emit_neg(int rs, int rt)
883{
884 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
885 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
886}
887
888void emit_negs(int rs, int rt)
889{
890 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
891 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
892}
893
894void emit_sub(int rs1,int rs2,int rt)
895{
896 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
897 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
898}
899
900void emit_subs(int rs1,int rs2,int rt)
901{
902 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
903 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
904}
905
906void emit_zeroreg(int rt)
907{
908 assem_debug("mov %s,#0\n",regname[rt]);
909 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
910}
911
912void emit_loadreg(int r, int hr)
913{
3d624f89 914#ifdef FORCE32
915 if(r&64) {
916 printf("64bit load in 32bit mode!\n");
917 exit(1);
918 }
919#endif
57871462 920 if((r&63)==0)
921 emit_zeroreg(hr);
922 else {
3d624f89 923 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 924 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
925 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
926 if(r==CCREG) addr=(int)&cycle_count;
927 if(r==CSREG) addr=(int)&Status;
928 if(r==FSREG) addr=(int)&FCR31;
929 if(r==INVCP) addr=(int)&invc_ptr;
930 u_int offset = addr-(u_int)&dynarec_local;
931 assert(offset<4096);
932 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
933 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
934 }
935}
936void emit_storereg(int r, int hr)
937{
3d624f89 938#ifdef FORCE32
939 if(r&64) {
940 printf("64bit store in 32bit mode!\n");
941 exit(1);
942 }
943#endif
944 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 945 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
946 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
947 if(r==CCREG) addr=(int)&cycle_count;
948 if(r==FSREG) addr=(int)&FCR31;
949 u_int offset = addr-(u_int)&dynarec_local;
950 assert(offset<4096);
951 assem_debug("str %s,fp+%d\n",regname[hr],offset);
952 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
953}
954
955void emit_test(int rs, int rt)
956{
957 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
958 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
959}
960
961void emit_testimm(int rs,int imm)
962{
963 u_int armval;
964 assem_debug("tst %s,$%d\n",regname[rs],imm);
965 assert(genimm(imm,&armval));
966 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
967}
968
b9b61529 969void emit_testeqimm(int rs,int imm)
970{
971 u_int armval;
972 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
973 assert(genimm(imm,&armval));
974 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
975}
976
57871462 977void emit_not(int rs,int rt)
978{
979 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
980 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
981}
982
b9b61529 983void emit_mvnmi(int rs,int rt)
984{
985 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
986 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
987}
988
57871462 989void emit_and(u_int rs1,u_int rs2,u_int rt)
990{
991 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
992 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
993}
994
995void emit_or(u_int rs1,u_int rs2,u_int rt)
996{
997 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
998 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
999}
1000void emit_or_and_set_flags(int rs1,int rs2,int rt)
1001{
1002 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1004}
1005
576bbd8f 1006void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1007{
1008 assert(rs<16);
1009 assert(rt<16);
1010 assert(imm<32);
1011 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1012 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1013}
1014
57871462 1015void emit_xor(u_int rs1,u_int rs2,u_int rt)
1016{
1017 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1018 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1019}
1020
1021void emit_loadlp(u_int imm,u_int rt)
1022{
1023 add_literal((int)out,imm);
1024 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1025 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1026}
1027void emit_movw(u_int imm,u_int rt)
1028{
1029 assert(imm<65536);
1030 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1031 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1032}
1033void emit_movt(u_int imm,u_int rt)
1034{
1035 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1036 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1037}
1038void emit_movimm(u_int imm,u_int rt)
1039{
1040 u_int armval;
1041 if(genimm(imm,&armval)) {
1042 assem_debug("mov %s,#%d\n",regname[rt],imm);
1043 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1044 }else if(genimm(~imm,&armval)) {
1045 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1046 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1047 }else if(imm<65536) {
1048 #ifdef ARMv5_ONLY
1049 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1050 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1051 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1052 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1053 #else
1054 emit_movw(imm,rt);
1055 #endif
1056 }else{
1057 #ifdef ARMv5_ONLY
1058 emit_loadlp(imm,rt);
1059 #else
1060 emit_movw(imm&0x0000FFFF,rt);
1061 emit_movt(imm&0xFFFF0000,rt);
1062 #endif
1063 }
1064}
1065void emit_pcreladdr(u_int rt)
1066{
1067 assem_debug("add %s,pc,#?\n",regname[rt]);
1068 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1069}
1070
1071void emit_addimm(u_int rs,int imm,u_int rt)
1072{
1073 assert(rs<16);
1074 assert(rt<16);
1075 if(imm!=0) {
1076 assert(imm>-65536&&imm<65536);
1077 u_int armval;
1078 if(genimm(imm,&armval)) {
1079 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1080 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1081 }else if(genimm(-imm,&armval)) {
1082 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1083 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1084 }else if(imm<0) {
1085 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1086 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1087 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1088 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1089 }else{
1090 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1091 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1092 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1093 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1094 }
1095 }
1096 else if(rs!=rt) emit_mov(rs,rt);
1097}
1098
1099void emit_addimm_and_set_flags(int imm,int rt)
1100{
1101 assert(imm>-65536&&imm<65536);
1102 u_int armval;
1103 if(genimm(imm,&armval)) {
1104 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1105 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1106 }else if(genimm(-imm,&armval)) {
1107 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1108 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1109 }else if(imm<0) {
1110 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1111 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1112 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1113 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1114 }else{
1115 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1116 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1117 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1118 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1119 }
1120}
1121void emit_addimm_no_flags(u_int imm,u_int rt)
1122{
1123 emit_addimm(rt,imm,rt);
1124}
1125
1126void emit_addnop(u_int r)
1127{
1128 assert(r<16);
1129 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1130 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1131}
1132
1133void emit_adcimm(u_int rs,int imm,u_int rt)
1134{
1135 u_int armval;
1136 assert(genimm(imm,&armval));
1137 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1138 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1139}
1140/*void emit_sbcimm(int imm,u_int rt)
1141{
1142 u_int armval;
1143 assert(genimm(imm,&armval));
1144 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1145 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1146}*/
1147void emit_sbbimm(int imm,u_int rt)
1148{
1149 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1150 assert(rt<8);
1151 if(imm<128&&imm>=-128) {
1152 output_byte(0x83);
1153 output_modrm(3,rt,3);
1154 output_byte(imm);
1155 }
1156 else
1157 {
1158 output_byte(0x81);
1159 output_modrm(3,rt,3);
1160 output_w32(imm);
1161 }
1162}
1163void emit_rscimm(int rs,int imm,u_int rt)
1164{
1165 assert(0);
1166 u_int armval;
1167 assert(genimm(imm,&armval));
1168 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1169 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1170}
1171
1172void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1173{
1174 // TODO: if(genimm(imm,&armval)) ...
1175 // else
1176 emit_movimm(imm,HOST_TEMPREG);
1177 emit_adds(HOST_TEMPREG,rsl,rtl);
1178 emit_adcimm(rsh,0,rth);
1179}
1180
1181void emit_sbb(int rs1,int rs2)
1182{
1183 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1184 output_byte(0x19);
1185 output_modrm(3,rs1,rs2);
1186}
1187
1188void emit_andimm(int rs,int imm,int rt)
1189{
1190 u_int armval;
1191 if(genimm(imm,&armval)) {
1192 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1193 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1194 }else if(genimm(~imm,&armval)) {
1195 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1196 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1197 }else if(imm==65535) {
1198 #ifdef ARMv5_ONLY
1199 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1200 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1201 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1202 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1203 #else
1204 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1205 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1206 #endif
1207 }else{
1208 assert(imm>0&&imm<65535);
1209 #ifdef ARMv5_ONLY
1210 assem_debug("mov r14,#%d\n",imm&0xFF00);
1211 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1212 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1213 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1214 #else
1215 emit_movw(imm,HOST_TEMPREG);
1216 #endif
1217 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1218 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1219 }
1220}
1221
1222void emit_orimm(int rs,int imm,int rt)
1223{
1224 u_int armval;
1225 if(genimm(imm,&armval)) {
1226 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1227 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1228 }else{
1229 assert(imm>0&&imm<65536);
1230 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1231 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1232 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1233 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1234 }
1235}
1236
1237void emit_xorimm(int rs,int imm,int rt)
1238{
57871462 1239 u_int armval;
1240 if(genimm(imm,&armval)) {
1241 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1242 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1243 }else{
514ed0d9 1244 assert(imm>0&&imm<65536);
57871462 1245 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1246 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1247 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1248 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1249 }
1250}
1251
1252void emit_shlimm(int rs,u_int imm,int rt)
1253{
1254 assert(imm>0);
1255 assert(imm<32);
1256 //if(imm==1) ...
1257 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1258 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1259}
1260
1261void emit_shrimm(int rs,u_int imm,int rt)
1262{
1263 assert(imm>0);
1264 assert(imm<32);
1265 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1266 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1267}
1268
1269void emit_sarimm(int rs,u_int imm,int rt)
1270{
1271 assert(imm>0);
1272 assert(imm<32);
1273 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1274 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1275}
1276
1277void emit_rorimm(int rs,u_int imm,int rt)
1278{
1279 assert(imm>0);
1280 assert(imm<32);
1281 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1282 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1283}
1284
1285void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1286{
1287 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1288 assert(imm>0);
1289 assert(imm<32);
1290 //if(imm==1) ...
1291 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1292 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1293 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1294 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1295}
1296
1297void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1298{
1299 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1300 assert(imm>0);
1301 assert(imm<32);
1302 //if(imm==1) ...
1303 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1305 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1306 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1307}
1308
b9b61529 1309void emit_signextend16(int rs,int rt)
1310{
1311 #ifdef ARMv5_ONLY
1312 emit_shlimm(rs,16,rt);
1313 emit_sarimm(rt,16,rt);
1314 #else
1315 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1316 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1317 #endif
1318}
1319
57871462 1320void emit_shl(u_int rs,u_int shift,u_int rt)
1321{
1322 assert(rs<16);
1323 assert(rt<16);
1324 assert(shift<16);
1325 //if(imm==1) ...
1326 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1327 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1328}
1329void emit_shr(u_int rs,u_int shift,u_int rt)
1330{
1331 assert(rs<16);
1332 assert(rt<16);
1333 assert(shift<16);
1334 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1335 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1336}
1337void emit_sar(u_int rs,u_int shift,u_int rt)
1338{
1339 assert(rs<16);
1340 assert(rt<16);
1341 assert(shift<16);
1342 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1343 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1344}
1345void emit_shlcl(int r)
1346{
1347 assem_debug("shl %%%s,%%cl\n",regname[r]);
1348 assert(0);
1349}
1350void emit_shrcl(int r)
1351{
1352 assem_debug("shr %%%s,%%cl\n",regname[r]);
1353 assert(0);
1354}
1355void emit_sarcl(int r)
1356{
1357 assem_debug("sar %%%s,%%cl\n",regname[r]);
1358 assert(0);
1359}
1360
1361void emit_shldcl(int r1,int r2)
1362{
1363 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1364 assert(0);
1365}
1366void emit_shrdcl(int r1,int r2)
1367{
1368 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1369 assert(0);
1370}
1371void emit_orrshl(u_int rs,u_int shift,u_int rt)
1372{
1373 assert(rs<16);
1374 assert(rt<16);
1375 assert(shift<16);
1376 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1377 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1378}
1379void emit_orrshr(u_int rs,u_int shift,u_int rt)
1380{
1381 assert(rs<16);
1382 assert(rt<16);
1383 assert(shift<16);
1384 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1385 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1386}
1387
1388void emit_cmpimm(int rs,int imm)
1389{
1390 u_int armval;
1391 if(genimm(imm,&armval)) {
1392 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1393 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1394 }else if(genimm(-imm,&armval)) {
1395 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1396 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1397 }else if(imm>0) {
1398 assert(imm<65536);
1399 #ifdef ARMv5_ONLY
1400 emit_movimm(imm,HOST_TEMPREG);
1401 #else
1402 emit_movw(imm,HOST_TEMPREG);
1403 #endif
1404 assem_debug("cmp %s,r14\n",regname[rs]);
1405 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1406 }else{
1407 assert(imm>-65536);
1408 #ifdef ARMv5_ONLY
1409 emit_movimm(-imm,HOST_TEMPREG);
1410 #else
1411 emit_movw(-imm,HOST_TEMPREG);
1412 #endif
1413 assem_debug("cmn %s,r14\n",regname[rs]);
1414 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1415 }
1416}
1417
1418void emit_cmovne(u_int *addr,int rt)
1419{
1420 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1421 assert(0);
1422}
1423void emit_cmovl(u_int *addr,int rt)
1424{
1425 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1426 assert(0);
1427}
1428void emit_cmovs(u_int *addr,int rt)
1429{
1430 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1431 assert(0);
1432}
1433void emit_cmovne_imm(int imm,int rt)
1434{
1435 assem_debug("movne %s,#%d\n",regname[rt],imm);
1436 u_int armval;
1437 assert(genimm(imm,&armval));
1438 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1439}
1440void emit_cmovl_imm(int imm,int rt)
1441{
1442 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1443 u_int armval;
1444 assert(genimm(imm,&armval));
1445 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1446}
1447void emit_cmovb_imm(int imm,int rt)
1448{
1449 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1450 u_int armval;
1451 assert(genimm(imm,&armval));
1452 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1453}
1454void emit_cmovs_imm(int imm,int rt)
1455{
1456 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1457 u_int armval;
1458 assert(genimm(imm,&armval));
1459 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1460}
1461void emit_cmove_reg(int rs,int rt)
1462{
1463 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1464 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1465}
1466void emit_cmovne_reg(int rs,int rt)
1467{
1468 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1469 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1470}
1471void emit_cmovl_reg(int rs,int rt)
1472{
1473 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1474 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1475}
1476void emit_cmovs_reg(int rs,int rt)
1477{
1478 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1479 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1480}
1481
// rt = (rs < imm) using the "lt" condition. When rs and rt are the same
// register the zeroing has to wait until after the compare has read rs.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) using the "cc" condition. When rs and rt are the same
// register the zeroing has to wait until after the compare has read rs.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than for a value held in rsh:rsl against a 32-bit immediate.
// The low-word result is computed first and then corrected from the high
// word: against 0 for imm>=0, against -1 for imm<0.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned set-less-than for a value held in rsh:rsl against a 32-bit
// immediate. The low-word result is computed first and then overridden when
// the high word differs from 0 (imm>=0) or from -1 (imm<0).
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1528
1529void emit_cmp(int rs,int rt)
1530{
1531 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1532 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1533}
// rt = 1, then forced to 0 when the compare of rs against 1 yields "lt".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Sets the flags from rs (emit_test in place, emit_movs when copying into a
// different rt) and then writes 1 to rt under the "ne" condition.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Starts from emit_set_gz32 on the low word, then adjusts from the high word:
// 1 under "ne", 0 under "mi".
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// Combines both halves with emit_or_and_set_flags into rt, then writes 1 to
// rt under the "ne" condition.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// rt = (rs1 < rs2) using the "lt" condition. If rt aliases either operand it
// is cleared only after the compare has been emitted.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2) using the "cc" condition. If rt aliases either operand it
// is cleared only after the compare has been emitted.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1578void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1579{
1580 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1581 assert(u1!=rt);
1582 assert(u2!=rt);
1583 emit_cmp(l1,l2);
1584 emit_movimm(0,rt);
1585 emit_sbcs(u1,u2,HOST_TEMPREG);
1586 emit_cmovl_imm(1,rt);
1587}
1588void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1589{
1590 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1591 assert(u1!=rt);
1592 assert(u2!=rt);
1593 emit_cmp(l1,l2);
1594 emit_movimm(0,rt);
1595 emit_sbcs(u1,u2,HOST_TEMPREG);
1596 emit_cmovb_imm(1,rt);
1597}
1598
1599void emit_call(int a)
1600{
1601 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1602 u_int offset=genjmp(a);
1603 output_w32(0xeb000000|offset);
1604}
1605void emit_jmp(int a)
1606{
1607 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1608 u_int offset=genjmp(a);
1609 output_w32(0xea000000|offset);
1610}
1611void emit_jne(int a)
1612{
1613 assem_debug("bne %x\n",a);
1614 u_int offset=genjmp(a);
1615 output_w32(0x1a000000|offset);
1616}
1617void emit_jeq(int a)
1618{
1619 assem_debug("beq %x\n",a);
1620 u_int offset=genjmp(a);
1621 output_w32(0x0a000000|offset);
1622}
1623void emit_js(int a)
1624{
1625 assem_debug("bmi %x\n",a);
1626 u_int offset=genjmp(a);
1627 output_w32(0x4a000000|offset);
1628}
1629void emit_jns(int a)
1630{
1631 assem_debug("bpl %x\n",a);
1632 u_int offset=genjmp(a);
1633 output_w32(0x5a000000|offset);
1634}
1635void emit_jl(int a)
1636{
1637 assem_debug("blt %x\n",a);
1638 u_int offset=genjmp(a);
1639 output_w32(0xba000000|offset);
1640}
1641void emit_jge(int a)
1642{
1643 assem_debug("bge %x\n",a);
1644 u_int offset=genjmp(a);
1645 output_w32(0xaa000000|offset);
1646}
1647void emit_jno(int a)
1648{
1649 assem_debug("bvc %x\n",a);
1650 u_int offset=genjmp(a);
1651 output_w32(0x7a000000|offset);
1652}
1653void emit_jc(int a)
1654{
1655 assem_debug("bcs %x\n",a);
1656 u_int offset=genjmp(a);
1657 output_w32(0x2a000000|offset);
1658}
1659void emit_jcc(int a)
1660{
1661 assem_debug("bcc %x\n",a);
1662 u_int offset=genjmp(a);
1663 output_w32(0x3a000000|offset);
1664}
1665
// Unimplemented here: traces an x86-style "push $imm" and then traps.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Unimplemented here: traces an x86-style "pusha" and then traps.
void emit_pusha()
{
  assem_debug( "pusha\n" );
  assert(0);
}
// Unimplemented here: traces an x86-style "popa" and then traps.
void emit_popa()
{
  assem_debug( "popa\n" );
  assert(0);
}
1681void emit_pushreg(u_int r)
1682{
1683 assem_debug("push %%%s\n",regname[r]);
1684 assert(0);
1685}
1686void emit_popreg(u_int r)
1687{
1688 assem_debug("pop %%%s\n",regname[r]);
1689 assert(0);
1690}
1691void emit_callreg(u_int r)
1692{
1693 assem_debug("call *%%%s\n",regname[r]);
1694 assert(0);
1695}
1696void emit_jmpreg(u_int r)
1697{
1698 assem_debug("mov pc,%s\n",regname[r]);
1699 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1700}
1701
1702void emit_readword_indexed(int offset, int rs, int rt)
1703{
1704 assert(offset>-4096&&offset<4096);
1705 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1706 if(offset>=0) {
1707 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1708 }else{
1709 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1710 }
1711}
1712void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1713{
1714 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1715 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1716}
// Word load with optional map register: map<0 selects the plain
// base+offset form; otherwise addr must be 0 and the load is indexed by
// map (scaled by 4).
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Two-word load into rh (skipped when rh<0) and rl.
// Without a map register the words come from addr and addr+4. With one, map
// is incremented by 1 between the loads (the indexed form scales it by 4),
// so map is modified; rh must not alias rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1737void emit_movsbl_indexed(int offset, int rs, int rt)
1738{
1739 assert(offset>-256&&offset<256);
1740 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1741 if(offset>=0) {
1742 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1743 }else{
1744 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1745 }
1746}
1747void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1748{
1749 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1750 else {
1751 if(addr==0) {
1752 emit_shlimm(map,2,map);
1753 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1754 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1755 }else{
1756 assert(addr>-256&&addr<256);
1757 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1758 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1759 emit_movsbl_indexed(addr, rt, rt);
1760 }
1761 }
1762}
1763void emit_movswl_indexed(int offset, int rs, int rt)
1764{
1765 assert(offset>-256&&offset<256);
1766 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1767 if(offset>=0) {
1768 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1769 }else{
1770 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1771 }
1772}
1773void emit_movzbl_indexed(int offset, int rs, int rt)
1774{
1775 assert(offset>-4096&&offset<4096);
1776 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1777 if(offset>=0) {
1778 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1779 }else{
1780 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1781 }
1782}
1783void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1784{
1785 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1786 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1787}
// Zero-extending byte load with optional map register. With a map and a
// non-zero addr, rs+addr is first formed in rt and the load goes through rt.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1800void emit_movzwl_indexed(int offset, int rs, int rt)
1801{
1802 assert(offset>-256&&offset<256);
1803 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1804 if(offset>=0) {
1805 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1806 }else{
1807 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1808 }
1809}
1810void emit_readword(int addr, int rt)
1811{
1812 u_int offset = addr-(u_int)&dynarec_local;
1813 assert(offset<4096);
1814 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1815 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1816}
1817void emit_movsbl(int addr, int rt)
1818{
1819 u_int offset = addr-(u_int)&dynarec_local;
1820 assert(offset<256);
1821 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1822 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1823}
1824void emit_movswl(int addr, int rt)
1825{
1826 u_int offset = addr-(u_int)&dynarec_local;
1827 assert(offset<256);
1828 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1829 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1830}
1831void emit_movzbl(int addr, int rt)
1832{
1833 u_int offset = addr-(u_int)&dynarec_local;
1834 assert(offset<4096);
1835 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1836 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1837}
1838void emit_movzwl(int addr, int rt)
1839{
1840 u_int offset = addr-(u_int)&dynarec_local;
1841 assert(offset<256);
1842 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1843 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1844}
1845void emit_movzwl_reg(int rs, int rt)
1846{
1847 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1848 assert(0);
1849}
1850
1851void emit_xchg(int rs, int rt)
1852{
1853 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1854 assert(0);
1855}
1856void emit_writeword_indexed(int rt, int offset, int rs)
1857{
1858 assert(offset>-4096&&offset<4096);
1859 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1860 if(offset>=0) {
1861 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1862 }else{
1863 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1864 }
1865}
1866void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1867{
1868 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1869 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1870}
// Word store with optional map register: map<0 selects the plain
// base+offset form; otherwise addr must be 0 and the store is indexed by
// map (scaled by 4). The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Two-word store of rh (first word) and rl (second word).
// Without a map register the words go to addr and addr+4 (rh may be <0 to
// skip the first). With a map register rh is required; the second word is
// reached either through temp = map+1 (when temp is distinct from rs) or by
// advancing rs itself by 4 bytes.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1895void emit_writehword_indexed(int rt, int offset, int rs)
1896{
1897 assert(offset>-256&&offset<256);
1898 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1899 if(offset>=0) {
1900 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1901 }else{
1902 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1903 }
1904}
1905void emit_writebyte_indexed(int rt, int offset, int rs)
1906{
1907 assert(offset>-4096&&offset<4096);
1908 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1909 if(offset>=0) {
1910 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1911 }else{
1912 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1913 }
1914}
1915void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1916{
1917 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1918 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1919}
// Byte store with optional map register. With a map and a non-zero addr,
// rs+addr is first formed in temp and the store goes through temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
1932void emit_writeword(int rt, int addr)
1933{
1934 u_int offset = addr-(u_int)&dynarec_local;
1935 assert(offset<4096);
1936 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1937 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1938}
1939void emit_writehword(int rt, int addr)
1940{
1941 u_int offset = addr-(u_int)&dynarec_local;
1942 assert(offset<256);
1943 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1944 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1945}
1946void emit_writebyte(int rt, int addr)
1947{
1948 u_int offset = addr-(u_int)&dynarec_local;
1949 assert(offset<4096);
74426039 1950 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1951 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1952}
// Unimplemented here: traces an x86-style "movl $imm,addr" and then traps.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Unimplemented here: traces an x86-style "movb $imm,addr" and then traps.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1963
1964void emit_mul(int rs)
1965{
1966 assem_debug("mul %%%s\n",regname[rs]);
1967 assert(0);
1968}
1969void emit_imul(int rs)
1970{
1971 assem_debug("imul %%%s\n",regname[rs]);
1972 assert(0);
1973}
1974void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1984{
1985 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1986 assert(rs1<16);
1987 assert(rs2<16);
1988 assert(hi<16);
1989 assert(lo<16);
1990 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1991}
1992
1993void emit_div(int rs)
1994{
1995 assem_debug("div %%%s\n",regname[rs]);
1996 assert(0);
1997}
1998void emit_idiv(int rs)
1999{
2000 assem_debug("idiv %%%s\n",regname[rs]);
2001 assert(0);
2002}
// Unimplemented here: traces an x86-style "cdq" and then traps.
void emit_cdq()
{
  assem_debug( "cdq\n" );
  assert(0);
}
2008
2009void emit_clz(int rs,int rt)
2010{
2011 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2012 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2013}
2014
2015void emit_subcs(int rs1,int rs2,int rt)
2016{
2017 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2018 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2019}
2020
2021void emit_shrcc_imm(int rs,u_int imm,int rt)
2022{
2023 assert(imm>0);
2024 assert(imm<32);
2025 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2026 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2027}
2028
2029void emit_negmi(int rs, int rt)
2030{
2031 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2032 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2033}
2034
2035void emit_negsmi(int rs, int rt)
2036{
2037 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2038 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2039}
2040
2041void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2042{
2043 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2044 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2045}
2046
2047void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2048{
2049 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2050 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2051}
2052
2053void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2054{
2055 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2056 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2057}
2058
2059void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2060{
2061 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2062 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2063}
2064
2065void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2066{
2067 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2068 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2069}
2070
2071void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2072{
2073 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2074 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2075}
2076
2077void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2078{
2079 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2080 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2081}
2082
2083void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2084{
2085 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2086 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2087}
2088
2089void emit_teq(int rs, int rt)
2090{
2091 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2092 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2093}
2094
2095void emit_rsbimm(int rs, int imm, int rt)
2096{
2097 u_int armval;
2098 assert(genimm(imm,&armval));
2099 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2100 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2101}
2102
2103// Load 2 immediates optimizing for small code size
2104void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2105{
2106 emit_movimm(imm1,rt1);
2107 u_int armval;
2108 if(genimm(imm2-imm1,&armval)) {
2109 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2110 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2111 }else if(genimm(imm1-imm2,&armval)) {
2112 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2113 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2114 }
2115 else emit_movimm(imm2,rt2);
2116}
2117
2118// Conditionally select one of two immediates, optimizing for small code size
2119// This will only be called if HAVE_CMOV_IMM is defined
2120void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2121{
2122 u_int armval;
2123 if(genimm(imm2-imm1,&armval)) {
2124 emit_movimm(imm1,rt);
2125 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2126 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2127 }else if(genimm(imm1-imm2,&armval)) {
2128 emit_movimm(imm1,rt);
2129 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2130 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2131 }
2132 else {
2133 #ifdef ARMv5_ONLY
2134 emit_movimm(imm1,rt);
2135 add_literal((int)out,imm2);
2136 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2137 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2138 #else
2139 emit_movw(imm1&0x0000FFFF,rt);
2140 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2141 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2142 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2143 }
2144 emit_movt(imm1&0xFFFF0000,rt);
2145 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2146 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2147 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2148 }
2149 #endif
2150 }
2151}
2152
// Special case for checking invalid_code (absolute-address form).
// Not available on this backend: any call traps.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert( 0 );
}
2158
2159// special case for checking invalid_code
2160void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2161{
2162 assert(imm<128&&imm>=0);
2163 assert(r>=0&&r<16);
2164 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2165 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2166 emit_cmpimm(HOST_TEMPREG,imm);
2167}
2168
2169// special case for tlb mapping
2170void emit_addsr12(int rs1,int rs2,int rt)
2171{
2172 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2173 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2174}
2175
// Used to preload hash table entries
// NOTE(review): the bytes written here (0x0F 0x18, a modrm byte, then a
// 32-bit address) do not look like an ARM instruction; this appears to be
// carried over from another backend - confirm it is never reached on ARM.
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2185void emit_prefetchreg(int r)
2186{
2187 assem_debug("pld %s\n",regname[r]);
2188 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2189}
2190
2191// Special case for mini_ht
2192void emit_ldreq_indexed(int rs, u_int offset, int rt)
2193{
2194 assert(offset<4096);
2195 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2196 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2197}
2198
2199void emit_flds(int r,int sr)
2200{
2201 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2202 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2203}
2204
2205void emit_vldr(int r,int vr)
2206{
2207 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2208 output_w32(0xed900b00|(vr<<12)|(r<<16));
2209}
2210
2211void emit_fsts(int sr,int r)
2212{
2213 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2214 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2215}
2216
2217void emit_vstr(int vr,int r)
2218{
2219 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2220 output_w32(0xed800b00|(vr<<12)|(r<<16));
2221}
2222
2223void emit_ftosizs(int s,int d)
2224{
2225 assem_debug("ftosizs s%d,s%d\n",d,s);
2226 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2227}
2228
2229void emit_ftosizd(int s,int d)
2230{
2231 assem_debug("ftosizd s%d,d%d\n",d,s);
2232 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2233}
2234
2235void emit_fsitos(int s,int d)
2236{
2237 assem_debug("fsitos s%d,s%d\n",d,s);
2238 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2239}
2240
2241void emit_fsitod(int s,int d)
2242{
2243 assem_debug("fsitod d%d,s%d\n",d,s);
2244 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2245}
2246
2247void emit_fcvtds(int s,int d)
2248{
2249 assem_debug("fcvtds d%d,s%d\n",d,s);
2250 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2251}
2252
2253void emit_fcvtsd(int s,int d)
2254{
2255 assem_debug("fcvtsd s%d,d%d\n",d,s);
2256 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2257}
2258
// Emits "fsqrts s<d>,s<s>" (source s, destination d).
// Both operands use the single-precision register packing, so the debug
// text now names them both as s-registers (it previously printed "d%d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2264
// Emits "fsqrtd d<d>,d<s>" (source s, destination d).
// Both operands use the double-precision register packing, so the debug
// text now names them both as d-registers (it previously printed "s%d").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2270
// Emits "fabss s<d>,s<s>" (source s, destination d).
// Both operands use the single-precision register packing, so the debug
// text now names them both as s-registers (it previously printed "d%d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2276
// Emits "fabsd d<d>,d<s>" (source s, destination d).
// Both operands use the double-precision register packing, so the debug
// text now names them both as d-registers (it previously printed "s%d").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2282
// Emits "fnegs s<d>,s<s>" (source s, destination d).
// Both operands use the single-precision register packing, so the debug
// text now names them both as s-registers (it previously printed "d%d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2288
// Emits "fnegd d<d>,d<s>" (source s, destination d).
// Both operands use the double-precision register packing, so the debug
// text now names them both as d-registers (it previously printed "s%d").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2294
2295void emit_fadds(int s1,int s2,int d)
2296{
2297 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2298 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2299}
2300
2301void emit_faddd(int s1,int s2,int d)
2302{
2303 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2304 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2305}
2306
2307void emit_fsubs(int s1,int s2,int d)
2308{
2309 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2310 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2311}
2312
2313void emit_fsubd(int s1,int s2,int d)
2314{
2315 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2316 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2317}
2318
2319void emit_fmuls(int s1,int s2,int d)
2320{
2321 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2322 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2323}
2324
2325void emit_fmuld(int s1,int s2,int d)
2326{
2327 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2328 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2329}
2330
2331void emit_fdivs(int s1,int s2,int d)
2332{
2333 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2334 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2335}
2336
2337void emit_fdivd(int s1,int s2,int d)
2338{
2339 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2340 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2341}
2342
2343void emit_fcmps(int x,int y)
2344{
2345 assem_debug("fcmps s14, s15\n");
2346 output_w32(0xeeb47a67);
2347}
2348
2349void emit_fcmpd(int x,int y)
2350{
2351 assem_debug("fcmpd d6, d7\n");
2352 output_w32(0xeeb46b47);
2353}
2354
2355void emit_fmstat()
2356{
2357 assem_debug("fmstat\n");
2358 output_w32(0xeef1fa10);
2359}
2360
2361void emit_bicne_imm(int rs,int imm,int rt)
2362{
2363 u_int armval;
2364 assert(genimm(imm,&armval));
2365 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2366 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2367}
2368
2369void emit_biccs_imm(int rs,int imm,int rt)
2370{
2371 u_int armval;
2372 assert(genimm(imm,&armval));
2373 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2374 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2375}
2376
2377void emit_bicvc_imm(int rs,int imm,int rt)
2378{
2379 u_int armval;
2380 assert(genimm(imm,&armval));
2381 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2382 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2383}
2384
2385void emit_bichi_imm(int rs,int imm,int rt)
2386{
2387 u_int armval;
2388 assert(genimm(imm,&armval));
2389 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2390 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2391}
2392
2393void emit_orrvs_imm(int rs,int imm,int rt)
2394{
2395 u_int armval;
2396 assert(genimm(imm,&armval));
2397 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2398 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2399}
2400
b9b61529 2401void emit_orrne_imm(int rs,int imm,int rt)
2402{
2403 u_int armval;
2404 assert(genimm(imm,&armval));
2405 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2406 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2407}
2408
2409void emit_andne_imm(int rs,int imm,int rt)
2410{
2411 u_int armval;
2412 assert(genimm(imm,&armval));
2413 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2414 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2415}
2416
57871462 2417void emit_jno_unlikely(int a)
2418{
2419 //emit_jno(a);
2420 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2421 output_w32(0x72800000|rd_rn_rm(15,15,0));
2422}
2423
2424// Save registers before function call
2425void save_regs(u_int reglist)
2426{
2427 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2428 if(!reglist) return;
2429 assem_debug("stmia fp,{");
2430 if(reglist&1) assem_debug("r0, ");
2431 if(reglist&2) assem_debug("r1, ");
2432 if(reglist&4) assem_debug("r2, ");
2433 if(reglist&8) assem_debug("r3, ");
2434 if(reglist&0x1000) assem_debug("r12");
2435 assem_debug("}\n");
2436 output_w32(0xe88b0000|reglist);
2437}
2438// Restore registers after function call
2439void restore_regs(u_int reglist)
2440{
2441 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2442 if(!reglist) return;
2443 assem_debug("ldmia fp,{");
2444 if(reglist&1) assem_debug("r0, ");
2445 if(reglist&2) assem_debug("r1, ");
2446 if(reglist&4) assem_debug("r2, ");
2447 if(reglist&8) assem_debug("r3, ");
2448 if(reglist&0x1000) assem_debug("r12");
2449 assem_debug("}\n");
2450 output_w32(0xe89b0000|reglist);
2451}
2452
2453// Write back consts using r14 so we don't disturb the other registers
2454void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2455{
2456 int hr;
2457 for(hr=0;hr<HOST_REGS;hr++) {
2458 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2459 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2460 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2461 int value=constmap[i][hr];
2462 if(value==0) {
2463 emit_zeroreg(HOST_TEMPREG);
2464 }
2465 else {
2466 emit_movimm(value,HOST_TEMPREG);
2467 }
2468 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2469#ifndef FORCE32
57871462 2470 if((i_is32>>i_regmap[hr])&1) {
2471 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2472 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2473 }
24385cae 2474#endif
57871462 2475 }
2476 }
2477 }
2478 }
2479}
2480
2481/* Stubs/epilogue */
2482
2483void literal_pool(int n)
2484{
2485 if(!literalcount) return;
2486 if(n) {
2487 if((int)out-literals[0][0]<4096-n) return;
2488 }
2489 u_int *ptr;
2490 int i;
2491 for(i=0;i<literalcount;i++)
2492 {
2493 ptr=(u_int *)literals[i][0];
2494 u_int offset=(u_int)out-(u_int)ptr-8;
2495 assert(offset<4096);
2496 assert(!(offset&3));
2497 *ptr|=offset;
2498 output_w32(literals[i][1]);
2499 }
2500 literalcount=0;
2501}
2502
2503void literal_pool_jumpover(int n)
2504{
2505 if(!literalcount) return;
2506 if(n) {
2507 if((int)out-literals[0][0]<4096-n) return;
2508 }
2509 int jaddr=(int)out;
2510 emit_jmp(0);
2511 literal_pool(0);
2512 set_jump_target(jaddr,(int)out);
2513}
2514
2515emit_extjump2(int addr, int target, int linker)
2516{
2517 u_char *ptr=(u_char *)addr;
2518 assert((ptr[3]&0x0e)==0xa);
2519 emit_loadlp(target,0);
2520 emit_loadlp(addr,1);
24385cae 2521 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2522 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2523//DEBUG >
2524#ifdef DEBUG_CYCLE_COUNT
2525 emit_readword((int)&last_count,ECX);
2526 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2527 emit_readword((int)&next_interupt,ECX);
2528 emit_writeword(HOST_CCREG,(int)&Count);
2529 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2530 emit_writeword(ECX,(int)&last_count);
2531#endif
2532//DEBUG <
2533 emit_jmp(linker);
2534}
2535
2536emit_extjump(int addr, int target)
2537{
2538 emit_extjump2(addr, target, (int)dyna_linker);
2539}
2540emit_extjump_ds(int addr, int target)
2541{
2542 emit_extjump2(addr, target, (int)dyna_linker_ds);
2543}
2544
2545do_readstub(int n)
2546{
2547 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2548 literal_pool(256);
2549 set_jump_target(stubs[n][1],(int)out);
2550 int type=stubs[n][0];
2551 int i=stubs[n][3];
2552 int rs=stubs[n][4];
2553 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2554 u_int reglist=stubs[n][7];
2555 signed char *i_regmap=i_regs->regmap;
2556 int addr=get_reg(i_regmap,AGEN1+(i&1));
2557 int rth,rt;
2558 int ds;
b9b61529 2559 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2560 rth=get_reg(i_regmap,FTEMP|64);
2561 rt=get_reg(i_regmap,FTEMP);
2562 }else{
2563 rth=get_reg(i_regmap,rt1[i]|64);
2564 rt=get_reg(i_regmap,rt1[i]);
2565 }
2566 assert(rs>=0);
57871462 2567 if(addr<0) addr=rt;
f18c0f46 2568 if(addr<0)
2569 // assume dummy read, no alloced reg
2570 addr=get_reg(i_regmap,-1);
57871462 2571 assert(addr>=0);
2572 int ftable=0;
2573 if(type==LOADB_STUB||type==LOADBU_STUB)
2574 ftable=(int)readmemb;
2575 if(type==LOADH_STUB||type==LOADHU_STUB)
2576 ftable=(int)readmemh;
2577 if(type==LOADW_STUB)
2578 ftable=(int)readmem;
24385cae 2579#ifndef FORCE32
57871462 2580 if(type==LOADD_STUB)
2581 ftable=(int)readmemd;
24385cae 2582#endif
2583 assert(ftable!=0);
57871462 2584 emit_writeword(rs,(int)&address);
2585 //emit_pusha();
2586 save_regs(reglist);
2587 ds=i_regs!=&regs[i];
2588 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2589 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2590 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2591 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2592 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2593 emit_shrimm(rs,16,1);
2594 int cc=get_reg(i_regmap,CCREG);
2595 if(cc<0) {
2596 emit_loadreg(CCREG,2);
2597 }
2598 emit_movimm(ftable,0);
2599 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2600 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2601 //emit_readword((int)&last_count,temp);
2602 //emit_add(cc,temp,cc);
2603 //emit_writeword(cc,(int)&Count);
2604 //emit_mov(15,14);
2605 emit_call((int)&indirect_jump_indexed);
2606 //emit_callreg(rs);
2607 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2608 // We really shouldn't need to update the count here,
2609 // but not doing so causes random crashes...
2610 emit_readword((int)&Count,HOST_TEMPREG);
2611 emit_readword((int)&next_interupt,2);
2612 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2613 emit_writeword(2,(int)&last_count);
2614 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2615 if(cc<0) {
2616 emit_storereg(CCREG,HOST_TEMPREG);
2617 }
2618 //emit_popa();
2619 restore_regs(reglist);
2620 //if((cc=get_reg(regmap,CCREG))>=0) {
2621 // emit_loadreg(CCREG,cc);
2622 //}
f18c0f46 2623 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2624 assert(rt>=0);
2625 if(type==LOADB_STUB)
2626 emit_movsbl((int)&readmem_dword,rt);
2627 if(type==LOADBU_STUB)
2628 emit_movzbl((int)&readmem_dword,rt);
2629 if(type==LOADH_STUB)
2630 emit_movswl((int)&readmem_dword,rt);
2631 if(type==LOADHU_STUB)
2632 emit_movzwl((int)&readmem_dword,rt);
2633 if(type==LOADW_STUB)
2634 emit_readword((int)&readmem_dword,rt);
2635 if(type==LOADD_STUB) {
2636 emit_readword((int)&readmem_dword,rt);
2637 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2638 }
57871462 2639 }
2640 emit_jmp(stubs[n][2]); // return address
2641}
2642
2643inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2644{
2645 int rs=get_reg(regmap,target);
2646 int rth=get_reg(regmap,target|64);
2647 int rt=get_reg(regmap,target);
2648 assert(rs>=0);
2649 assert(rt>=0);
2650 int ftable=0;
2651 if(type==LOADB_STUB||type==LOADBU_STUB)
2652 ftable=(int)readmemb;
2653 if(type==LOADH_STUB||type==LOADHU_STUB)
2654 ftable=(int)readmemh;
2655 if(type==LOADW_STUB)
2656 ftable=(int)readmem;
24385cae 2657#ifndef FORCE32
57871462 2658 if(type==LOADD_STUB)
2659 ftable=(int)readmemd;
24385cae 2660#endif
2661 assert(ftable!=0);
57871462 2662 emit_writeword(rs,(int)&address);
2663 //emit_pusha();
2664 save_regs(reglist);
2665 //emit_shrimm(rs,16,1);
2666 int cc=get_reg(regmap,CCREG);
2667 if(cc<0) {
2668 emit_loadreg(CCREG,2);
2669 }
2670 //emit_movimm(ftable,0);
2671 emit_movimm(((u_int *)ftable)[addr>>16],0);
2672 //emit_readword((int)&last_count,12);
2673 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2674 if((signed int)addr>=(signed int)0xC0000000) {
2675 // Pagefault address
2676 int ds=regmap!=regs[i].regmap;
2677 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2678 }
2679 //emit_add(12,2,2);
2680 //emit_writeword(2,(int)&Count);
2681 //emit_call(((u_int *)ftable)[addr>>16]);
2682 emit_call((int)&indirect_jump);
2683 // We really shouldn't need to update the count here,
2684 // but not doing so causes random crashes...
2685 emit_readword((int)&Count,HOST_TEMPREG);
2686 emit_readword((int)&next_interupt,2);
2687 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2688 emit_writeword(2,(int)&last_count);
2689 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2690 if(cc<0) {
2691 emit_storereg(CCREG,HOST_TEMPREG);
2692 }
2693 //emit_popa();
2694 restore_regs(reglist);
2695 if(type==LOADB_STUB)
2696 emit_movsbl((int)&readmem_dword,rt);
2697 if(type==LOADBU_STUB)
2698 emit_movzbl((int)&readmem_dword,rt);
2699 if(type==LOADH_STUB)
2700 emit_movswl((int)&readmem_dword,rt);
2701 if(type==LOADHU_STUB)
2702 emit_movzwl((int)&readmem_dword,rt);
2703 if(type==LOADW_STUB)
2704 emit_readword((int)&readmem_dword,rt);
2705 if(type==LOADD_STUB) {
2706 emit_readword((int)&readmem_dword,rt);
2707 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2708 }
2709}
2710
2711do_writestub(int n)
2712{
2713 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2714 literal_pool(256);
2715 set_jump_target(stubs[n][1],(int)out);
2716 int type=stubs[n][0];
2717 int i=stubs[n][3];
2718 int rs=stubs[n][4];
2719 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2720 u_int reglist=stubs[n][7];
2721 signed char *i_regmap=i_regs->regmap;
2722 int addr=get_reg(i_regmap,AGEN1+(i&1));
2723 int rth,rt,r;
2724 int ds;
b9b61529 2725 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2726 rth=get_reg(i_regmap,FTEMP|64);
2727 rt=get_reg(i_regmap,r=FTEMP);
2728 }else{
2729 rth=get_reg(i_regmap,rs2[i]|64);
2730 rt=get_reg(i_regmap,r=rs2[i]);
2731 }
2732 assert(rs>=0);
2733 assert(rt>=0);
2734 if(addr<0) addr=get_reg(i_regmap,-1);
2735 assert(addr>=0);
2736 int ftable=0;
2737 if(type==STOREB_STUB)
2738 ftable=(int)writememb;
2739 if(type==STOREH_STUB)
2740 ftable=(int)writememh;
2741 if(type==STOREW_STUB)
2742 ftable=(int)writemem;
24385cae 2743#ifndef FORCE32
57871462 2744 if(type==STORED_STUB)
2745 ftable=(int)writememd;
24385cae 2746#endif
2747 assert(ftable!=0);
57871462 2748 emit_writeword(rs,(int)&address);
2749 //emit_shrimm(rs,16,rs);
2750 //emit_movmem_indexedx4(ftable,rs,rs);
2751 if(type==STOREB_STUB)
2752 emit_writebyte(rt,(int)&byte);
2753 if(type==STOREH_STUB)
2754 emit_writehword(rt,(int)&hword);
2755 if(type==STOREW_STUB)
2756 emit_writeword(rt,(int)&word);
2757 if(type==STORED_STUB) {
3d624f89 2758#ifndef FORCE32
57871462 2759 emit_writeword(rt,(int)&dword);
2760 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2761#else
2762 printf("STORED_STUB\n");
2763#endif
57871462 2764 }
2765 //emit_pusha();
2766 save_regs(reglist);
2767 ds=i_regs!=&regs[i];
2768 int real_rs=get_reg(i_regmap,rs1[i]);
2769 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2770 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2771 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2772 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2773 emit_shrimm(rs,16,1);
2774 int cc=get_reg(i_regmap,CCREG);
2775 if(cc<0) {
2776 emit_loadreg(CCREG,2);
2777 }
2778 emit_movimm(ftable,0);
2779 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2780 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2781 //emit_readword((int)&last_count,temp);
2782 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2783 //emit_add(cc,temp,cc);
2784 //emit_writeword(cc,(int)&Count);
2785 emit_call((int)&indirect_jump_indexed);
2786 //emit_callreg(rs);
2787 emit_readword((int)&Count,HOST_TEMPREG);
2788 emit_readword((int)&next_interupt,2);
2789 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2790 emit_writeword(2,(int)&last_count);
2791 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2792 if(cc<0) {
2793 emit_storereg(CCREG,HOST_TEMPREG);
2794 }
2795 //emit_popa();
2796 restore_regs(reglist);
2797 //if((cc=get_reg(regmap,CCREG))>=0) {
2798 // emit_loadreg(CCREG,cc);
2799 //}
2800 emit_jmp(stubs[n][2]); // return address
2801}
2802
2803inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2804{
2805 int rs=get_reg(regmap,-1);
2806 int rth=get_reg(regmap,target|64);
2807 int rt=get_reg(regmap,target);
2808 assert(rs>=0);
2809 assert(rt>=0);
2810 int ftable=0;
2811 if(type==STOREB_STUB)
2812 ftable=(int)writememb;
2813 if(type==STOREH_STUB)
2814 ftable=(int)writememh;
2815 if(type==STOREW_STUB)
2816 ftable=(int)writemem;
24385cae 2817#ifndef FORCE32
57871462 2818 if(type==STORED_STUB)
2819 ftable=(int)writememd;
24385cae 2820#endif
2821 assert(ftable!=0);
57871462 2822 emit_writeword(rs,(int)&address);
2823 //emit_shrimm(rs,16,rs);
2824 //emit_movmem_indexedx4(ftable,rs,rs);
2825 if(type==STOREB_STUB)
2826 emit_writebyte(rt,(int)&byte);
2827 if(type==STOREH_STUB)
2828 emit_writehword(rt,(int)&hword);
2829 if(type==STOREW_STUB)
2830 emit_writeword(rt,(int)&word);
2831 if(type==STORED_STUB) {
3d624f89 2832#ifndef FORCE32
57871462 2833 emit_writeword(rt,(int)&dword);
2834 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2835#else
2836 printf("STORED_STUB\n");
2837#endif
57871462 2838 }
2839 //emit_pusha();
2840 save_regs(reglist);
2841 //emit_shrimm(rs,16,1);
2842 int cc=get_reg(regmap,CCREG);
2843 if(cc<0) {
2844 emit_loadreg(CCREG,2);
2845 }
2846 //emit_movimm(ftable,0);
2847 emit_movimm(((u_int *)ftable)[addr>>16],0);
2848 //emit_readword((int)&last_count,12);
2849 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2850 if((signed int)addr>=(signed int)0xC0000000) {
2851 // Pagefault address
2852 int ds=regmap!=regs[i].regmap;
2853 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2854 }
2855 //emit_add(12,2,2);
2856 //emit_writeword(2,(int)&Count);
2857 //emit_call(((u_int *)ftable)[addr>>16]);
2858 emit_call((int)&indirect_jump);
2859 emit_readword((int)&Count,HOST_TEMPREG);
2860 emit_readword((int)&next_interupt,2);
2861 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2862 emit_writeword(2,(int)&last_count);
2863 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2864 if(cc<0) {
2865 emit_storereg(CCREG,HOST_TEMPREG);
2866 }
2867 //emit_popa();
2868 restore_regs(reglist);
2869}
2870
2871do_unalignedwritestub(int n)
2872{
b7918751 2873 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2874 literal_pool(256);
57871462 2875 set_jump_target(stubs[n][1],(int)out);
b7918751 2876
2877 int i=stubs[n][3];
2878 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2879 int addr=stubs[n][5];
2880 u_int reglist=stubs[n][7];
2881 signed char *i_regmap=i_regs->regmap;
2882 int temp2=get_reg(i_regmap,FTEMP);
2883 int rt;
2884 int ds, real_rs;
2885 rt=get_reg(i_regmap,rs2[i]);
2886 assert(rt>=0);
2887 assert(addr>=0);
2888 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2889 reglist|=(1<<addr);
2890 reglist&=~(1<<temp2);
2891
2892 emit_andimm(addr,0xfffffffc,temp2);
2893 emit_writeword(temp2,(int)&address);
2894
2895 save_regs(reglist);
2896 ds=i_regs!=&regs[i];
2897 real_rs=get_reg(i_regmap,rs1[i]);
2898 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2899 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2900 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2901 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2902 emit_shrimm(addr,16,1);
2903 int cc=get_reg(i_regmap,CCREG);
2904 if(cc<0) {
2905 emit_loadreg(CCREG,2);
2906 }
2907 emit_movimm((u_int)readmem,0);
2908 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2909 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2910 emit_call((int)&indirect_jump_indexed);
2911 restore_regs(reglist);
2912
2913 emit_readword((int)&readmem_dword,temp2);
2914 int temp=addr; //hmh
2915 emit_shlimm(addr,3,temp);
2916 emit_andimm(temp,24,temp);
2917#ifdef BIG_ENDIAN_MIPS
2918 if (opcode[i]==0x2e) // SWR
2919#else
2920 if (opcode[i]==0x2a) // SWL
2921#endif
2922 emit_xorimm(temp,24,temp);
2923 emit_movimm(-1,HOST_TEMPREG);
55439448 2924 if (opcode[i]==0x2a) { // SWL
b7918751 2925 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2926 emit_orrshr(rt,temp,temp2);
2927 }else{
2928 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2929 emit_orrshl(rt,temp,temp2);
2930 }
2931 emit_readword((int)&address,addr);
2932 emit_writeword(temp2,(int)&word);
2933 //save_regs(reglist); // don't need to, no state changes
2934 emit_shrimm(addr,16,1);
2935 emit_movimm((u_int)writemem,0);
2936 //emit_call((int)&indirect_jump_indexed);
2937 emit_mov(15,14);
2938 emit_readword_dualindexedx4(0,1,15);
2939 emit_readword((int)&Count,HOST_TEMPREG);
2940 emit_readword((int)&next_interupt,2);
2941 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2942 emit_writeword(2,(int)&last_count);
2943 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2944 if(cc<0) {
2945 emit_storereg(CCREG,HOST_TEMPREG);
2946 }
2947 restore_regs(reglist);
57871462 2948 emit_jmp(stubs[n][2]); // return address
2949}
2950
// Debug helper: prints seven of its arguments in hex (a,b,c,d,ebp,esi,edi;
// esp is not printed) followed by the int located just below edi in memory.
// NOTE(review): reading (&edi)[-1] steps outside the parameter object and
// depends on the calling convention - confirm this is still wanted.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int below_edi=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_edi);
}
2955
2956do_invstub(int n)
2957{
2958 literal_pool(20);
2959 u_int reglist=stubs[n][3];
2960 set_jump_target(stubs[n][1],(int)out);
2961 save_regs(reglist);
2962 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2963 emit_call((int)&invalidate_addr);
2964 restore_regs(reglist);
2965 emit_jmp(stubs[n][2]); // return address
2966}
2967
2968int do_dirty_stub(int i)
2969{
2970 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2971 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2972 #ifdef PCSX
2973 addr=(u_int)source;
2974 #endif
57871462 2975 // Careful about the code output here, verify_dirty needs to parse it.
2976 #ifdef ARMv5_ONLY
ac545b3a 2977 emit_loadlp(addr,1);
57871462 2978 emit_loadlp((int)copy,2);
2979 emit_loadlp(slen*4,3);
2980 #else
ac545b3a 2981 emit_movw(addr&0x0000FFFF,1);
57871462 2982 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2983 emit_movt(addr&0xFFFF0000,1);
57871462 2984 emit_movt(((u_int)copy)&0xFFFF0000,2);
2985 emit_movw(slen*4,3);
2986 #endif
2987 emit_movimm(start+i*4,0);
2988 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2989 int entry=(int)out;
2990 load_regs_entry(i);
2991 if(entry==(int)out) entry=instr_addr[i];
2992 emit_jmp(instr_addr[i]);
2993 return entry;
2994}
2995
2996void do_dirty_stub_ds()
2997{
2998 // Careful about the code output here, verify_dirty needs to parse it.
2999 #ifdef ARMv5_ONLY
3000 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3001 emit_loadlp((int)copy,2);
3002 emit_loadlp(slen*4,3);
3003 #else
3004 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3005 emit_movw(((u_int)copy)&0x0000FFFF,2);
3006 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3007 emit_movt(((u_int)copy)&0xFFFF0000,2);
3008 emit_movw(slen*4,3);
3009 #endif
3010 emit_movimm(start+1,0);
3011 emit_call((int)&verify_code_ds);
3012}
3013
3014do_cop1stub(int n)
3015{
3016 literal_pool(256);
3017 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3018 set_jump_target(stubs[n][1],(int)out);
3019 int i=stubs[n][3];
3d624f89 3020// int rs=stubs[n][4];
57871462 3021 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3022 int ds=stubs[n][6];
3023 if(!ds) {
3024 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3025 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3026 }
3027 //else {printf("fp exception in delay slot\n");}
3028 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3029 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3030 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3031 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3032 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3033}
3034
3035/* TLB */
3036
3037int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3038{
3039 if(c) {
3040 if((signed int)addr>=(signed int)0xC0000000) {
3041 // address_generation already loaded the const
3042 emit_readword_dualindexedx4(FP,map,map);
3043 }
3044 else
3045 return -1; // No mapping
3046 }
3047 else {
3048 assert(s!=map);
3049 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3050 emit_addsr12(map,s,map);
3051 // Schedule this while we wait on the load
3052 //if(x) emit_xorimm(s,x,ar);
3053 if(shift>=0) emit_shlimm(s,3,shift);
3054 if(~a) emit_andimm(s,a,ar);
3055 emit_readword_dualindexedx4(FP,map,map);
3056 }
3057 return map;
3058}
3059int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3060{
3061 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3062 emit_test(map,map);
3063 *jaddr=(int)out;
3064 emit_js(0);
3065 }
3066 return map;
3067}
3068
3069int gen_tlb_addr_r(int ar, int map) {
3070 if(map>=0) {
3071 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3072 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3073 }
3074}
3075
3076int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3077{
3078 if(c) {
3079 if(addr<0x80800000||addr>=0xC0000000) {
3080 // address_generation already loaded the const
3081 emit_readword_dualindexedx4(FP,map,map);
3082 }
3083 else
3084 return -1; // No mapping
3085 }
3086 else {
3087 assert(s!=map);
3088 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3089 emit_addsr12(map,s,map);
3090 // Schedule this while we wait on the load
3091 //if(x) emit_xorimm(s,x,ar);
3092 emit_readword_dualindexedx4(FP,map,map);
3093 }
3094 return map;
3095}
3096int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3097{
3098 if(!c||addr<0x80800000||addr>=0xC0000000) {
3099 emit_testimm(map,0x40000000);
3100 *jaddr=(int)out;
3101 emit_jne(0);
3102 }
3103}
3104
3105int gen_tlb_addr_w(int ar, int map) {
3106 if(map>=0) {
3107 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3108 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3109 }
3110}
3111
3112// Generate the address of the memory_map entry, relative to dynarec_local
3113generate_map_const(u_int addr,int reg) {
3114 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3115 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3116}
3117
3118/* Special assem */
3119
3120void shift_assemble_arm(int i,struct regstat *i_regs)
3121{
3122 if(rt1[i]) {
3123 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3124 {
3125 signed char s,t,shift;
3126 t=get_reg(i_regs->regmap,rt1[i]);
3127 s=get_reg(i_regs->regmap,rs1[i]);
3128 shift=get_reg(i_regs->regmap,rs2[i]);
3129 if(t>=0){
3130 if(rs1[i]==0)
3131 {
3132 emit_zeroreg(t);
3133 }
3134 else if(rs2[i]==0)
3135 {
3136 assert(s>=0);
3137 if(s!=t) emit_mov(s,t);
3138 }
3139 else
3140 {
3141 emit_andimm(shift,31,HOST_TEMPREG);
3142 if(opcode2[i]==4) // SLLV
3143 {
3144 emit_shl(s,HOST_TEMPREG,t);
3145 }
3146 if(opcode2[i]==6) // SRLV
3147 {
3148 emit_shr(s,HOST_TEMPREG,t);
3149 }
3150 if(opcode2[i]==7) // SRAV
3151 {
3152 emit_sar(s,HOST_TEMPREG,t);
3153 }
3154 }
3155 }
3156 } else { // DSLLV/DSRLV/DSRAV
3157 signed char sh,sl,th,tl,shift;
3158 th=get_reg(i_regs->regmap,rt1[i]|64);
3159 tl=get_reg(i_regs->regmap,rt1[i]);
3160 sh=get_reg(i_regs->regmap,rs1[i]|64);
3161 sl=get_reg(i_regs->regmap,rs1[i]);
3162 shift=get_reg(i_regs->regmap,rs2[i]);
3163 if(tl>=0){
3164 if(rs1[i]==0)
3165 {
3166 emit_zeroreg(tl);
3167 if(th>=0) emit_zeroreg(th);
3168 }
3169 else if(rs2[i]==0)
3170 {
3171 assert(sl>=0);
3172 if(sl!=tl) emit_mov(sl,tl);
3173 if(th>=0&&sh!=th) emit_mov(sh,th);
3174 }
3175 else
3176 {
3177 // FIXME: What if shift==tl ?
3178 assert(shift!=tl);
3179 int temp=get_reg(i_regs->regmap,-1);
3180 int real_th=th;
3181 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3182 assert(sl>=0);
3183 assert(sh>=0);
3184 emit_andimm(shift,31,HOST_TEMPREG);
3185 if(opcode2[i]==0x14) // DSLLV
3186 {
3187 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3188 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3189 emit_orrshr(sl,HOST_TEMPREG,th);
3190 emit_andimm(shift,31,HOST_TEMPREG);
3191 emit_testimm(shift,32);
3192 emit_shl(sl,HOST_TEMPREG,tl);
3193 if(th>=0) emit_cmovne_reg(tl,th);
3194 emit_cmovne_imm(0,tl);
3195 }
3196 if(opcode2[i]==0x16) // DSRLV
3197 {
3198 assert(th>=0);
3199 emit_shr(sl,HOST_TEMPREG,tl);
3200 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3201 emit_orrshl(sh,HOST_TEMPREG,tl);
3202 emit_andimm(shift,31,HOST_TEMPREG);
3203 emit_testimm(shift,32);
3204 emit_shr(sh,HOST_TEMPREG,th);
3205 emit_cmovne_reg(th,tl);
3206 if(real_th>=0) emit_cmovne_imm(0,th);
3207 }
3208 if(opcode2[i]==0x17) // DSRAV
3209 {
3210 assert(th>=0);
3211 emit_shr(sl,HOST_TEMPREG,tl);
3212 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3213 if(real_th>=0) {
3214 assert(temp>=0);
3215 emit_sarimm(th,31,temp);
3216 }
3217 emit_orrshl(sh,HOST_TEMPREG,tl);
3218 emit_andimm(shift,31,HOST_TEMPREG);
3219 emit_testimm(shift,32);
3220 emit_sar(sh,HOST_TEMPREG,th);
3221 emit_cmovne_reg(th,tl);
3222 if(real_th>=0) emit_cmovne_reg(temp,th);
3223 }
3224 }
3225 }
3226 }
3227 }
3228}
3229#define shift_assemble shift_assemble_arm
3230
3231void loadlr_assemble_arm(int i,struct regstat *i_regs)
3232{
3233 int s,th,tl,temp,temp2,addr,map=-1;
3234 int offset;
3235 int jaddr=0;
3236 int memtarget,c=0;
3237 u_int hr,reglist=0;
3238 th=get_reg(i_regs->regmap,rt1[i]|64);
3239 tl=get_reg(i_regs->regmap,rt1[i]);
3240 s=get_reg(i_regs->regmap,rs1[i]);
3241 temp=get_reg(i_regs->regmap,-1);
3242 temp2=get_reg(i_regs->regmap,FTEMP);
3243 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3244 assert(addr<0);
3245 offset=imm[i];
3246 for(hr=0;hr<HOST_REGS;hr++) {
3247 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3248 }
3249 reglist|=1<<temp;
3250 if(offset||s<0||c) addr=temp2;
3251 else addr=s;
3252 if(s>=0) {
3253 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3254 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3255 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3256 }
3257 if(tl>=0) {
3258 //assert(tl>=0);
3259 //assert(rt1[i]);
3260 if(!using_tlb) {
3261 if(!c) {
3262 emit_shlimm(addr,3,temp);
3263 if (opcode[i]==0x22||opcode[i]==0x26) {
3264 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3265 }else{
3266 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3267 }
4cb76aa4 3268 emit_cmpimm(addr,RAM_SIZE);
57871462 3269 jaddr=(int)out;
3270 emit_jno(0);
3271 }
3272 else {
3273 if (opcode[i]==0x22||opcode[i]==0x26) {
3274 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3275 }else{
3276 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3277 }
3278 }
3279 }else{ // using tlb
3280 int a;
3281 if(c) {
3282 a=-1;
3283 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3284 a=0xFFFFFFFC; // LWL/LWR
3285 }else{
3286 a=0xFFFFFFF8; // LDL/LDR
3287 }
3288 map=get_reg(i_regs->regmap,TLREG);
3289 assert(map>=0);
3290 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3291 if(c) {
3292 if (opcode[i]==0x22||opcode[i]==0x26) {
3293 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3294 }else{
3295 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3296 }
3297 }
3298 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3299 }
3300 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3301 if(!c||memtarget) {
3302 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3303 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3304 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3305 }
3306 else
3307 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3308 emit_andimm(temp,24,temp);
2002a1db 3309#ifdef BIG_ENDIAN_MIPS
3310 if (opcode[i]==0x26) // LWR
3311#else
3312 if (opcode[i]==0x22) // LWL
3313#endif
3314 emit_xorimm(temp,24,temp);
57871462 3315 emit_movimm(-1,HOST_TEMPREG);
3316 if (opcode[i]==0x26) {
3317 emit_shr(temp2,temp,temp2);
3318 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3319 }else{
3320 emit_shl(temp2,temp,temp2);
3321 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3322 }
3323 emit_or(temp2,tl,tl);
3324 //emit_storereg(rt1[i],tl); // DEBUG
3325 }
3326 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3327 // FIXME: little endian
57871462 3328 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3329 if(!c||memtarget) {
3330 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3331 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3332 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3333 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3334 }
3335 else
3336 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3337 emit_testimm(temp,32);
3338 emit_andimm(temp,24,temp);
3339 if (opcode[i]==0x1A) { // LDL
3340 emit_rsbimm(temp,32,HOST_TEMPREG);
3341 emit_shl(temp2h,temp,temp2h);
3342 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3343 emit_movimm(-1,HOST_TEMPREG);
3344 emit_shl(temp2,temp,temp2);
3345 emit_cmove_reg(temp2h,th);
3346 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3347 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3348 emit_orreq(temp2,tl,tl);
3349 emit_orrne(temp2,th,th);
3350 }
3351 if (opcode[i]==0x1B) { // LDR
3352 emit_xorimm(temp,24,temp);
3353 emit_rsbimm(temp,32,HOST_TEMPREG);
3354 emit_shr(temp2,temp,temp2);
3355 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3356 emit_movimm(-1,HOST_TEMPREG);
3357 emit_shr(temp2h,temp,temp2h);
3358 emit_cmovne_reg(temp2,tl);
3359 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3360 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3361 emit_orrne(temp2h,th,th);
3362 emit_orreq(temp2h,tl,tl);
3363 }
3364 }
3365 }
3366}
3367#define loadlr_assemble loadlr_assemble_arm
3368
3369void cop0_assemble(int i,struct regstat *i_regs)
3370{
3371 if(opcode2[i]==0) // MFC0
3372 {
3373 signed char t=get_reg(i_regs->regmap,rt1[i]);
3374 char copr=(source[i]>>11)&0x1f;
3375 //assert(t>=0); // Why does this happen? OOT is weird
3376 if(t>=0) {
7139f3c8 3377#ifdef MUPEN64
57871462 3378 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3379 emit_movimm((source[i]>>11)&0x1f,1);
3380 emit_writeword(0,(int)&PC);
3381 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3382 if(copr==9) {
3383 emit_readword((int)&last_count,ECX);
3384 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3385 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3386 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3387 emit_writeword(HOST_CCREG,(int)&Count);
3388 }
3389 emit_call((int)MFC0);
3390 emit_readword((int)&readmem_dword,t);
7139f3c8 3391#else
3392 emit_readword((int)&reg_cop0+copr*4,t);
3393#endif
57871462 3394 }
3395 }
3396 else if(opcode2[i]==4) // MTC0
3397 {
3398 signed char s=get_reg(i_regs->regmap,rs1[i]);
3399 char copr=(source[i]>>11)&0x1f;
3400 assert(s>=0);
3401 emit_writeword(s,(int)&readmem_dword);
3402 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3403#ifdef MUPEN64 /// FIXME
57871462 3404 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3405 emit_movimm((source[i]>>11)&0x1f,1);
3406 emit_writeword(0,(int)&PC);
3407 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3408#endif
7139f3c8 3409#ifdef PCSX
3410 emit_movimm(source[i],0);
3411 emit_writeword(0,(int)&psxRegs.code);
3412#endif
3413 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3414 emit_readword((int)&last_count,ECX);
3415 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3416 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3417 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3418 emit_writeword(HOST_CCREG,(int)&Count);
3419 }
3420 // What a mess. The status register (12) can enable interrupts,
3421 // so needs a special case to handle a pending interrupt.
3422 // The interrupt must be taken immediately, because a subsequent
3423 // instruction might disable interrupts again.
7139f3c8 3424 if(copr==12||copr==13) {
57871462 3425 emit_movimm(start+i*4+4,0);
3426 emit_movimm(0,1);
3427 emit_writeword(0,(int)&pcaddr);
3428 emit_writeword(1,(int)&pending_exception);
3429 }
3430 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3431 //else
3432 emit_call((int)MTC0);
7139f3c8 3433 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3434 emit_readword((int)&Count,HOST_CCREG);
3435 emit_readword((int)&next_interupt,ECX);
3436 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3437 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3438 emit_writeword(ECX,(int)&last_count);
3439 emit_storereg(CCREG,HOST_CCREG);
3440 }
7139f3c8 3441 if(copr==12||copr==13) {
57871462 3442 assert(!is_delayslot);
3443 emit_readword((int)&pending_exception,14);
3444 }
3445 emit_loadreg(rs1[i],s);
3446 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3447 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3448 if(copr==12||copr==13) {
57871462 3449 emit_test(14,14);
3450 emit_jne((int)&do_interrupt);
3451 }
3452 cop1_usable=0;
3453 }
3454 else
3455 {
3456 assert(opcode2[i]==0x10);
3d624f89 3457#ifndef DISABLE_TLB
57871462 3458 if((source[i]&0x3f)==0x01) // TLBR
3459 emit_call((int)TLBR);
3460 if((source[i]&0x3f)==0x02) // TLBWI
3461 emit_call((int)TLBWI_new);
3462 if((source[i]&0x3f)==0x06) { // TLBWR
3463 // The TLB entry written by TLBWR is dependent on the count,
3464 // so update the cycle count
3465 emit_readword((int)&last_count,ECX);
3466 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3467 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3468 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3469 emit_writeword(HOST_CCREG,(int)&Count);
3470 emit_call((int)TLBWR_new);
3471 }
3472 if((source[i]&0x3f)==0x08) // TLBP
3473 emit_call((int)TLBP);
3d624f89 3474#endif
576bbd8f 3475#ifdef PCSX
3476 if((source[i]&0x3f)==0x10) // RFE
3477 {
3478 emit_readword((int)&Status,0);
3479 emit_andimm(0,0x3c,1);
3480 emit_andimm(0,~0xf,0);
3481 emit_orrshr_imm(1,2,0);
3482 emit_writeword(0,(int)&Status);
3483 }
3484#else
57871462 3485 if((source[i]&0x3f)==0x18) // ERET
3486 {
3487 int count=ccadj[i];
3488 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3489 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3490 emit_jmp((int)jump_eret);
3491 }
576bbd8f 3492#endif
57871462 3493 }
3494}
3495
b9b61529 3496static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3497{
3498 switch (copr) {
3499 case 1:
3500 case 3:
3501 case 5:
3502 case 8:
3503 case 9:
3504 case 10: