drc: get rid of pass 7/provisional_r32 too
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
// Entry points implemented outside this file
// (presumably hand-written assembly stubs -- TODO confirm).
void indirect_jump_indexed(), indirect_jump(), do_interrupt();
void jump_vaddr_r0(), jump_vaddr_r1(), jump_vaddr_r2(), jump_vaddr_r3();
void jump_vaddr_r4(), jump_vaddr_r5(), jump_vaddr_r6(), jump_vaddr_r7();
void jump_vaddr_r8(), jump_vaddr_r9(), jump_vaddr_r10();
void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 ARM registers, used in assem_debug output.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build rd/rn fields plus an 8-bit immediate placed `shift` bits to the
// left (shift must be even). The shift is stored as ((32-shift)&30)<<7,
// i.e. as a rotate-right amount in bits 8-11.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value rotated by an even amount.
// On success stores the 12-bit rotate:imm8 field in *encoded and returns 1;
// returns 0 (leaving *encoded untouched) when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // rotate right two bits at a time until the value fits in 8 bits
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
57871462 832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
e80343e2 835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
57871462 842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
917void emit_loadreg(int r, int hr)
918{
3d624f89 919#ifdef FORCE32
920 if(r&64) {
921 printf("64bit load in 32bit mode!\n");
922 exit(1);
923 }
924#endif
57871462 925 if((r&63)==0)
926 emit_zeroreg(hr);
927 else {
3d624f89 928 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 929 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
930 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
931 if(r==CCREG) addr=(int)&cycle_count;
932 if(r==CSREG) addr=(int)&Status;
933 if(r==FSREG) addr=(int)&FCR31;
934 if(r==INVCP) addr=(int)&invc_ptr;
935 u_int offset = addr-(u_int)&dynarec_local;
936 assert(offset<4096);
937 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
938 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
939 }
940}
941void emit_storereg(int r, int hr)
942{
3d624f89 943#ifdef FORCE32
944 if(r&64) {
945 printf("64bit store in 32bit mode!\n");
946 exit(1);
947 }
948#endif
949 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 950 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
951 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
952 if(r==CCREG) addr=(int)&cycle_count;
953 if(r==FSREG) addr=(int)&FCR31;
954 u_int offset = addr-(u_int)&dynarec_local;
955 assert(offset<4096);
956 assem_debug("str %s,fp+%d\n",regname[hr],offset);
957 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
958}
959
960void emit_test(int rs, int rt)
961{
962 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
963 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
964}
965
966void emit_testimm(int rs,int imm)
967{
968 u_int armval;
969 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 970 genimm_checked(imm,&armval);
57871462 971 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
972}
973
b9b61529 974void emit_testeqimm(int rs,int imm)
975{
976 u_int armval;
977 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 978 genimm_checked(imm,&armval);
b9b61529 979 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
980}
981
57871462 982void emit_not(int rs,int rt)
983{
984 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
986}
987
b9b61529 988void emit_mvnmi(int rs,int rt)
989{
990 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
991 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
992}
993
57871462 994void emit_and(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
998}
999
1000void emit_or(u_int rs1,u_int rs2,u_int rt)
1001{
1002 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1004}
1005void emit_or_and_set_flags(int rs1,int rs2,int rt)
1006{
1007 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
f70d384d 1011void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1012{
1013 assert(rs<16);
1014 assert(rt<16);
1015 assert(imm<32);
1016 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1017 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1018}
1019
576bbd8f 1020void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1021{
1022 assert(rs<16);
1023 assert(rt<16);
1024 assert(imm<32);
1025 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1026 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1027}
1028
57871462 1029void emit_xor(u_int rs1,u_int rs2,u_int rt)
1030{
1031 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1032 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1033}
1034
1035void emit_loadlp(u_int imm,u_int rt)
1036{
1037 add_literal((int)out,imm);
1038 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1039 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1040}
1041void emit_movw(u_int imm,u_int rt)
1042{
1043 assert(imm<65536);
1044 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1045 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1046}
1047void emit_movt(u_int imm,u_int rt)
1048{
1049 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1050 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1051}
1052void emit_movimm(u_int imm,u_int rt)
1053{
1054 u_int armval;
1055 if(genimm(imm,&armval)) {
1056 assem_debug("mov %s,#%d\n",regname[rt],imm);
1057 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1058 }else if(genimm(~imm,&armval)) {
1059 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1060 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1061 }else if(imm<65536) {
1062 #ifdef ARMv5_ONLY
1063 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1064 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1065 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1066 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1067 #else
1068 emit_movw(imm,rt);
1069 #endif
1070 }else{
1071 #ifdef ARMv5_ONLY
1072 emit_loadlp(imm,rt);
1073 #else
1074 emit_movw(imm&0x0000FFFF,rt);
1075 emit_movt(imm&0xFFFF0000,rt);
1076 #endif
1077 }
1078}
1079void emit_pcreladdr(u_int rt)
1080{
1081 assem_debug("add %s,pc,#?\n",regname[rt]);
1082 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1083}
1084
1085void emit_addimm(u_int rs,int imm,u_int rt)
1086{
1087 assert(rs<16);
1088 assert(rt<16);
1089 if(imm!=0) {
1090 assert(imm>-65536&&imm<65536);
1091 u_int armval;
1092 if(genimm(imm,&armval)) {
1093 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1095 }else if(genimm(-imm,&armval)) {
1096 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1097 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1098 }else if(imm<0) {
1099 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1101 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1103 }else{
1104 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1106 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1108 }
1109 }
1110 else if(rs!=rt) emit_mov(rs,rt);
1111}
1112
1113void emit_addimm_and_set_flags(int imm,int rt)
1114{
1115 assert(imm>-65536&&imm<65536);
1116 u_int armval;
1117 if(genimm(imm,&armval)) {
1118 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1119 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1120 }else if(genimm(-imm,&armval)) {
1121 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1122 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1123 }else if(imm<0) {
1124 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1125 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1126 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1127 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1128 }else{
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1130 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1131 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1132 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1133 }
1134}
1135void emit_addimm_no_flags(u_int imm,u_int rt)
1136{
1137 emit_addimm(rt,imm,rt);
1138}
1139
1140void emit_addnop(u_int r)
1141{
1142 assert(r<16);
1143 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1144 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1145}
1146
1147void emit_adcimm(u_int rs,int imm,u_int rt)
1148{
1149 u_int armval;
cfbd3c6e 1150 genimm_checked(imm,&armval);
57871462 1151 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1152 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1153}
1154/*void emit_sbcimm(int imm,u_int rt)
1155{
1156 u_int armval;
cfbd3c6e 1157 genimm_checked(imm,&armval);
57871462 1158 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1159 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1160}*/
1161void emit_sbbimm(int imm,u_int rt)
1162{
1163 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1164 assert(rt<8);
1165 if(imm<128&&imm>=-128) {
1166 output_byte(0x83);
1167 output_modrm(3,rt,3);
1168 output_byte(imm);
1169 }
1170 else
1171 {
1172 output_byte(0x81);
1173 output_modrm(3,rt,3);
1174 output_w32(imm);
1175 }
1176}
1177void emit_rscimm(int rs,int imm,u_int rt)
1178{
1179 assert(0);
1180 u_int armval;
cfbd3c6e 1181 genimm_checked(imm,&armval);
57871462 1182 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1184}
1185
1186void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1187{
1188 // TODO: if(genimm(imm,&armval)) ...
1189 // else
1190 emit_movimm(imm,HOST_TEMPREG);
1191 emit_adds(HOST_TEMPREG,rsl,rtl);
1192 emit_adcimm(rsh,0,rth);
1193}
1194
1195void emit_sbb(int rs1,int rs2)
1196{
1197 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1198 output_byte(0x19);
1199 output_modrm(3,rs1,rs2);
1200}
1201
1202void emit_andimm(int rs,int imm,int rt)
1203{
1204 u_int armval;
1205 if(genimm(imm,&armval)) {
1206 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1207 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1208 }else if(genimm(~imm,&armval)) {
1209 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1210 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1211 }else if(imm==65535) {
1212 #ifdef ARMv5_ONLY
1213 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1214 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1215 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1216 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1217 #else
1218 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1219 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1220 #endif
1221 }else{
1222 assert(imm>0&&imm<65535);
1223 #ifdef ARMv5_ONLY
1224 assem_debug("mov r14,#%d\n",imm&0xFF00);
1225 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1226 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1227 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1228 #else
1229 emit_movw(imm,HOST_TEMPREG);
1230 #endif
1231 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1232 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1233 }
1234}
1235
1236void emit_orimm(int rs,int imm,int rt)
1237{
1238 u_int armval;
1239 if(genimm(imm,&armval)) {
1240 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1241 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1242 }else{
1243 assert(imm>0&&imm<65536);
1244 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1245 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1246 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1247 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1248 }
1249}
1250
1251void emit_xorimm(int rs,int imm,int rt)
1252{
57871462 1253 u_int armval;
1254 if(genimm(imm,&armval)) {
1255 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1256 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1257 }else{
514ed0d9 1258 assert(imm>0&&imm<65536);
57871462 1259 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1260 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1261 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1262 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1263 }
1264}
1265
1266void emit_shlimm(int rs,u_int imm,int rt)
1267{
1268 assert(imm>0);
1269 assert(imm<32);
1270 //if(imm==1) ...
1271 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1272 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1273}
1274
1275void emit_shrimm(int rs,u_int imm,int rt)
1276{
1277 assert(imm>0);
1278 assert(imm<32);
1279 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1281}
1282
1283void emit_sarimm(int rs,u_int imm,int rt)
1284{
1285 assert(imm>0);
1286 assert(imm<32);
1287 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1289}
1290
1291void emit_rorimm(int rs,u_int imm,int rt)
1292{
1293 assert(imm>0);
1294 assert(imm<32);
1295 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1296 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1297}
1298
1299void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1300{
1301 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1302 assert(imm>0);
1303 assert(imm<32);
1304 //if(imm==1) ...
1305 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1306 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1307 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1308 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1309}
1310
1311void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1312{
1313 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1314 assert(imm>0);
1315 assert(imm<32);
1316 //if(imm==1) ...
1317 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1318 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1319 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1320 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1321}
1322
b9b61529 1323void emit_signextend16(int rs,int rt)
1324{
1325 #ifdef ARMv5_ONLY
1326 emit_shlimm(rs,16,rt);
1327 emit_sarimm(rt,16,rt);
1328 #else
1329 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1330 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1331 #endif
1332}
1333
57871462 1334void emit_shl(u_int rs,u_int shift,u_int rt)
1335{
1336 assert(rs<16);
1337 assert(rt<16);
1338 assert(shift<16);
1339 //if(imm==1) ...
1340 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1341 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1342}
1343void emit_shr(u_int rs,u_int shift,u_int rt)
1344{
1345 assert(rs<16);
1346 assert(rt<16);
1347 assert(shift<16);
1348 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1350}
1351void emit_sar(u_int rs,u_int shift,u_int rt)
1352{
1353 assert(rs<16);
1354 assert(rt<16);
1355 assert(shift<16);
1356 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1357 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1358}
1359void emit_shlcl(int r)
1360{
1361 assem_debug("shl %%%s,%%cl\n",regname[r]);
1362 assert(0);
1363}
1364void emit_shrcl(int r)
1365{
1366 assem_debug("shr %%%s,%%cl\n",regname[r]);
1367 assert(0);
1368}
1369void emit_sarcl(int r)
1370{
1371 assem_debug("sar %%%s,%%cl\n",regname[r]);
1372 assert(0);
1373}
1374
1375void emit_shldcl(int r1,int r2)
1376{
1377 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1378 assert(0);
1379}
1380void emit_shrdcl(int r1,int r2)
1381{
1382 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1383 assert(0);
1384}
1385void emit_orrshl(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1391 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1392}
1393void emit_orrshr(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1400}
1401
1402void emit_cmpimm(int rs,int imm)
1403{
1404 u_int armval;
1405 if(genimm(imm,&armval)) {
1406 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1407 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1408 }else if(genimm(-imm,&armval)) {
1409 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1410 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1411 }else if(imm>0) {
1412 assert(imm<65536);
1413 #ifdef ARMv5_ONLY
1414 emit_movimm(imm,HOST_TEMPREG);
1415 #else
1416 emit_movw(imm,HOST_TEMPREG);
1417 #endif
1418 assem_debug("cmp %s,r14\n",regname[rs]);
1419 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1420 }else{
1421 assert(imm>-65536);
1422 #ifdef ARMv5_ONLY
1423 emit_movimm(-imm,HOST_TEMPREG);
1424 #else
1425 emit_movw(-imm,HOST_TEMPREG);
1426 #endif
1427 assem_debug("cmn %s,r14\n",regname[rs]);
1428 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1429 }
1430}
1431
1432void emit_cmovne(u_int *addr,int rt)
1433{
1434 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1435 assert(0);
1436}
1437void emit_cmovl(u_int *addr,int rt)
1438{
1439 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1440 assert(0);
1441}
1442void emit_cmovs(u_int *addr,int rt)
1443{
1444 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1445 assert(0);
1446}
1447void emit_cmovne_imm(int imm,int rt)
1448{
1449 assem_debug("movne %s,#%d\n",regname[rt],imm);
1450 u_int armval;
cfbd3c6e 1451 genimm_checked(imm,&armval);
57871462 1452 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1453}
1454void emit_cmovl_imm(int imm,int rt)
1455{
1456 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1457 u_int armval;
cfbd3c6e 1458 genimm_checked(imm,&armval);
57871462 1459 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1460}
1461void emit_cmovb_imm(int imm,int rt)
1462{
1463 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1464 u_int armval;
cfbd3c6e 1465 genimm_checked(imm,&armval);
57871462 1466 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1467}
1468void emit_cmovs_imm(int imm,int rt)
1469{
1470 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1471 u_int armval;
cfbd3c6e 1472 genimm_checked(imm,&armval);
57871462 1473 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1474}
1475void emit_cmove_reg(int rs,int rt)
1476{
1477 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1478 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1479}
1480void emit_cmovne_reg(int rs,int rt)
1481{
1482 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1483 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1484}
1485void emit_cmovl_reg(int rs,int rt)
1486{
1487 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1488 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1489}
1490void emit_cmovs_reg(int rs,int rt)
1491{
1492 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1493 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1494}
1495
/* rt = (rs < imm) ? 1 : 0 using the "lt" condition.
 * When rt is a different register it is zeroed before the compare; when
 * rt aliases rs the zeroing has to wait until after rs has been compared. */
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/* rt = (rs < imm) ? 1 : 0 using the "cc" condition.
 * When rt is a different register it is zeroed before the compare; when
 * rt aliases rs the zeroing has to wait until after rs has been compared. */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/* 64-bit set-if-less-than against a 32-bit immediate; result goes to rt.
 * The low-word result from emit_slti32() is corrected using the high
 * word: it is forced to 0 when the high word differs from the expected
 * value (0 for imm >= 0, -1 for imm < 0), and forced to 1 when the high
 * word is negative / less than -1 respectively.  rsh must not be rt. */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
/* 64-bit unsigned set-if-less-than against a 32-bit immediate; result in
 * rt.  The low-word result from emit_sltiu32() is overridden when the
 * high word is not the expected value: forced to 0 if imm >= 0 and the
 * high word is non-zero, forced to 1 if imm < 0 and the high word is not
 * -1.  rsh must not be rt. */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1542
1543void emit_cmp(int rs,int rt)
1544{
1545 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1546 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1547}
/* rt = (rs > 0) ? 1 : 0: compare rs with 1, preload 1, then overwrite
 * with 0 under the "lt" condition.
 * NOTE(review): relies on emit_movimm(1,..) leaving the flags intact. */
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/* rt = (rs != 0) ? 1 : 0.  The value is first copied/tested so the flags
 * reflect rs, then rt is set to 1 under the "ne" condition (a zero input
 * leaves rt holding that zero). */
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
/* 64-bit "greater than zero" into rt: start from the low-word result,
 * then let the high word override it -- 1 if the high word is non-zero,
 * 0 if it is negative. */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/* 64-bit "non-zero" into rt: OR the two halves into rt with flags set,
 * then force rt to 1 under the "ne" condition. */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/* rt = (rs1 < rs2) ? 1 : 0 using the "lt" condition.  rt is zeroed
 * before the compare unless it aliases an operand, in which case the
 * zeroing is deferred until after the compare. */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliases_operand = (rs1 == rt) || (rs2 == rt);

  if (!aliases_operand) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliases_operand) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/* rt = (rs1 < rs2) ? 1 : 0 using the "cc" condition.  rt is zeroed
 * before the compare unless it aliases an operand, in which case the
 * zeroing is deferred until after the compare. */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliases_operand = (rs1 == rt) || (rs2 == rt);

  if (!aliases_operand) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliases_operand) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1592void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1593{
1594 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1595 assert(u1!=rt);
1596 assert(u2!=rt);
1597 emit_cmp(l1,l2);
1598 emit_movimm(0,rt);
1599 emit_sbcs(u1,u2,HOST_TEMPREG);
1600 emit_cmovl_imm(1,rt);
1601}
1602void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1603{
1604 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1605 assert(u1!=rt);
1606 assert(u2!=rt);
1607 emit_cmp(l1,l2);
1608 emit_movimm(0,rt);
1609 emit_sbcs(u1,u2,HOST_TEMPREG);
1610 emit_cmovb_imm(1,rt);
1611}
1612
1613void emit_call(int a)
1614{
1615 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1616 u_int offset=genjmp(a);
1617 output_w32(0xeb000000|offset);
1618}
1619void emit_jmp(int a)
1620{
1621 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1622 u_int offset=genjmp(a);
1623 output_w32(0xea000000|offset);
1624}
/* Emit "bne" to address a. */
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000 | genjmp(a));
}
/* Emit "beq" to address a. */
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000 | genjmp(a));
}
/* Emit "bmi" to address a. */
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000 | genjmp(a));
}
/* Emit "bpl" to address a. */
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000 | genjmp(a));
}
/* Emit "blt" to address a. */
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000 | genjmp(a));
}
/* Emit "bge" to address a. */
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000 | genjmp(a));
}
/* Emit "bvc" to address a. */
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000 | genjmp(a));
}
/* Emit "bcs" to address a. */
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000 | genjmp(a));
}
/* Emit "bcc" to address a. */
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000 | genjmp(a));
}
1679
/* Unimplemented stub: logs an x86-style "push" of an immediate and asserts. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* Unimplemented stub: logs "pusha" and asserts. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented stub: logs "popa" and asserts. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1695void emit_pushreg(u_int r)
1696{
1697 assem_debug("push %%%s\n",regname[r]);
1698 assert(0);
1699}
1700void emit_popreg(u_int r)
1701{
1702 assem_debug("pop %%%s\n",regname[r]);
1703 assert(0);
1704}
1705void emit_callreg(u_int r)
1706{
1707 assem_debug("call *%%%s\n",regname[r]);
1708 assert(0);
1709}
1710void emit_jmpreg(u_int r)
1711{
1712 assem_debug("mov pc,%s\n",regname[r]);
1713 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1714}
1715
1716void emit_readword_indexed(int offset, int rs, int rt)
1717{
1718 assert(offset>-4096&&offset<4096);
1719 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1720 if(offset>=0) {
1721 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1722 }else{
1723 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1724 }
1725}
1726void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1727{
1728 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1729 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1730}
/* Word load with optional map register.  map < 0 means no mapping: a
 * plain offset load is emitted.  Otherwise addr must be 0 and the load is
 * indexed by map scaled by four. */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/* Doubleword load into rh (from addr) and rl (from addr+4); the high
 * half is skipped when rh < 0.
 * Without a map register (map < 0) two plain offset loads are emitted.
 * With a map register, the map register itself is incremented by 1 (one
 * word after the x4 scaling) between the two loads, so it is modified by
 * this sequence; rh must not be rs because rs is still needed for the
 * second load. */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);

  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }

  assert(rh!=rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1751void emit_movsbl_indexed(int offset, int rs, int rt)
1752{
1753 assert(offset>-256&&offset<256);
1754 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1755 if(offset>=0) {
1756 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1757 }else{
1758 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1759 }
1760}
1761void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1762{
1763 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1764 else {
1765 if(addr==0) {
1766 emit_shlimm(map,2,map);
1767 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1768 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1769 }else{
1770 assert(addr>-256&&addr<256);
1771 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1772 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1773 emit_movsbl_indexed(addr, rt, rt);
1774 }
1775 }
1776}
1777void emit_movswl_indexed(int offset, int rs, int rt)
1778{
1779 assert(offset>-256&&offset<256);
1780 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1781 if(offset>=0) {
1782 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1783 }else{
1784 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1785 }
1786}
1787void emit_movzbl_indexed(int offset, int rs, int rt)
1788{
1789 assert(offset>-4096&&offset<4096);
1790 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1791 if(offset>=0) {
1792 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1793 }else{
1794 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1795 }
1796}
1797void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1798{
1799 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1800 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1801}
/* Zero-extending byte load with optional map register.  Without a map
 * (map < 0) a plain offset load is emitted.  With a map, a non-zero addr
 * is first folded into rt (rt = rs + addr) and the load is then indexed
 * by map scaled by four. */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  } else {
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1814void emit_movzwl_indexed(int offset, int rs, int rt)
1815{
1816 assert(offset>-256&&offset<256);
1817 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1818 if(offset>=0) {
1819 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1820 }else{
1821 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1822 }
1823}
1824void emit_readword(int addr, int rt)
1825{
1826 u_int offset = addr-(u_int)&dynarec_local;
1827 assert(offset<4096);
1828 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1829 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1830}
1831void emit_movsbl(int addr, int rt)
1832{
1833 u_int offset = addr-(u_int)&dynarec_local;
1834 assert(offset<256);
1835 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1836 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1837}
1838void emit_movswl(int addr, int rt)
1839{
1840 u_int offset = addr-(u_int)&dynarec_local;
1841 assert(offset<256);
1842 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1843 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1844}
1845void emit_movzbl(int addr, int rt)
1846{
1847 u_int offset = addr-(u_int)&dynarec_local;
1848 assert(offset<4096);
1849 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1850 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1851}
1852void emit_movzwl(int addr, int rt)
1853{
1854 u_int offset = addr-(u_int)&dynarec_local;
1855 assert(offset<256);
1856 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1857 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1858}
1859void emit_movzwl_reg(int rs, int rt)
1860{
1861 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1862 assert(0);
1863}
1864
1865void emit_xchg(int rs, int rt)
1866{
1867 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1868 assert(0);
1869}
1870void emit_writeword_indexed(int rt, int offset, int rs)
1871{
1872 assert(offset>-4096&&offset<4096);
1873 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1874 if(offset>=0) {
1875 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1876 }else{
1877 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1878 }
1879}
1880void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1881{
1882 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1883 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1884}
/* Word store with optional map register.  map < 0 means no mapping: a
 * plain offset store is emitted.  Otherwise addr must be 0 and the store
 * is indexed by map scaled by four.  `temp` is not used here. */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/* Doubleword store of rh (at addr) and rl (at addr+4).
 * Without a map register (map < 0): two plain offset stores, the high
 * one skipped when rh < 0.
 * With a map register rh must be valid.  If temp is distinct from rs,
 * temp = map + 1 is prepared up front and used as the index for the low
 * word; otherwise rs itself is advanced by 4 before the low-word store
 * (rs is modified in that case). */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }

  assert(rh>=0);
  const int have_temp = (temp != rs);

  if (have_temp) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1909void emit_writehword_indexed(int rt, int offset, int rs)
1910{
1911 assert(offset>-256&&offset<256);
1912 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1913 if(offset>=0) {
1914 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1915 }else{
1916 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1917 }
1918}
1919void emit_writebyte_indexed(int rt, int offset, int rs)
1920{
1921 assert(offset>-4096&&offset<4096);
1922 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1923 if(offset>=0) {
1924 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1925 }else{
1926 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1927 }
1928}
1929void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1930{
1931 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1932 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1933}
/* Byte store with optional map register.  Without a map (map < 0) a
 * plain offset store is emitted.  With a map, a non-zero addr is first
 * folded into temp (temp = rs + addr) and the store is then indexed by
 * map scaled by four. */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  } else {
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
1946void emit_writeword(int rt, int addr)
1947{
1948 u_int offset = addr-(u_int)&dynarec_local;
1949 assert(offset<4096);
1950 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1951 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1952}
1953void emit_writehword(int rt, int addr)
1954{
1955 u_int offset = addr-(u_int)&dynarec_local;
1956 assert(offset<256);
1957 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1958 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1959}
1960void emit_writebyte(int rt, int addr)
1961{
1962 u_int offset = addr-(u_int)&dynarec_local;
1963 assert(offset<4096);
74426039 1964 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1965 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1966}
/* Unimplemented stub: logs an x86-style immediate word store and asserts. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* Unimplemented stub: logs an x86-style immediate byte store and asserts. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1977
1978void emit_mul(int rs)
1979{
1980 assem_debug("mul %%%s\n",regname[rs]);
1981 assert(0);
1982}
1983void emit_imul(int rs)
1984{
1985 assem_debug("imul %%%s\n",regname[rs]);
1986 assert(0);
1987}
1988void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1989{
1990 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1991 assert(rs1<16);
1992 assert(rs2<16);
1993 assert(hi<16);
1994 assert(lo<16);
1995 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1996}
1997void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1998{
1999 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2000 assert(rs1<16);
2001 assert(rs2<16);
2002 assert(hi<16);
2003 assert(lo<16);
2004 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2005}
2006
2007void emit_div(int rs)
2008{
2009 assem_debug("div %%%s\n",regname[rs]);
2010 assert(0);
2011}
2012void emit_idiv(int rs)
2013{
2014 assem_debug("idiv %%%s\n",regname[rs]);
2015 assert(0);
2016}
/* Unimplemented stub: logs "cdq" and asserts. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2022
2023void emit_clz(int rs,int rt)
2024{
2025 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2026 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2027}
2028
2029void emit_subcs(int rs1,int rs2,int rt)
2030{
2031 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2032 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2033}
2034
2035void emit_shrcc_imm(int rs,u_int imm,int rt)
2036{
2037 assert(imm>0);
2038 assert(imm<32);
2039 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2040 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2041}
2042
2043void emit_negmi(int rs, int rt)
2044{
2045 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2046 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2047}
2048
2049void emit_negsmi(int rs, int rt)
2050{
2051 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2052 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2053}
2054
2055void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2056{
2057 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2058 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2059}
2060
2061void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2062{
2063 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2064 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2065}
2066
2067void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2068{
2069 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2070 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2071}
2072
2073void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2077}
2078
2079void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2080{
2081 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2082 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2083}
2084
2085void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2086{
2087 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2088 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2089}
2090
2091void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2092{
2093 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2094 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2095}
2096
2097void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2098{
2099 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2100 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2101}
2102
2103void emit_teq(int rs, int rt)
2104{
2105 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2106 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2107}
2108
2109void emit_rsbimm(int rs, int imm, int rt)
2110{
2111 u_int armval;
cfbd3c6e 2112 genimm_checked(imm,&armval);
57871462 2113 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2114 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2115}
2116
2117// Load 2 immediates optimizing for small code size
2118void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2119{
2120 emit_movimm(imm1,rt1);
2121 u_int armval;
2122 if(genimm(imm2-imm1,&armval)) {
2123 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2124 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2125 }else if(genimm(imm1-imm2,&armval)) {
2126 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2127 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2128 }
2129 else emit_movimm(imm2,rt2);
2130}
2131
2132// Conditionally select one of two immediates, optimizing for small code size
2133// This will only be called if HAVE_CMOV_IMM is defined
2134void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2135{
2136 u_int armval;
2137 if(genimm(imm2-imm1,&armval)) {
2138 emit_movimm(imm1,rt);
2139 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2140 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2141 }else if(genimm(imm1-imm2,&armval)) {
2142 emit_movimm(imm1,rt);
2143 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2144 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2145 }
2146 else {
2147 #ifdef ARMv5_ONLY
2148 emit_movimm(imm1,rt);
2149 add_literal((int)out,imm2);
2150 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2151 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2152 #else
2153 emit_movw(imm1&0x0000FFFF,rt);
2154 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2155 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2156 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2157 }
2158 emit_movt(imm1&0xFFFF0000,rt);
2159 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2160 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2161 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2162 }
2163 #endif
2164 }
2165}
2166
// special case for checking invalid_code
// This variant has no implementation here: any call trips assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2172
2173// special case for checking invalid_code
2174void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2175{
2176 assert(imm<128&&imm>=0);
2177 assert(r>=0&&r<16);
2178 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2179 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2180 emit_cmpimm(HOST_TEMPREG,imm);
2181}
2182
2183// special case for tlb mapping
2184void emit_addsr12(int rs1,int rs2,int rt)
2185{
2186 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2187 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2188}
2189
// Used to preload hash table entries
// NOTE(review): the two opcode bytes plus output_modrm() look like an x86
// encoding rather than an ARM instruction word - confirm whether this
// function is ever reached in the ARM backend.
void emit_prefetch(void *addr)
{
  const int target = (int)addr;

  assem_debug("prefetch %x\n", target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32(target);
}
2199void emit_prefetchreg(int r)
2200{
2201 assem_debug("pld %s\n",regname[r]);
2202 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2203}
2204
2205// Special case for mini_ht
2206void emit_ldreq_indexed(int rs, u_int offset, int rt)
2207{
2208 assert(offset<4096);
2209 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2210 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2211}
2212
2213void emit_flds(int r,int sr)
2214{
2215 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2216 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2217}
2218
2219void emit_vldr(int r,int vr)
2220{
2221 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2222 output_w32(0xed900b00|(vr<<12)|(r<<16));
2223}
2224
2225void emit_fsts(int sr,int r)
2226{
2227 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2228 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2229}
2230
2231void emit_vstr(int vr,int r)
2232{
2233 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2234 output_w32(0xed800b00|(vr<<12)|(r<<16));
2235}
2236
2237void emit_ftosizs(int s,int d)
2238{
2239 assem_debug("ftosizs s%d,s%d\n",d,s);
2240 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2241}
2242
2243void emit_ftosizd(int s,int d)
2244{
2245 assem_debug("ftosizd s%d,d%d\n",d,s);
2246 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2247}
2248
2249void emit_fsitos(int s,int d)
2250{
2251 assem_debug("fsitos s%d,s%d\n",d,s);
2252 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2253}
2254
2255void emit_fsitod(int s,int d)
2256{
2257 assem_debug("fsitod d%d,s%d\n",d,s);
2258 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2259}
2260
2261void emit_fcvtds(int s,int d)
2262{
2263 assem_debug("fcvtds d%d,s%d\n",d,s);
2264 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2265}
2266
2267void emit_fcvtsd(int s,int d)
2268{
2269 assem_debug("fcvtsd s%d,d%d\n",d,s);
2270 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2271}
2272
/* Emit "fsqrts s<d>,s<s>": single-precision square root of s into d.
 * Both operands are encoded as single registers, so the debug text now
 * prints both with the "s" prefix (it used to print the destination as
 * "d<n>"). */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2278
/* Emit "fsqrtd d<d>,d<s>": double-precision square root of s into d.
 * Both operands are encoded as double registers, so the debug text now
 * prints both with the "d" prefix (it used to print the destination as
 * "s<n>"). */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2284
/* Emit "fabss s<d>,s<s>": single-precision absolute value of s into d.
 * Both operands are encoded as single registers, so the debug text now
 * prints both with the "s" prefix (it used to print the destination as
 * "d<n>"). */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2290
/* Emit "fabsd d<d>,d<s>": double-precision absolute value of s into d.
 * Both operands are encoded as double registers, so the debug text now
 * prints both with the "d" prefix (it used to print the destination as
 * "s<n>"). */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2296
/* Emit "fnegs s<d>,s<s>": single-precision negation of s into d.
 * Both operands are encoded as single registers, so the debug text now
 * prints both with the "s" prefix (it used to print the destination as
 * "d<n>"). */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2302
/* Emit "fnegd d<d>,d<s>": double-precision negation of s into d.
 * Both operands are encoded as double registers, so the debug text now
 * prints both with the "d" prefix (it used to print the destination as
 * "s<n>"). */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2308
2309void emit_fadds(int s1,int s2,int d)
2310{
2311 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2312 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2313}
2314
2315void emit_faddd(int s1,int s2,int d)
2316{
2317 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2318 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2319}
2320
2321void emit_fsubs(int s1,int s2,int d)
2322{
2323 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2324 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2325}
2326
2327void emit_fsubd(int s1,int s2,int d)
2328{
2329 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2330 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2331}
2332
2333void emit_fmuls(int s1,int s2,int d)
2334{
2335 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2336 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2337}
2338
2339void emit_fmuld(int s1,int s2,int d)
2340{
2341 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2342 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2343}
2344
2345void emit_fdivs(int s1,int s2,int d)
2346{
2347 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2348 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2349}
2350
2351void emit_fdivd(int s1,int s2,int d)
2352{
2353 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2354 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2355}
2356
/* Emit a fixed "fcmps s14, s15".
 * x and y are accepted but not consulted: the instruction word is constant. */
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;

  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2362
/* Emit a fixed "fcmpd d6, d7".
 * x and y are accepted but not consulted: the instruction word is constant. */
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;

  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2368
2369void emit_fmstat()
2370{
2371 assem_debug("fmstat\n");
2372 output_w32(0xeef1fa10);
2373}
2374
2375void emit_bicne_imm(int rs,int imm,int rt)
2376{
2377 u_int armval;
cfbd3c6e 2378 genimm_checked(imm,&armval);
57871462 2379 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2380 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2381}
2382
2383void emit_biccs_imm(int rs,int imm,int rt)
2384{
2385 u_int armval;
cfbd3c6e 2386 genimm_checked(imm,&armval);
57871462 2387 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2388 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2389}
2390
2391void emit_bicvc_imm(int rs,int imm,int rt)
2392{
2393 u_int armval;
cfbd3c6e 2394 genimm_checked(imm,&armval);
57871462 2395 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2396 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2397}
2398
2399void emit_bichi_imm(int rs,int imm,int rt)
2400{
2401 u_int armval;
cfbd3c6e 2402 genimm_checked(imm,&armval);
57871462 2403 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2404 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2405}
2406
2407void emit_orrvs_imm(int rs,int imm,int rt)
2408{
2409 u_int armval;
cfbd3c6e 2410 genimm_checked(imm,&armval);
57871462 2411 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2412 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2413}
2414
b9b61529 2415void emit_orrne_imm(int rs,int imm,int rt)
2416{
2417 u_int armval;
cfbd3c6e 2418 genimm_checked(imm,&armval);
b9b61529 2419 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2420 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2421}
2422
2423void emit_andne_imm(int rs,int imm,int rt)
2424{
2425 u_int armval;
cfbd3c6e 2426 genimm_checked(imm,&armval);
b9b61529 2427 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2428 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2429}
2430
57871462 2431void emit_jno_unlikely(int a)
2432{
2433 //emit_jno(a);
2434 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2435 output_w32(0x72800000|rd_rn_rm(15,15,0));
2436}
2437
2438// Save registers before function call
2439void save_regs(u_int reglist)
2440{
2441 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2442 if(!reglist) return;
2443 assem_debug("stmia fp,{");
2444 if(reglist&1) assem_debug("r0, ");
2445 if(reglist&2) assem_debug("r1, ");
2446 if(reglist&4) assem_debug("r2, ");
2447 if(reglist&8) assem_debug("r3, ");
2448 if(reglist&0x1000) assem_debug("r12");
2449 assem_debug("}\n");
2450 output_w32(0xe88b0000|reglist);
2451}
2452// Restore registers after function call
2453void restore_regs(u_int reglist)
2454{
2455 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2456 if(!reglist) return;
2457 assem_debug("ldmia fp,{");
2458 if(reglist&1) assem_debug("r0, ");
2459 if(reglist&2) assem_debug("r1, ");
2460 if(reglist&4) assem_debug("r2, ");
2461 if(reglist&8) assem_debug("r3, ");
2462 if(reglist&0x1000) assem_debug("r12");
2463 assem_debug("}\n");
2464 output_w32(0xe89b0000|reglist);
2465}
2466
2467// Write back consts using r14 so we don't disturb the other registers
2468void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2469{
2470 int hr;
2471 for(hr=0;hr<HOST_REGS;hr++) {
2472 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2473 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2474 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2475 int value=constmap[i][hr];
2476 if(value==0) {
2477 emit_zeroreg(HOST_TEMPREG);
2478 }
2479 else {
2480 emit_movimm(value,HOST_TEMPREG);
2481 }
2482 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2483#ifndef FORCE32
57871462 2484 if((i_is32>>i_regmap[hr])&1) {
2485 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2486 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2487 }
24385cae 2488#endif
57871462 2489 }
2490 }
2491 }
2492 }
2493}
2494
2495/* Stubs/epilogue */
2496
2497void literal_pool(int n)
2498{
2499 if(!literalcount) return;
2500 if(n) {
2501 if((int)out-literals[0][0]<4096-n) return;
2502 }
2503 u_int *ptr;
2504 int i;
2505 for(i=0;i<literalcount;i++)
2506 {
2507 ptr=(u_int *)literals[i][0];
2508 u_int offset=(u_int)out-(u_int)ptr-8;
2509 assert(offset<4096);
2510 assert(!(offset&3));
2511 *ptr|=offset;
2512 output_w32(literals[i][1]);
2513 }
2514 literalcount=0;
2515}
2516
2517void literal_pool_jumpover(int n)
2518{
2519 if(!literalcount) return;
2520 if(n) {
2521 if((int)out-literals[0][0]<4096-n) return;
2522 }
2523 int jaddr=(int)out;
2524 emit_jmp(0);
2525 literal_pool(0);
2526 set_jump_target(jaddr,(int)out);
2527}
2528
2529emit_extjump2(int addr, int target, int linker)
2530{
2531 u_char *ptr=(u_char *)addr;
2532 assert((ptr[3]&0x0e)==0xa);
2533 emit_loadlp(target,0);
2534 emit_loadlp(addr,1);
24385cae 2535 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2536 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2537//DEBUG >
2538#ifdef DEBUG_CYCLE_COUNT
2539 emit_readword((int)&last_count,ECX);
2540 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2541 emit_readword((int)&next_interupt,ECX);
2542 emit_writeword(HOST_CCREG,(int)&Count);
2543 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2544 emit_writeword(ECX,(int)&last_count);
2545#endif
2546//DEBUG <
2547 emit_jmp(linker);
2548}
2549
2550emit_extjump(int addr, int target)
2551{
2552 emit_extjump2(addr, target, (int)dyna_linker);
2553}
2554emit_extjump_ds(int addr, int target)
2555{
2556 emit_extjump2(addr, target, (int)dyna_linker_ds);
2557}
2558
2559do_readstub(int n)
2560{
2561 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2562 literal_pool(256);
2563 set_jump_target(stubs[n][1],(int)out);
2564 int type=stubs[n][0];
2565 int i=stubs[n][3];
2566 int rs=stubs[n][4];
2567 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2568 u_int reglist=stubs[n][7];
2569 signed char *i_regmap=i_regs->regmap;
2570 int addr=get_reg(i_regmap,AGEN1+(i&1));
2571 int rth,rt;
2572 int ds;
b9b61529 2573 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2574 rth=get_reg(i_regmap,FTEMP|64);
2575 rt=get_reg(i_regmap,FTEMP);
2576 }else{
2577 rth=get_reg(i_regmap,rt1[i]|64);
2578 rt=get_reg(i_regmap,rt1[i]);
2579 }
2580 assert(rs>=0);
57871462 2581 if(addr<0) addr=rt;
f18c0f46 2582 if(addr<0)
2583 // assume dummy read, no alloced reg
2584 addr=get_reg(i_regmap,-1);
57871462 2585 assert(addr>=0);
2586 int ftable=0;
2587 if(type==LOADB_STUB||type==LOADBU_STUB)
2588 ftable=(int)readmemb;
2589 if(type==LOADH_STUB||type==LOADHU_STUB)
2590 ftable=(int)readmemh;
2591 if(type==LOADW_STUB)
2592 ftable=(int)readmem;
24385cae 2593#ifndef FORCE32
57871462 2594 if(type==LOADD_STUB)
2595 ftable=(int)readmemd;
24385cae 2596#endif
2597 assert(ftable!=0);
57871462 2598 emit_writeword(rs,(int)&address);
2599 //emit_pusha();
2600 save_regs(reglist);
2601 ds=i_regs!=&regs[i];
2602 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2603 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2604 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2605 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2606 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2607 emit_shrimm(rs,16,1);
2608 int cc=get_reg(i_regmap,CCREG);
2609 if(cc<0) {
2610 emit_loadreg(CCREG,2);
2611 }
2612 emit_movimm(ftable,0);
2613 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2614 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2615 //emit_readword((int)&last_count,temp);
2616 //emit_add(cc,temp,cc);
2617 //emit_writeword(cc,(int)&Count);
2618 //emit_mov(15,14);
2619 emit_call((int)&indirect_jump_indexed);
2620 //emit_callreg(rs);
2621 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2622 // We really shouldn't need to update the count here,
2623 // but not doing so causes random crashes...
2624 emit_readword((int)&Count,HOST_TEMPREG);
2625 emit_readword((int)&next_interupt,2);
2626 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2627 emit_writeword(2,(int)&last_count);
2628 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2629 if(cc<0) {
2630 emit_storereg(CCREG,HOST_TEMPREG);
2631 }
2632 //emit_popa();
2633 restore_regs(reglist);
2634 //if((cc=get_reg(regmap,CCREG))>=0) {
2635 // emit_loadreg(CCREG,cc);
2636 //}
f18c0f46 2637 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2638 assert(rt>=0);
2639 if(type==LOADB_STUB)
2640 emit_movsbl((int)&readmem_dword,rt);
2641 if(type==LOADBU_STUB)
2642 emit_movzbl((int)&readmem_dword,rt);
2643 if(type==LOADH_STUB)
2644 emit_movswl((int)&readmem_dword,rt);
2645 if(type==LOADHU_STUB)
2646 emit_movzwl((int)&readmem_dword,rt);
2647 if(type==LOADW_STUB)
2648 emit_readword((int)&readmem_dword,rt);
2649 if(type==LOADD_STUB) {
2650 emit_readword((int)&readmem_dword,rt);
2651 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2652 }
57871462 2653 }
2654 emit_jmp(stubs[n][2]); // return address
2655}
2656
2657inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2658{
2659 int rs=get_reg(regmap,target);
2660 int rth=get_reg(regmap,target|64);
2661 int rt=get_reg(regmap,target);
fd99c415 2662 // allow for PCSX dummy reads
2663 //assert(rt>=0);
2664 if(rs<0)
2665 rs=get_reg(regmap,-1);
57871462 2666 assert(rs>=0);
57871462 2667 int ftable=0;
2668 if(type==LOADB_STUB||type==LOADBU_STUB)
2669 ftable=(int)readmemb;
2670 if(type==LOADH_STUB||type==LOADHU_STUB)
2671 ftable=(int)readmemh;
2672 if(type==LOADW_STUB)
2673 ftable=(int)readmem;
24385cae 2674#ifndef FORCE32
57871462 2675 if(type==LOADD_STUB)
2676 ftable=(int)readmemd;
24385cae 2677#endif
2678 assert(ftable!=0);
fd99c415 2679 if(target==0)
2680 emit_movimm(addr,rs);
57871462 2681 emit_writeword(rs,(int)&address);
2682 //emit_pusha();
2683 save_regs(reglist);
2684 //emit_shrimm(rs,16,1);
2685 int cc=get_reg(regmap,CCREG);
2686 if(cc<0) {
2687 emit_loadreg(CCREG,2);
2688 }
2689 //emit_movimm(ftable,0);
2690 emit_movimm(((u_int *)ftable)[addr>>16],0);
2691 //emit_readword((int)&last_count,12);
2692 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2693 if((signed int)addr>=(signed int)0xC0000000) {
2694 // Pagefault address
2695 int ds=regmap!=regs[i].regmap;
2696 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2697 }
2698 //emit_add(12,2,2);
2699 //emit_writeword(2,(int)&Count);
2700 //emit_call(((u_int *)ftable)[addr>>16]);
2701 emit_call((int)&indirect_jump);
2702 // We really shouldn't need to update the count here,
2703 // but not doing so causes random crashes...
2704 emit_readword((int)&Count,HOST_TEMPREG);
2705 emit_readword((int)&next_interupt,2);
2706 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2707 emit_writeword(2,(int)&last_count);
2708 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2709 if(cc<0) {
2710 emit_storereg(CCREG,HOST_TEMPREG);
2711 }
2712 //emit_popa();
2713 restore_regs(reglist);
fd99c415 2714 if(rt>=0) {
2715 if(type==LOADB_STUB)
2716 emit_movsbl((int)&readmem_dword,rt);
2717 if(type==LOADBU_STUB)
2718 emit_movzbl((int)&readmem_dword,rt);
2719 if(type==LOADH_STUB)
2720 emit_movswl((int)&readmem_dword,rt);
2721 if(type==LOADHU_STUB)
2722 emit_movzwl((int)&readmem_dword,rt);
2723 if(type==LOADW_STUB)
2724 emit_readword((int)&readmem_dword,rt);
2725 if(type==LOADD_STUB) {
2726 emit_readword((int)&readmem_dword,rt);
2727 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2728 }
57871462 2729 }
2730}
2731
2732do_writestub(int n)
2733{
2734 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2735 literal_pool(256);
2736 set_jump_target(stubs[n][1],(int)out);
2737 int type=stubs[n][0];
2738 int i=stubs[n][3];
2739 int rs=stubs[n][4];
2740 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2741 u_int reglist=stubs[n][7];
2742 signed char *i_regmap=i_regs->regmap;
2743 int addr=get_reg(i_regmap,AGEN1+(i&1));
2744 int rth,rt,r;
2745 int ds;
b9b61529 2746 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2747 rth=get_reg(i_regmap,FTEMP|64);
2748 rt=get_reg(i_regmap,r=FTEMP);
2749 }else{
2750 rth=get_reg(i_regmap,rs2[i]|64);
2751 rt=get_reg(i_regmap,r=rs2[i]);
2752 }
2753 assert(rs>=0);
2754 assert(rt>=0);
2755 if(addr<0) addr=get_reg(i_regmap,-1);
2756 assert(addr>=0);
2757 int ftable=0;
2758 if(type==STOREB_STUB)
2759 ftable=(int)writememb;
2760 if(type==STOREH_STUB)
2761 ftable=(int)writememh;
2762 if(type==STOREW_STUB)
2763 ftable=(int)writemem;
24385cae 2764#ifndef FORCE32
57871462 2765 if(type==STORED_STUB)
2766 ftable=(int)writememd;
24385cae 2767#endif
2768 assert(ftable!=0);
57871462 2769 emit_writeword(rs,(int)&address);
2770 //emit_shrimm(rs,16,rs);
2771 //emit_movmem_indexedx4(ftable,rs,rs);
2772 if(type==STOREB_STUB)
2773 emit_writebyte(rt,(int)&byte);
2774 if(type==STOREH_STUB)
2775 emit_writehword(rt,(int)&hword);
2776 if(type==STOREW_STUB)
2777 emit_writeword(rt,(int)&word);
2778 if(type==STORED_STUB) {
3d624f89 2779#ifndef FORCE32
57871462 2780 emit_writeword(rt,(int)&dword);
2781 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2782#else
2783 printf("STORED_STUB\n");
2784#endif
57871462 2785 }
2786 //emit_pusha();
2787 save_regs(reglist);
2788 ds=i_regs!=&regs[i];
2789 int real_rs=get_reg(i_regmap,rs1[i]);
2790 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2791 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2792 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2793 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2794 emit_shrimm(rs,16,1);
2795 int cc=get_reg(i_regmap,CCREG);
2796 if(cc<0) {
2797 emit_loadreg(CCREG,2);
2798 }
2799 emit_movimm(ftable,0);
2800 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2801 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2802 //emit_readword((int)&last_count,temp);
2803 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2804 //emit_add(cc,temp,cc);
2805 //emit_writeword(cc,(int)&Count);
2806 emit_call((int)&indirect_jump_indexed);
2807 //emit_callreg(rs);
2808 emit_readword((int)&Count,HOST_TEMPREG);
2809 emit_readword((int)&next_interupt,2);
2810 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2811 emit_writeword(2,(int)&last_count);
2812 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2813 if(cc<0) {
2814 emit_storereg(CCREG,HOST_TEMPREG);
2815 }
2816 //emit_popa();
2817 restore_regs(reglist);
2818 //if((cc=get_reg(regmap,CCREG))>=0) {
2819 // emit_loadreg(CCREG,cc);
2820 //}
2821 emit_jmp(stubs[n][2]); // return address
2822}
2823
2824inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2825{
2826 int rs=get_reg(regmap,-1);
2827 int rth=get_reg(regmap,target|64);
2828 int rt=get_reg(regmap,target);
2829 assert(rs>=0);
2830 assert(rt>=0);
2831 int ftable=0;
2832 if(type==STOREB_STUB)
2833 ftable=(int)writememb;
2834 if(type==STOREH_STUB)
2835 ftable=(int)writememh;
2836 if(type==STOREW_STUB)
2837 ftable=(int)writemem;
24385cae 2838#ifndef FORCE32
57871462 2839 if(type==STORED_STUB)
2840 ftable=(int)writememd;
24385cae 2841#endif
2842 assert(ftable!=0);
57871462 2843 emit_writeword(rs,(int)&address);
2844 //emit_shrimm(rs,16,rs);
2845 //emit_movmem_indexedx4(ftable,rs,rs);
2846 if(type==STOREB_STUB)
2847 emit_writebyte(rt,(int)&byte);
2848 if(type==STOREH_STUB)
2849 emit_writehword(rt,(int)&hword);
2850 if(type==STOREW_STUB)
2851 emit_writeword(rt,(int)&word);
2852 if(type==STORED_STUB) {
3d624f89 2853#ifndef FORCE32
57871462 2854 emit_writeword(rt,(int)&dword);
2855 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2856#else
2857 printf("STORED_STUB\n");
2858#endif
57871462 2859 }
2860 //emit_pusha();
2861 save_regs(reglist);
2862 //emit_shrimm(rs,16,1);
2863 int cc=get_reg(regmap,CCREG);
2864 if(cc<0) {
2865 emit_loadreg(CCREG,2);
2866 }
2867 //emit_movimm(ftable,0);
2868 emit_movimm(((u_int *)ftable)[addr>>16],0);
2869 //emit_readword((int)&last_count,12);
2870 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2871 if((signed int)addr>=(signed int)0xC0000000) {
2872 // Pagefault address
2873 int ds=regmap!=regs[i].regmap;
2874 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2875 }
2876 //emit_add(12,2,2);
2877 //emit_writeword(2,(int)&Count);
2878 //emit_call(((u_int *)ftable)[addr>>16]);
2879 emit_call((int)&indirect_jump);
2880 emit_readword((int)&Count,HOST_TEMPREG);
2881 emit_readword((int)&next_interupt,2);
2882 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2883 emit_writeword(2,(int)&last_count);
2884 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2885 if(cc<0) {
2886 emit_storereg(CCREG,HOST_TEMPREG);
2887 }
2888 //emit_popa();
2889 restore_regs(reglist);
2890}
2891
2892do_unalignedwritestub(int n)
2893{
b7918751 2894 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2895 literal_pool(256);
57871462 2896 set_jump_target(stubs[n][1],(int)out);
b7918751 2897
2898 int i=stubs[n][3];
2899 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2900 int addr=stubs[n][5];
2901 u_int reglist=stubs[n][7];
2902 signed char *i_regmap=i_regs->regmap;
2903 int temp2=get_reg(i_regmap,FTEMP);
2904 int rt;
2905 int ds, real_rs;
2906 rt=get_reg(i_regmap,rs2[i]);
2907 assert(rt>=0);
2908 assert(addr>=0);
2909 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2910 reglist|=(1<<addr);
2911 reglist&=~(1<<temp2);
2912
2913 emit_andimm(addr,0xfffffffc,temp2);
2914 emit_writeword(temp2,(int)&address);
2915
2916 save_regs(reglist);
2917 ds=i_regs!=&regs[i];
2918 real_rs=get_reg(i_regmap,rs1[i]);
2919 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2920 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2921 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2922 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2923 emit_shrimm(addr,16,1);
2924 int cc=get_reg(i_regmap,CCREG);
2925 if(cc<0) {
2926 emit_loadreg(CCREG,2);
2927 }
2928 emit_movimm((u_int)readmem,0);
2929 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2930 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2931 emit_call((int)&indirect_jump_indexed);
2932 restore_regs(reglist);
2933
2934 emit_readword((int)&readmem_dword,temp2);
2935 int temp=addr; //hmh
2936 emit_shlimm(addr,3,temp);
2937 emit_andimm(temp,24,temp);
2938#ifdef BIG_ENDIAN_MIPS
2939 if (opcode[i]==0x2e) // SWR
2940#else
2941 if (opcode[i]==0x2a) // SWL
2942#endif
2943 emit_xorimm(temp,24,temp);
2944 emit_movimm(-1,HOST_TEMPREG);
55439448 2945 if (opcode[i]==0x2a) { // SWL
b7918751 2946 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2947 emit_orrshr(rt,temp,temp2);
2948 }else{
2949 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2950 emit_orrshl(rt,temp,temp2);
2951 }
2952 emit_readword((int)&address,addr);
2953 emit_writeword(temp2,(int)&word);
2954 //save_regs(reglist); // don't need to, no state changes
2955 emit_shrimm(addr,16,1);
2956 emit_movimm((u_int)writemem,0);
2957 //emit_call((int)&indirect_jump_indexed);
2958 emit_mov(15,14);
2959 emit_readword_dualindexedx4(0,1,15);
2960 emit_readword((int)&Count,HOST_TEMPREG);
2961 emit_readword((int)&next_interupt,2);
2962 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2963 emit_writeword(2,(int)&last_count);
2964 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2965 if(cc<0) {
2966 emit_storereg(CCREG,HOST_TEMPREG);
2967 }
2968 restore_regs(reglist);
57871462 2969 emit_jmp(stubs[n][2]); // return address
2970}
2971
/* Debug helper: print the eight values passed in, in the order
 * a b c d ebp esi edi, followed in parentheses by the int located just
 * below the first parameter in memory.  esp is accepted but not printed. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a, b, c, d,
         ebp, esi, edi,
         (&edi)[-1]);
}
2976
2977do_invstub(int n)
2978{
2979 literal_pool(20);
2980 u_int reglist=stubs[n][3];
2981 set_jump_target(stubs[n][1],(int)out);
2982 save_regs(reglist);
2983 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2984 emit_call((int)&invalidate_addr);
2985 restore_regs(reglist);
2986 emit_jmp(stubs[n][2]); // return address
2987}
2988
2989int do_dirty_stub(int i)
2990{
2991 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2992 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2993 #ifdef PCSX
2994 addr=(u_int)source;
2995 #endif
57871462 2996 // Careful about the code output here, verify_dirty needs to parse it.
2997 #ifdef ARMv5_ONLY
ac545b3a 2998 emit_loadlp(addr,1);
57871462 2999 emit_loadlp((int)copy,2);
3000 emit_loadlp(slen*4,3);
3001 #else
ac545b3a 3002 emit_movw(addr&0x0000FFFF,1);
57871462 3003 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3004 emit_movt(addr&0xFFFF0000,1);
57871462 3005 emit_movt(((u_int)copy)&0xFFFF0000,2);
3006 emit_movw(slen*4,3);
3007 #endif
3008 emit_movimm(start+i*4,0);
3009 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3010 int entry=(int)out;
3011 load_regs_entry(i);
3012 if(entry==(int)out) entry=instr_addr[i];
3013 emit_jmp(instr_addr[i]);
3014 return entry;
3015}
3016
3017void do_dirty_stub_ds()
3018{
3019 // Careful about the code output here, verify_dirty needs to parse it.
3020 #ifdef ARMv5_ONLY
3021 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3022 emit_loadlp((int)copy,2);
3023 emit_loadlp(slen*4,3);
3024 #else
3025 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3026 emit_movw(((u_int)copy)&0x0000FFFF,2);
3027 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3028 emit_movt(((u_int)copy)&0xFFFF0000,2);
3029 emit_movw(slen*4,3);
3030 #endif
3031 emit_movimm(start+1,0);
3032 emit_call((int)&verify_code_ds);
3033}
3034
3035do_cop1stub(int n)
3036{
3037 literal_pool(256);
3038 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3039 set_jump_target(stubs[n][1],(int)out);
3040 int i=stubs[n][3];
3d624f89 3041// int rs=stubs[n][4];
57871462 3042 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3043 int ds=stubs[n][6];
3044 if(!ds) {
3045 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3046 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3047 }
3048 //else {printf("fp exception in delay slot\n");}
3049 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3050 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3051 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3052 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3053 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3054}
3055
3056/* TLB */
3057
3058int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3059{
3060 if(c) {
3061 if((signed int)addr>=(signed int)0xC0000000) {
3062 // address_generation already loaded the const
3063 emit_readword_dualindexedx4(FP,map,map);
3064 }
3065 else
3066 return -1; // No mapping
3067 }
3068 else {
3069 assert(s!=map);
3070 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3071 emit_addsr12(map,s,map);
3072 // Schedule this while we wait on the load
3073 //if(x) emit_xorimm(s,x,ar);
3074 if(shift>=0) emit_shlimm(s,3,shift);
3075 if(~a) emit_andimm(s,a,ar);
3076 emit_readword_dualindexedx4(FP,map,map);
3077 }
3078 return map;
3079}
3080int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3081{
3082 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3083 emit_test(map,map);
3084 *jaddr=(int)out;
3085 emit_js(0);
3086 }
3087 return map;
3088}
3089
3090int gen_tlb_addr_r(int ar, int map) {
3091 if(map>=0) {
3092 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3093 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3094 }
3095}
3096
3097int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3098{
3099 if(c) {
3100 if(addr<0x80800000||addr>=0xC0000000) {
3101 // address_generation already loaded the const
3102 emit_readword_dualindexedx4(FP,map,map);
3103 }
3104 else
3105 return -1; // No mapping
3106 }
3107 else {
3108 assert(s!=map);
3109 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3110 emit_addsr12(map,s,map);
3111 // Schedule this while we wait on the load
3112 //if(x) emit_xorimm(s,x,ar);
3113 emit_readword_dualindexedx4(FP,map,map);
3114 }
3115 return map;
3116}
3117int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3118{
3119 if(!c||addr<0x80800000||addr>=0xC0000000) {
3120 emit_testimm(map,0x40000000);
3121 *jaddr=(int)out;
3122 emit_jne(0);
3123 }
3124}
3125
3126int gen_tlb_addr_w(int ar, int map) {
3127 if(map>=0) {
3128 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3129 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3130 }
3131}
3132
3133// Generate the address of the memory_map entry, relative to dynarec_local
3134generate_map_const(u_int addr,int reg) {
3135 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3136 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3137}
3138
3139/* Special assem */
3140
3141void shift_assemble_arm(int i,struct regstat *i_regs)
3142{
3143 if(rt1[i]) {
3144 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3145 {
3146 signed char s,t,shift;
3147 t=get_reg(i_regs->regmap,rt1[i]);
3148 s=get_reg(i_regs->regmap,rs1[i]);
3149 shift=get_reg(i_regs->regmap,rs2[i]);
3150 if(t>=0){
3151 if(rs1[i]==0)
3152 {
3153 emit_zeroreg(t);
3154 }
3155 else if(rs2[i]==0)
3156 {
3157 assert(s>=0);
3158 if(s!=t) emit_mov(s,t);
3159 }
3160 else
3161 {
3162 emit_andimm(shift,31,HOST_TEMPREG);
3163 if(opcode2[i]==4) // SLLV
3164 {
3165 emit_shl(s,HOST_TEMPREG,t);
3166 }
3167 if(opcode2[i]==6) // SRLV
3168 {
3169 emit_shr(s,HOST_TEMPREG,t);
3170 }
3171 if(opcode2[i]==7) // SRAV
3172 {
3173 emit_sar(s,HOST_TEMPREG,t);
3174 }
3175 }
3176 }
3177 } else { // DSLLV/DSRLV/DSRAV
3178 signed char sh,sl,th,tl,shift;
3179 th=get_reg(i_regs->regmap,rt1[i]|64);
3180 tl=get_reg(i_regs->regmap,rt1[i]);
3181 sh=get_reg(i_regs->regmap,rs1[i]|64);
3182 sl=get_reg(i_regs->regmap,rs1[i]);
3183 shift=get_reg(i_regs->regmap,rs2[i]);
3184 if(tl>=0){
3185 if(rs1[i]==0)
3186 {
3187 emit_zeroreg(tl);
3188 if(th>=0) emit_zeroreg(th);
3189 }
3190 else if(rs2[i]==0)
3191 {
3192 assert(sl>=0);
3193 if(sl!=tl) emit_mov(sl,tl);
3194 if(th>=0&&sh!=th) emit_mov(sh,th);
3195 }
3196 else
3197 {
3198 // FIXME: What if shift==tl ?
3199 assert(shift!=tl);
3200 int temp=get_reg(i_regs->regmap,-1);
3201 int real_th=th;
3202 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3203 assert(sl>=0);
3204 assert(sh>=0);
3205 emit_andimm(shift,31,HOST_TEMPREG);
3206 if(opcode2[i]==0x14) // DSLLV
3207 {
3208 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3209 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3210 emit_orrshr(sl,HOST_TEMPREG,th);
3211 emit_andimm(shift,31,HOST_TEMPREG);
3212 emit_testimm(shift,32);
3213 emit_shl(sl,HOST_TEMPREG,tl);
3214 if(th>=0) emit_cmovne_reg(tl,th);
3215 emit_cmovne_imm(0,tl);
3216 }
3217 if(opcode2[i]==0x16) // DSRLV
3218 {
3219 assert(th>=0);
3220 emit_shr(sl,HOST_TEMPREG,tl);
3221 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3222 emit_orrshl(sh,HOST_TEMPREG,tl);
3223 emit_andimm(shift,31,HOST_TEMPREG);
3224 emit_testimm(shift,32);
3225 emit_shr(sh,HOST_TEMPREG,th);
3226 emit_cmovne_reg(th,tl);
3227 if(real_th>=0) emit_cmovne_imm(0,th);
3228 }
3229 if(opcode2[i]==0x17) // DSRAV
3230 {
3231 assert(th>=0);
3232 emit_shr(sl,HOST_TEMPREG,tl);
3233 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3234 if(real_th>=0) {
3235 assert(temp>=0);
3236 emit_sarimm(th,31,temp);
3237 }
3238 emit_orrshl(sh,HOST_TEMPREG,tl);
3239 emit_andimm(shift,31,HOST_TEMPREG);
3240 emit_testimm(shift,32);
3241 emit_sar(sh,HOST_TEMPREG,th);
3242 emit_cmovne_reg(th,tl);
3243 if(real_th>=0) emit_cmovne_reg(temp,th);
3244 }
3245 }
3246 }
3247 }
3248 }
3249}
3250#define shift_assemble shift_assemble_arm
3251
3252void loadlr_assemble_arm(int i,struct regstat *i_regs)
3253{
3254 int s,th,tl,temp,temp2,addr,map=-1;
3255 int offset;
3256 int jaddr=0;
3257 int memtarget,c=0;
3258 u_int hr,reglist=0;
3259 th=get_reg(i_regs->regmap,rt1[i]|64);
3260 tl=get_reg(i_regs->regmap,rt1[i]);
3261 s=get_reg(i_regs->regmap,rs1[i]);
3262 temp=get_reg(i_regs->regmap,-1);
3263 temp2=get_reg(i_regs->regmap,FTEMP);
3264 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3265 assert(addr<0);
3266 offset=imm[i];
3267 for(hr=0;hr<HOST_REGS;hr++) {
3268 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3269 }
3270 reglist|=1<<temp;
3271 if(offset||s<0||c) addr=temp2;
3272 else addr=s;
3273 if(s>=0) {
3274 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3275 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3276 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3277 }
3278 if(tl>=0) {
3279 //assert(tl>=0);
3280 //assert(rt1[i]);
3281 if(!using_tlb) {
3282 if(!c) {
3283 emit_shlimm(addr,3,temp);
3284 if (opcode[i]==0x22||opcode[i]==0x26) {
3285 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3286 }else{
3287 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3288 }
4cb76aa4 3289 emit_cmpimm(addr,RAM_SIZE);
57871462 3290 jaddr=(int)out;
3291 emit_jno(0);
3292 }
3293 else {
3294 if (opcode[i]==0x22||opcode[i]==0x26) {
3295 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3296 }else{
3297 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3298 }
3299 }
3300 }else{ // using tlb
3301 int a;
3302 if(c) {
3303 a=-1;
3304 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3305 a=0xFFFFFFFC; // LWL/LWR
3306 }else{
3307 a=0xFFFFFFF8; // LDL/LDR
3308 }
3309 map=get_reg(i_regs->regmap,TLREG);
3310 assert(map>=0);
3311 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3312 if(c) {
3313 if (opcode[i]==0x22||opcode[i]==0x26) {
3314 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3315 }else{
3316 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3317 }
3318 }
3319 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3320 }
3321 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3322 if(!c||memtarget) {
3323 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3324 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3325 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3326 }
3327 else
3328 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3329 emit_andimm(temp,24,temp);
2002a1db 3330#ifdef BIG_ENDIAN_MIPS
3331 if (opcode[i]==0x26) // LWR
3332#else
3333 if (opcode[i]==0x22) // LWL
3334#endif
3335 emit_xorimm(temp,24,temp);
57871462 3336 emit_movimm(-1,HOST_TEMPREG);
3337 if (opcode[i]==0x26) {
3338 emit_shr(temp2,temp,temp2);
3339 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3340 }else{
3341 emit_shl(temp2,temp,temp2);
3342 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3343 }
3344 emit_or(temp2,tl,tl);
3345 //emit_storereg(rt1[i],tl); // DEBUG
3346 }
3347 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3348 // FIXME: little endian
57871462 3349 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3350 if(!c||memtarget) {
3351 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3352 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3353 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3354 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3355 }
3356 else
3357 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3358 emit_testimm(temp,32);
3359 emit_andimm(temp,24,temp);
3360 if (opcode[i]==0x1A) { // LDL
3361 emit_rsbimm(temp,32,HOST_TEMPREG);
3362 emit_shl(temp2h,temp,temp2h);
3363 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3364 emit_movimm(-1,HOST_TEMPREG);
3365 emit_shl(temp2,temp,temp2);
3366 emit_cmove_reg(temp2h,th);
3367 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3368 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3369 emit_orreq(temp2,tl,tl);
3370 emit_orrne(temp2,th,th);
3371 }
3372 if (opcode[i]==0x1B) { // LDR
3373 emit_xorimm(temp,24,temp);
3374 emit_rsbimm(temp,32,HOST_TEMPREG);
3375 emit_shr(temp2,temp,temp2);
3376 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3377 emit_movimm(-1,HOST_TEMPREG);
3378 emit_shr(temp2h,temp,temp2h);
3379 emit_cmovne_reg(temp2,tl);
3380 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3381 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3382 emit_orrne(temp2h,th,th);
3383 emit_orreq(temp2h,tl,tl);
3384 }
3385 }
3386 }
3387}
3388#define loadlr_assemble loadlr_assemble_arm
3389
3390void cop0_assemble(int i,struct regstat *i_regs)
3391{
3392 if(opcode2[i]==0) // MFC0
3393 {
3394 signed char t=get_reg(i_regs->regmap,rt1[i]);
3395 char copr=(source[i]>>11)&0x1f;
3396 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3397 if(t>=0&&rt1[i]!=0) {
7139f3c8 3398#ifdef MUPEN64
57871462 3399 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3400 emit_movimm((source[i]>>11)&0x1f,1);
3401 emit_writeword(0,(int)&PC);
3402 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3403 if(copr==9) {
3404 emit_readword((int)&last_count,ECX);
3405 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3406 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3407 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3408 emit_writeword(HOST_CCREG,(int)&Count);
3409 }
3410 emit_call((int)MFC0);
3411 emit_readword((int)&readmem_dword,t);
7139f3c8 3412#else
3413 emit_readword((int)&reg_cop0+copr*4,t);
3414#endif
57871462 3415 }
3416 }
3417 else if(opcode2[i]==4) // MTC0
3418 {
3419 signed char s=get_reg(i_regs->regmap,rs1[i]);
3420 char copr=(source[i]>>11)&0x1f;
3421 assert(s>=0);
3422 emit_writeword(s,(int)&readmem_dword);
3423 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3424#ifdef MUPEN64
57871462 3425 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3426 emit_movimm((source[i]>>11)&0x1f,1);
3427 emit_writeword(0,(int)&PC);
3428 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3429#endif
3430 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3431 emit_readword((int)&last_count,ECX);
3432 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3433 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3434 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3435 emit_writeword(HOST_CCREG,(int)&Count);
3436 }
3437 // What a mess. The status register (12) can enable interrupts,
3438 // so needs a special case to handle a pending interrupt.
3439 // The interrupt must be taken immediately, because a subsequent
3440 // instruction might disable interrupts again.
7139f3c8 3441 if(copr==12||copr==13) {
fca1aef2 3442#ifdef PCSX
3443 if (is_delayslot) {
3444 // burn cycles to cause cc_interrupt, which will
3445 // reschedule next_interupt. Relies on CCREG from above.
3446 assem_debug("MTC0 DS %d\n", copr);
3447 emit_writeword(HOST_CCREG,(int)&last_count);
3448 emit_movimm(0,HOST_CCREG);
3449 emit_storereg(CCREG,HOST_CCREG);
3450 emit_movimm(copr,0);
3451 emit_call((int)pcsx_mtc0_ds);
3452 return;
3453 }
3454#endif
57871462 3455 emit_movimm(start+i*4+4,0);
3456 emit_movimm(0,1);
3457 emit_writeword(0,(int)&pcaddr);
3458 emit_writeword(1,(int)&pending_exception);
3459 }
3460 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3461 //else
fca1aef2 3462#ifdef PCSX
3463 emit_movimm(copr,0);
3464 emit_call((int)pcsx_mtc0);
3465#else
57871462 3466 emit_call((int)MTC0);
fca1aef2 3467#endif
7139f3c8 3468 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3469 emit_readword((int)&Count,HOST_CCREG);
3470 emit_readword((int)&next_interupt,ECX);
3471 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3472 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3473 emit_writeword(ECX,(int)&last_count);
3474 emit_storereg(CCREG,HOST_CCREG);
3475 }
7139f3c8 3476 if(copr==12||copr==13) {
57871462 3477 assert(!is_delayslot);
3478 emit_readword((int)&pending_exception,14);
3479 }