drc: remove funcs from assert()s to allow NDEBUG build
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 ARM registers, indexed by register number.
// r11, r13, r14 and r15 are shown as fp, sp, lr and pc.
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12","sp", "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
798u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
799{
800 assert(rd<16);
801 assert(rn<16);
802 assert(rm<16);
803 return((rn<<16)|(rd<<12)|rm);
804}
805u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
806{
807 assert(rd<16);
808 assert(rn<16);
809 assert(imm<256);
810 assert((shift&1)==0);
811 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
812}
813u_int genimm(u_int imm,u_int *encoded)
814{
815 if(imm==0) {*encoded=0;return 1;}
816 int i=32;
817 while(i>0)
818 {
819 if(imm<256) {
820 *encoded=((i&30)<<7)|imm;
821 return 1;
822 }
823 imm=(imm>>2)|(imm<<30);i-=2;
824 }
825 return 0;
826}
cfbd3c6e 827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
57871462 832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
e80343e2 835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
57871462 842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
917void emit_loadreg(int r, int hr)
918{
3d624f89 919#ifdef FORCE32
920 if(r&64) {
921 printf("64bit load in 32bit mode!\n");
922 exit(1);
923 }
924#endif
57871462 925 if((r&63)==0)
926 emit_zeroreg(hr);
927 else {
3d624f89 928 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 929 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
930 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
931 if(r==CCREG) addr=(int)&cycle_count;
932 if(r==CSREG) addr=(int)&Status;
933 if(r==FSREG) addr=(int)&FCR31;
934 if(r==INVCP) addr=(int)&invc_ptr;
935 u_int offset = addr-(u_int)&dynarec_local;
936 assert(offset<4096);
937 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
938 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
939 }
940}
941void emit_storereg(int r, int hr)
942{
3d624f89 943#ifdef FORCE32
944 if(r&64) {
945 printf("64bit store in 32bit mode!\n");
946 exit(1);
947 }
948#endif
949 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 950 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
951 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
952 if(r==CCREG) addr=(int)&cycle_count;
953 if(r==FSREG) addr=(int)&FCR31;
954 u_int offset = addr-(u_int)&dynarec_local;
955 assert(offset<4096);
956 assem_debug("str %s,fp+%d\n",regname[hr],offset);
957 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
958}
959
960void emit_test(int rs, int rt)
961{
962 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
963 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
964}
965
966void emit_testimm(int rs,int imm)
967{
968 u_int armval;
969 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 970 genimm_checked(imm,&armval);
57871462 971 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
972}
973
b9b61529 974void emit_testeqimm(int rs,int imm)
975{
976 u_int armval;
977 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 978 genimm_checked(imm,&armval);
b9b61529 979 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
980}
981
57871462 982void emit_not(int rs,int rt)
983{
984 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
986}
987
b9b61529 988void emit_mvnmi(int rs,int rt)
989{
990 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
991 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
992}
993
57871462 994void emit_and(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
998}
999
1000void emit_or(u_int rs1,u_int rs2,u_int rt)
1001{
1002 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1004}
1005void emit_or_and_set_flags(int rs1,int rs2,int rt)
1006{
1007 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
576bbd8f 1011void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1012{
1013 assert(rs<16);
1014 assert(rt<16);
1015 assert(imm<32);
1016 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1017 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1018}
1019
57871462 1020void emit_xor(u_int rs1,u_int rs2,u_int rt)
1021{
1022 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1023 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1024}
1025
1026void emit_loadlp(u_int imm,u_int rt)
1027{
1028 add_literal((int)out,imm);
1029 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1030 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1031}
1032void emit_movw(u_int imm,u_int rt)
1033{
1034 assert(imm<65536);
1035 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1036 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1037}
1038void emit_movt(u_int imm,u_int rt)
1039{
1040 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1041 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1042}
1043void emit_movimm(u_int imm,u_int rt)
1044{
1045 u_int armval;
1046 if(genimm(imm,&armval)) {
1047 assem_debug("mov %s,#%d\n",regname[rt],imm);
1048 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1049 }else if(genimm(~imm,&armval)) {
1050 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1051 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1052 }else if(imm<65536) {
1053 #ifdef ARMv5_ONLY
1054 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1055 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1056 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1057 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1058 #else
1059 emit_movw(imm,rt);
1060 #endif
1061 }else{
1062 #ifdef ARMv5_ONLY
1063 emit_loadlp(imm,rt);
1064 #else
1065 emit_movw(imm&0x0000FFFF,rt);
1066 emit_movt(imm&0xFFFF0000,rt);
1067 #endif
1068 }
1069}
1070void emit_pcreladdr(u_int rt)
1071{
1072 assem_debug("add %s,pc,#?\n",regname[rt]);
1073 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1074}
1075
1076void emit_addimm(u_int rs,int imm,u_int rt)
1077{
1078 assert(rs<16);
1079 assert(rt<16);
1080 if(imm!=0) {
1081 assert(imm>-65536&&imm<65536);
1082 u_int armval;
1083 if(genimm(imm,&armval)) {
1084 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1085 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1086 }else if(genimm(-imm,&armval)) {
1087 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1088 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1089 }else if(imm<0) {
1090 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1091 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1092 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1093 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1094 }else{
1095 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1096 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1097 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1098 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1099 }
1100 }
1101 else if(rs!=rt) emit_mov(rs,rt);
1102}
1103
1104void emit_addimm_and_set_flags(int imm,int rt)
1105{
1106 assert(imm>-65536&&imm<65536);
1107 u_int armval;
1108 if(genimm(imm,&armval)) {
1109 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1110 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1111 }else if(genimm(-imm,&armval)) {
1112 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1113 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1114 }else if(imm<0) {
1115 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1116 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1117 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1118 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1119 }else{
1120 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1121 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1122 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1123 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1124 }
1125}
1126void emit_addimm_no_flags(u_int imm,u_int rt)
1127{
1128 emit_addimm(rt,imm,rt);
1129}
1130
1131void emit_addnop(u_int r)
1132{
1133 assert(r<16);
1134 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1135 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1136}
1137
1138void emit_adcimm(u_int rs,int imm,u_int rt)
1139{
1140 u_int armval;
cfbd3c6e 1141 genimm_checked(imm,&armval);
57871462 1142 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1144}
1145/*void emit_sbcimm(int imm,u_int rt)
1146{
1147 u_int armval;
cfbd3c6e 1148 genimm_checked(imm,&armval);
57871462 1149 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1150 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1151}*/
1152void emit_sbbimm(int imm,u_int rt)
1153{
1154 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1155 assert(rt<8);
1156 if(imm<128&&imm>=-128) {
1157 output_byte(0x83);
1158 output_modrm(3,rt,3);
1159 output_byte(imm);
1160 }
1161 else
1162 {
1163 output_byte(0x81);
1164 output_modrm(3,rt,3);
1165 output_w32(imm);
1166 }
1167}
1168void emit_rscimm(int rs,int imm,u_int rt)
1169{
1170 assert(0);
1171 u_int armval;
cfbd3c6e 1172 genimm_checked(imm,&armval);
57871462 1173 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1174 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1175}
1176
1177void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1178{
1179 // TODO: if(genimm(imm,&armval)) ...
1180 // else
1181 emit_movimm(imm,HOST_TEMPREG);
1182 emit_adds(HOST_TEMPREG,rsl,rtl);
1183 emit_adcimm(rsh,0,rth);
1184}
1185
1186void emit_sbb(int rs1,int rs2)
1187{
1188 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1189 output_byte(0x19);
1190 output_modrm(3,rs1,rs2);
1191}
1192
1193void emit_andimm(int rs,int imm,int rt)
1194{
1195 u_int armval;
1196 if(genimm(imm,&armval)) {
1197 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1198 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1199 }else if(genimm(~imm,&armval)) {
1200 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1201 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1202 }else if(imm==65535) {
1203 #ifdef ARMv5_ONLY
1204 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1205 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1206 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1207 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1208 #else
1209 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1210 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1211 #endif
1212 }else{
1213 assert(imm>0&&imm<65535);
1214 #ifdef ARMv5_ONLY
1215 assem_debug("mov r14,#%d\n",imm&0xFF00);
1216 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1217 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1218 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1219 #else
1220 emit_movw(imm,HOST_TEMPREG);
1221 #endif
1222 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1223 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1224 }
1225}
1226
1227void emit_orimm(int rs,int imm,int rt)
1228{
1229 u_int armval;
1230 if(genimm(imm,&armval)) {
1231 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1232 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1233 }else{
1234 assert(imm>0&&imm<65536);
1235 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1236 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1237 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1238 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1239 }
1240}
1241
1242void emit_xorimm(int rs,int imm,int rt)
1243{
57871462 1244 u_int armval;
1245 if(genimm(imm,&armval)) {
1246 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1248 }else{
514ed0d9 1249 assert(imm>0&&imm<65536);
57871462 1250 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1251 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1252 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1253 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1254 }
1255}
1256
1257void emit_shlimm(int rs,u_int imm,int rt)
1258{
1259 assert(imm>0);
1260 assert(imm<32);
1261 //if(imm==1) ...
1262 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1263 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1264}
1265
1266void emit_shrimm(int rs,u_int imm,int rt)
1267{
1268 assert(imm>0);
1269 assert(imm<32);
1270 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1271 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1272}
1273
1274void emit_sarimm(int rs,u_int imm,int rt)
1275{
1276 assert(imm>0);
1277 assert(imm<32);
1278 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1279 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1280}
1281
1282void emit_rorimm(int rs,u_int imm,int rt)
1283{
1284 assert(imm>0);
1285 assert(imm<32);
1286 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1287 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1288}
1289
1290void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1291{
1292 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1293 assert(imm>0);
1294 assert(imm<32);
1295 //if(imm==1) ...
1296 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1298 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1299 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1300}
1301
1302void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1303{
1304 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1305 assert(imm>0);
1306 assert(imm<32);
1307 //if(imm==1) ...
1308 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1309 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1310 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1311 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1312}
1313
b9b61529 1314void emit_signextend16(int rs,int rt)
1315{
1316 #ifdef ARMv5_ONLY
1317 emit_shlimm(rs,16,rt);
1318 emit_sarimm(rt,16,rt);
1319 #else
1320 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1321 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1322 #endif
1323}
1324
57871462 1325void emit_shl(u_int rs,u_int shift,u_int rt)
1326{
1327 assert(rs<16);
1328 assert(rt<16);
1329 assert(shift<16);
1330 //if(imm==1) ...
1331 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1332 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1333}
1334void emit_shr(u_int rs,u_int shift,u_int rt)
1335{
1336 assert(rs<16);
1337 assert(rt<16);
1338 assert(shift<16);
1339 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1341}
1342void emit_sar(u_int rs,u_int shift,u_int rt)
1343{
1344 assert(rs<16);
1345 assert(rt<16);
1346 assert(shift<16);
1347 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1349}
1350void emit_shlcl(int r)
1351{
1352 assem_debug("shl %%%s,%%cl\n",regname[r]);
1353 assert(0);
1354}
1355void emit_shrcl(int r)
1356{
1357 assem_debug("shr %%%s,%%cl\n",regname[r]);
1358 assert(0);
1359}
1360void emit_sarcl(int r)
1361{
1362 assem_debug("sar %%%s,%%cl\n",regname[r]);
1363 assert(0);
1364}
1365
1366void emit_shldcl(int r1,int r2)
1367{
1368 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1369 assert(0);
1370}
1371void emit_shrdcl(int r1,int r2)
1372{
1373 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1374 assert(0);
1375}
1376void emit_orrshl(u_int rs,u_int shift,u_int rt)
1377{
1378 assert(rs<16);
1379 assert(rt<16);
1380 assert(shift<16);
1381 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1382 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1383}
1384void emit_orrshr(u_int rs,u_int shift,u_int rt)
1385{
1386 assert(rs<16);
1387 assert(rt<16);
1388 assert(shift<16);
1389 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1390 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1391}
1392
1393void emit_cmpimm(int rs,int imm)
1394{
1395 u_int armval;
1396 if(genimm(imm,&armval)) {
1397 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1398 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1399 }else if(genimm(-imm,&armval)) {
1400 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1401 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1402 }else if(imm>0) {
1403 assert(imm<65536);
1404 #ifdef ARMv5_ONLY
1405 emit_movimm(imm,HOST_TEMPREG);
1406 #else
1407 emit_movw(imm,HOST_TEMPREG);
1408 #endif
1409 assem_debug("cmp %s,r14\n",regname[rs]);
1410 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1411 }else{
1412 assert(imm>-65536);
1413 #ifdef ARMv5_ONLY
1414 emit_movimm(-imm,HOST_TEMPREG);
1415 #else
1416 emit_movw(-imm,HOST_TEMPREG);
1417 #endif
1418 assem_debug("cmn %s,r14\n",regname[rs]);
1419 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1420 }
1421}
1422
1423void emit_cmovne(u_int *addr,int rt)
1424{
1425 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1426 assert(0);
1427}
1428void emit_cmovl(u_int *addr,int rt)
1429{
1430 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1431 assert(0);
1432}
1433void emit_cmovs(u_int *addr,int rt)
1434{
1435 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1436 assert(0);
1437}
1438void emit_cmovne_imm(int imm,int rt)
1439{
1440 assem_debug("movne %s,#%d\n",regname[rt],imm);
1441 u_int armval;
cfbd3c6e 1442 genimm_checked(imm,&armval);
57871462 1443 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovl_imm(int imm,int rt)
1446{
1447 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1448 u_int armval;
cfbd3c6e 1449 genimm_checked(imm,&armval);
57871462 1450 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmovb_imm(int imm,int rt)
1453{
1454 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1455 u_int armval;
cfbd3c6e 1456 genimm_checked(imm,&armval);
57871462 1457 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1458}
1459void emit_cmovs_imm(int imm,int rt)
1460{
1461 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1462 u_int armval;
cfbd3c6e 1463 genimm_checked(imm,&armval);
57871462 1464 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1465}
1466void emit_cmove_reg(int rs,int rt)
1467{
1468 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1469 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1470}
1471void emit_cmovne_reg(int rs,int rt)
1472{
1473 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1474 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1475}
1476void emit_cmovl_reg(int rs,int rt)
1477{
1478 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1479 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1480}
1481void emit_cmovs_reg(int rs,int rt)
1482{
1483 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1484 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1485}
1486
// Set rt to 1 if rs < imm (via "movlt"), else 0.
// When rt is a different register it is zeroed before the compare; when
// rs==rt the source must survive until the compare, so rt is cleared
// afterwards with emit_movimm(0,rt).
// NOTE(review): this relies on emit_movimm() not disturbing the flags - confirm.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if the compare of rs against imm leaves carry clear (via
// "movcc"), else 0.
// When rt is a different register it is zeroed before the compare; when
// rs==rt the source must survive until the compare, so rt is cleared
// afterwards with emit_movimm(0,rt).
// NOTE(review): this relies on emit_movimm() not disturbing the flags - confirm.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-on-less-than of the register pair rsh:rsl against a 32-bit immediate.
// The low-word result from emit_slti32() is computed first and then
// corrected from the high word:
//   imm>=0: rt=0 if rsh is non-zero, then rt=1 if rsh is negative;
//   imm<0 : rsh is compared with -1; rt=0 if different, then rt=1 if less.
// rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned variant of emit_slti64_32() for the register pair rsh:rsl.
// The low-word result from emit_sltiu32() is computed first and then
// corrected from the high word:
//   imm>=0: rt=0 if rsh is non-zero;
//   imm<0 : rsh is compared with -1; rt=1 if different.
// rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1533
1534void emit_cmp(int rs,int rt)
1535{
1536 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1537 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1538}
// Set rt to 1 if rs > 0, else 0: compare rs with 1, load 1 into rt, then
// replace it with 0 through "movlt" when rs < 1.
void emit_set_gz32(int rs, int rt)
{
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Set rt to 1 when rs is non-zero.  When rs==rt the value is only tested;
// otherwise it is copied into rt with emit_movs().  A "movne" then writes 1.
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Greater-than-zero test for the register pair rsh:rsl, result in rt.
// Starts from emit_set_gz32() on the low word, then tests the high word:
// "movne" forces 1 when rsh is non-zero and "movmi" forces 0 when rsh is
// negative.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// Non-zero test for the register pair rsh:rsl: OR the two halves into rt
// with emit_or_and_set_flags(), then "movne" writes 1 into rt.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  const int one=1;
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(one,rt);
}
// Set rt to 1 if rs1 < rs2 (via "movlt"), else 0.
// If rt is one of the operands it is cleared only after the compare,
// otherwise it is zeroed up front.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if comparing rs1 with rs2 leaves carry clear (via "movcc"),
// else 0.  If rt is one of the operands it is cleared only after the
// compare, otherwise it is zeroed up front.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1583void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1584{
1585 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1586 assert(u1!=rt);
1587 assert(u2!=rt);
1588 emit_cmp(l1,l2);
1589 emit_movimm(0,rt);
1590 emit_sbcs(u1,u2,HOST_TEMPREG);
1591 emit_cmovl_imm(1,rt);
1592}
1593void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1594{
1595 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1596 assert(u1!=rt);
1597 assert(u2!=rt);
1598 emit_cmp(l1,l2);
1599 emit_movimm(0,rt);
1600 emit_sbcs(u1,u2,HOST_TEMPREG);
1601 emit_cmovb_imm(1,rt);
1602}
1603
1604void emit_call(int a)
1605{
1606 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1607 u_int offset=genjmp(a);
1608 output_w32(0xeb000000|offset);
1609}
1610void emit_jmp(int a)
1611{
1612 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1613 u_int offset=genjmp(a);
1614 output_w32(0xea000000|offset);
1615}
// Emit "bne a".  The branch offset field comes from genjmp().
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a".  The branch offset field comes from genjmp().
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a".  The branch offset field comes from genjmp().
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a".  The branch offset field comes from genjmp().
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a".  The branch offset field comes from genjmp().
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a".  The branch offset field comes from genjmp().
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a".  The branch offset field comes from genjmp().
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a".  The branch offset field comes from genjmp().
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a".  The branch offset field comes from genjmp().
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1670
// Unimplemented stub: logs an x86-style "push $imm" and asserts.
// No instruction is emitted.
void emit_pushimm(int imm)
{
  const int value=imm;
  assem_debug("push $%x\n",value);
  assert(0);
}
// Unimplemented stub: logs "pusha" and asserts.  No instruction is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // not available in this backend
}
// Unimplemented stub: logs "popa" and asserts.  No instruction is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // not available in this backend
}
1686void emit_pushreg(u_int r)
1687{
1688 assem_debug("push %%%s\n",regname[r]);
1689 assert(0);
1690}
1691void emit_popreg(u_int r)
1692{
1693 assem_debug("pop %%%s\n",regname[r]);
1694 assert(0);
1695}
1696void emit_callreg(u_int r)
1697{
1698 assem_debug("call *%%%s\n",regname[r]);
1699 assert(0);
1700}
1701void emit_jmpreg(u_int r)
1702{
1703 assem_debug("mov pc,%s\n",regname[r]);
1704 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1705}
1706
1707void emit_readword_indexed(int offset, int rs, int rt)
1708{
1709 assert(offset>-4096&&offset<4096);
1710 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1711 if(offset>=0) {
1712 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1713 }else{
1714 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1715 }
1716}
1717void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1718{
1719 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1720 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1721}
// Word load, optionally through a mapping register.
// map<0 means no mapping: a plain offset load is emitted.  Otherwise addr
// must be 0 and the load is indexed by map (scaled by 4).
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh (from addr) and rl (from addr+4); rh<0 skips the
// load into rh.
// With a mapping register (map>=0), map is incremented by 1 between the two
// loads (it is scaled by 4 when used as an index), which modifies map.
// rh must not be rs in that case, as rs is still needed for the second load.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1742void emit_movsbl_indexed(int offset, int rs, int rt)
1743{
1744 assert(offset>-256&&offset<256);
1745 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1746 if(offset>=0) {
1747 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1748 }else{
1749 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1750 }
1751}
1752void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1753{
1754 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1755 else {
1756 if(addr==0) {
1757 emit_shlimm(map,2,map);
1758 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1759 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1760 }else{
1761 assert(addr>-256&&addr<256);
1762 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1763 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1764 emit_movsbl_indexed(addr, rt, rt);
1765 }
1766 }
1767}
1768void emit_movswl_indexed(int offset, int rs, int rt)
1769{
1770 assert(offset>-256&&offset<256);
1771 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1772 if(offset>=0) {
1773 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1774 }else{
1775 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1776 }
1777}
1778void emit_movzbl_indexed(int offset, int rs, int rt)
1779{
1780 assert(offset>-4096&&offset<4096);
1781 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1782 if(offset>=0) {
1783 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1784 }else{
1785 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1786 }
1787}
1788void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1789{
1790 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1791 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1792}
// Zero-extending byte load, optionally through a mapping register.
// map<0 gives a plain offset load.  Otherwise the load is indexed by map
// (scaled by 4); a non-zero addr is first added to rs, using rt as the
// temporary base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1805void emit_movzwl_indexed(int offset, int rs, int rt)
1806{
1807 assert(offset>-256&&offset<256);
1808 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1809 if(offset>=0) {
1810 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1811 }else{
1812 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1813 }
1814}
1815void emit_readword(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<4096);
1819 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1821}
1822void emit_movsbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<256);
1826 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1828}
1829void emit_movswl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzbl(int addr, int rt)
1837{
1838 u_int offset = addr-(u_int)&dynarec_local;
1839 assert(offset<4096);
1840 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1841 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1842}
1843void emit_movzwl(int addr, int rt)
1844{
1845 u_int offset = addr-(u_int)&dynarec_local;
1846 assert(offset<256);
1847 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1848 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1849}
1850void emit_movzwl_reg(int rs, int rt)
1851{
1852 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1853 assert(0);
1854}
1855
1856void emit_xchg(int rs, int rt)
1857{
1858 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1859 assert(0);
1860}
1861void emit_writeword_indexed(int rt, int offset, int rs)
1862{
1863 assert(offset>-4096&&offset<4096);
1864 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1867 }else{
1868 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1869 }
1870}
1871void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1872{
1873 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1874 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1875}
// Word store, optionally through a mapping register.
// map<0 means no mapping: a plain offset store is emitted.  Otherwise addr
// must be 0 and the store is indexed by map (scaled by 4).
// The temp parameter is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh (to addr) and rl (to addr+4).
// Without a mapping register (map<0), rh<0 skips the store of rh.
// With one, rh is required and the second word is addressed either
//  - through temp = map+1 (index scaled by 4), when temp is a separate
//    register from rs, or
//  - by advancing rs itself by 4 when temp==rs; rs is modified in that case.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1900void emit_writehword_indexed(int rt, int offset, int rs)
1901{
1902 assert(offset>-256&&offset<256);
1903 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1906 }else{
1907 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1908 }
1909}
1910void emit_writebyte_indexed(int rt, int offset, int rs)
1911{
1912 assert(offset>-4096&&offset<4096);
1913 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1914 if(offset>=0) {
1915 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1916 }else{
1917 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1918 }
1919}
1920void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1921{
1922 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1923 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1924}
// Byte store, optionally through a mapping register.
// map<0 gives a plain offset store.  Otherwise the store is indexed by map
// (scaled by 4); a non-zero addr is first added to rs, with the sum placed
// in temp and used as the base.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1937void emit_writeword(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1943}
1944void emit_writehword(int rt, int addr)
1945{
1946 u_int offset = addr-(u_int)&dynarec_local;
1947 assert(offset<256);
1948 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1949 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1950}
1951void emit_writebyte(int rt, int addr)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<4096);
74426039 1955 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1956 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1957}
// Unimplemented stub: logs an x86-style "movl $imm,addr" and asserts.
// No instruction is emitted.
void emit_writeword_imm(int imm, int addr)
{
  const int value=imm;
  const int where=addr;
  assem_debug("movl $%x,%x\n",value,where);
  assert(0);
}
// Unimplemented stub: logs an x86-style "movb $imm,addr" and asserts.
// No instruction is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  const int value=imm;
  const int where=addr;
  assem_debug("movb $%x,%x\n",value,where);
  assert(0);
}
1968
1969void emit_mul(int rs)
1970{
1971 assem_debug("mul %%%s\n",regname[rs]);
1972 assert(0);
1973}
1974void emit_imul(int rs)
1975{
1976 assem_debug("imul %%%s\n",regname[rs]);
1977 assert(0);
1978}
1979void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1980{
1981 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1982 assert(rs1<16);
1983 assert(rs2<16);
1984 assert(hi<16);
1985 assert(lo<16);
1986 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1987}
1988void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1989{
1990 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1991 assert(rs1<16);
1992 assert(rs2<16);
1993 assert(hi<16);
1994 assert(lo<16);
1995 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1996}
1997
1998void emit_div(int rs)
1999{
2000 assem_debug("div %%%s\n",regname[rs]);
2001 assert(0);
2002}
2003void emit_idiv(int rs)
2004{
2005 assem_debug("idiv %%%s\n",regname[rs]);
2006 assert(0);
2007}
// Unimplemented stub: logs "cdq" and asserts.  No instruction is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0); // not available in this backend
}
2013
2014void emit_clz(int rs,int rt)
2015{
2016 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2017 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2018}
2019
2020void emit_subcs(int rs1,int rs2,int rt)
2021{
2022 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2023 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2024}
2025
2026void emit_shrcc_imm(int rs,u_int imm,int rt)
2027{
2028 assert(imm>0);
2029 assert(imm<32);
2030 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2031 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2032}
2033
2034void emit_negmi(int rs, int rt)
2035{
2036 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2037 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2038}
2039
2040void emit_negsmi(int rs, int rt)
2041{
2042 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2043 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2044}
2045
2046void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2047{
2048 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2049 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2050}
2051
2052void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2053{
2054 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2055 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2056}
2057
2058void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2059{
2060 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2061 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2062}
2063
2064void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2065{
2066 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2067 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2068}
2069
2070void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2071{
2072 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2073 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2074}
2075
2076void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2077{
2078 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2079 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2080}
2081
2082void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2083{
2084 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2085 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2086}
2087
2088void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2089{
2090 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2091 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2092}
2093
2094void emit_teq(int rs, int rt)
2095{
2096 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2097 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2098}
2099
2100void emit_rsbimm(int rs, int imm, int rt)
2101{
2102 u_int armval;
cfbd3c6e 2103 genimm_checked(imm,&armval);
57871462 2104 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2105 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2106}
2107
2108// Load 2 immediates optimizing for small code size
2109void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2110{
2111 emit_movimm(imm1,rt1);
2112 u_int armval;
2113 if(genimm(imm2-imm1,&armval)) {
2114 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2115 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2116 }else if(genimm(imm1-imm2,&armval)) {
2117 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2118 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2119 }
2120 else emit_movimm(imm2,rt2);
2121}
2122
2123// Conditionally select one of two immediates, optimizing for small code size
2124// This will only be called if HAVE_CMOV_IMM is defined
2125void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2126{
2127 u_int armval;
2128 if(genimm(imm2-imm1,&armval)) {
2129 emit_movimm(imm1,rt);
2130 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2131 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2132 }else if(genimm(imm1-imm2,&armval)) {
2133 emit_movimm(imm1,rt);
2134 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2135 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2136 }
2137 else {
2138 #ifdef ARMv5_ONLY
2139 emit_movimm(imm1,rt);
2140 add_literal((int)out,imm2);
2141 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2142 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2143 #else
2144 emit_movw(imm1&0x0000FFFF,rt);
2145 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2146 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2147 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2148 }
2149 emit_movt(imm1&0xFFFF0000,rt);
2150 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2151 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2152 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2153 }
2154 #endif
2155 }
2156}
2157
// Special case for checking invalid_code.
// Not implemented in this emitter: reaching it trips an assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2163
2164// special case for checking invalid_code
2165void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2166{
2167 assert(imm<128&&imm>=0);
2168 assert(r>=0&&r<16);
2169 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2170 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2171 emit_cmpimm(HOST_TEMPREG,imm);
2172}
2173
2174// special case for tlb mapping
2175void emit_addsr12(int rs1,int rs2,int rt)
2176{
2177 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2178 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2179}
2180
// Used to preload hash table entries.
// NOTE(review): this writes two opcode bytes, a modrm byte and a 32-bit
// address, which looks like a byte-oriented (x86-style) encoding rather
// than a single ARM instruction word - confirm whether this is ever called
// on ARM builds.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",(int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2190void emit_prefetchreg(int r)
2191{
2192 assem_debug("pld %s\n",regname[r]);
2193 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2194}
2195
2196// Special case for mini_ht
2197void emit_ldreq_indexed(int rs, u_int offset, int rt)
2198{
2199 assert(offset<4096);
2200 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2201 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2202}
2203
2204void emit_flds(int r,int sr)
2205{
2206 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2207 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2208}
2209
2210void emit_vldr(int r,int vr)
2211{
2212 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2213 output_w32(0xed900b00|(vr<<12)|(r<<16));
2214}
2215
2216void emit_fsts(int sr,int r)
2217{
2218 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2219 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2220}
2221
2222void emit_vstr(int vr,int r)
2223{
2224 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2225 output_w32(0xed800b00|(vr<<12)|(r<<16));
2226}
2227
2228void emit_ftosizs(int s,int d)
2229{
2230 assem_debug("ftosizs s%d,s%d\n",d,s);
2231 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2232}
2233
2234void emit_ftosizd(int s,int d)
2235{
2236 assem_debug("ftosizd s%d,d%d\n",d,s);
2237 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2238}
2239
2240void emit_fsitos(int s,int d)
2241{
2242 assem_debug("fsitos s%d,s%d\n",d,s);
2243 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2244}
2245
2246void emit_fsitod(int s,int d)
2247{
2248 assem_debug("fsitod d%d,s%d\n",d,s);
2249 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2250}
2251
2252void emit_fcvtds(int s,int d)
2253{
2254 assem_debug("fcvtds d%d,s%d\n",d,s);
2255 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2256}
2257
2258void emit_fcvtsd(int s,int d)
2259{
2260 assem_debug("fcvtsd s%d,d%d\n",d,s);
2261 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2262}
2263
// Emit "fsqrts s<d>,s<s>".  Both operands use the single-precision register
// encoding (number split into a 3-bit field plus one low bit).
// The debug trace previously printed the destination with a "d" prefix,
// which did not match the encoding; it now prints "s" for both operands.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2269
// Emit "fsqrtd d<d>,d<s>".  Both operands use the double-precision register
// encoding (low 3 bits of the register number).
// The debug trace previously printed the destination with an "s" prefix,
// which did not match the encoding; it now prints "d" for both operands.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2275
// Emit "fabss s<d>,s<s>".  Both operands use the single-precision register
// encoding.  The debug trace previously printed the destination with a "d"
// prefix, which did not match the encoding.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2281
// Emit "fabsd d<d>,d<s>".  Both operands use the double-precision register
// encoding.  The debug trace previously printed the destination with an "s"
// prefix, which did not match the encoding.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2287
// Emit "fnegs s<d>,s<s>".  Both operands use the single-precision register
// encoding.  The debug trace previously printed the destination with a "d"
// prefix, which did not match the encoding.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2293
// Emit "fnegd d<d>,d<s>".  Both operands use the double-precision register
// encoding.  The debug trace previously printed the destination with an "s"
// prefix, which did not match the encoding.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2299
2300void emit_fadds(int s1,int s2,int d)
2301{
2302 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2303 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2304}
2305
2306void emit_faddd(int s1,int s2,int d)
2307{
2308 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2309 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2310}
2311
2312void emit_fsubs(int s1,int s2,int d)
2313{
2314 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2315 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2316}
2317
2318void emit_fsubd(int s1,int s2,int d)
2319{
2320 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2321 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2322}
2323
2324void emit_fmuls(int s1,int s2,int d)
2325{
2326 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2327 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2328}
2329
2330void emit_fmuld(int s1,int s2,int d)
2331{
2332 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2333 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2334}
2335
2336void emit_fdivs(int s1,int s2,int d)
2337{
2338 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2339 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2340}
2341
2342void emit_fdivd(int s1,int s2,int d)
2343{
2344 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2345 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2346}
2347
2348void emit_fcmps(int x,int y)
2349{
2350 assem_debug("fcmps s14, s15\n");
2351 output_w32(0xeeb47a67);
2352}
2353
2354void emit_fcmpd(int x,int y)
2355{
2356 assem_debug("fcmpd d6, d7\n");
2357 output_w32(0xeeb46b47);
2358}
2359
2360void emit_fmstat()
2361{
2362 assem_debug("fmstat\n");
2363 output_w32(0xeef1fa10);
2364}
2365
2366void emit_bicne_imm(int rs,int imm,int rt)
2367{
2368 u_int armval;
cfbd3c6e 2369 genimm_checked(imm,&armval);
57871462 2370 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2371 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2372}
2373
2374void emit_biccs_imm(int rs,int imm,int rt)
2375{
2376 u_int armval;
cfbd3c6e 2377 genimm_checked(imm,&armval);
57871462 2378 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2379 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2380}
2381
2382void emit_bicvc_imm(int rs,int imm,int rt)
2383{
2384 u_int armval;
cfbd3c6e 2385 genimm_checked(imm,&armval);
57871462 2386 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2387 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2388}
2389
2390void emit_bichi_imm(int rs,int imm,int rt)
2391{
2392 u_int armval;
cfbd3c6e 2393 genimm_checked(imm,&armval);
57871462 2394 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2395 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2396}
2397
2398void emit_orrvs_imm(int rs,int imm,int rt)
2399{
2400 u_int armval;
cfbd3c6e 2401 genimm_checked(imm,&armval);
57871462 2402 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2403 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2404}
2405
b9b61529 2406void emit_orrne_imm(int rs,int imm,int rt)
2407{
2408 u_int armval;
cfbd3c6e 2409 genimm_checked(imm,&armval);
b9b61529 2410 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2411 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2412}
2413
2414void emit_andne_imm(int rs,int imm,int rt)
2415{
2416 u_int armval;
cfbd3c6e 2417 genimm_checked(imm,&armval);
b9b61529 2418 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2419 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2420}
2421
57871462 2422void emit_jno_unlikely(int a)
2423{
2424 //emit_jno(a);
2425 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2426 output_w32(0x72800000|rd_rn_rm(15,15,0));
2427}
2428
2429// Save registers before function call
2430void save_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("stmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe88b0000|reglist);
2442}
2443// Restore registers after function call
2444void restore_regs(u_int reglist)
2445{
2446 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2447 if(!reglist) return;
2448 assem_debug("ldmia fp,{");
2449 if(reglist&1) assem_debug("r0, ");
2450 if(reglist&2) assem_debug("r1, ");
2451 if(reglist&4) assem_debug("r2, ");
2452 if(reglist&8) assem_debug("r3, ");
2453 if(reglist&0x1000) assem_debug("r12");
2454 assem_debug("}\n");
2455 output_w32(0xe89b0000|reglist);
2456}
2457
2458// Write back consts using r14 so we don't disturb the other registers
2459void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2460{
2461 int hr;
2462 for(hr=0;hr<HOST_REGS;hr++) {
2463 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2464 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2465 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2466 int value=constmap[i][hr];
2467 if(value==0) {
2468 emit_zeroreg(HOST_TEMPREG);
2469 }
2470 else {
2471 emit_movimm(value,HOST_TEMPREG);
2472 }
2473 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2474#ifndef FORCE32
57871462 2475 if((i_is32>>i_regmap[hr])&1) {
2476 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2477 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2478 }
24385cae 2479#endif
57871462 2480 }
2481 }
2482 }
2483 }
2484}
2485
2486/* Stubs/epilogue */
2487
2488void literal_pool(int n)
2489{
2490 if(!literalcount) return;
2491 if(n) {
2492 if((int)out-literals[0][0]<4096-n) return;
2493 }
2494 u_int *ptr;
2495 int i;
2496 for(i=0;i<literalcount;i++)
2497 {
2498 ptr=(u_int *)literals[i][0];
2499 u_int offset=(u_int)out-(u_int)ptr-8;
2500 assert(offset<4096);
2501 assert(!(offset&3));
2502 *ptr|=offset;
2503 output_w32(literals[i][1]);
2504 }
2505 literalcount=0;
2506}
2507
2508void literal_pool_jumpover(int n)
2509{
2510 if(!literalcount) return;
2511 if(n) {
2512 if((int)out-literals[0][0]<4096-n) return;
2513 }
2514 int jaddr=(int)out;
2515 emit_jmp(0);
2516 literal_pool(0);
2517 set_jump_target(jaddr,(int)out);
2518}
2519
2520emit_extjump2(int addr, int target, int linker)
2521{
2522 u_char *ptr=(u_char *)addr;
2523 assert((ptr[3]&0x0e)==0xa);
2524 emit_loadlp(target,0);
2525 emit_loadlp(addr,1);
24385cae 2526 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2527 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2528//DEBUG >
2529#ifdef DEBUG_CYCLE_COUNT
2530 emit_readword((int)&last_count,ECX);
2531 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2532 emit_readword((int)&next_interupt,ECX);
2533 emit_writeword(HOST_CCREG,(int)&Count);
2534 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2535 emit_writeword(ECX,(int)&last_count);
2536#endif
2537//DEBUG <
2538 emit_jmp(linker);
2539}
2540
2541emit_extjump(int addr, int target)
2542{
2543 emit_extjump2(addr, target, (int)dyna_linker);
2544}
2545emit_extjump_ds(int addr, int target)
2546{
2547 emit_extjump2(addr, target, (int)dyna_linker_ds);
2548}
2549
2550do_readstub(int n)
2551{
2552 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2553 literal_pool(256);
2554 set_jump_target(stubs[n][1],(int)out);
2555 int type=stubs[n][0];
2556 int i=stubs[n][3];
2557 int rs=stubs[n][4];
2558 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2559 u_int reglist=stubs[n][7];
2560 signed char *i_regmap=i_regs->regmap;
2561 int addr=get_reg(i_regmap,AGEN1+(i&1));
2562 int rth,rt;
2563 int ds;
b9b61529 2564 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2565 rth=get_reg(i_regmap,FTEMP|64);
2566 rt=get_reg(i_regmap,FTEMP);
2567 }else{
2568 rth=get_reg(i_regmap,rt1[i]|64);
2569 rt=get_reg(i_regmap,rt1[i]);
2570 }
2571 assert(rs>=0);
57871462 2572 if(addr<0) addr=rt;
f18c0f46 2573 if(addr<0)
2574 // assume dummy read, no alloced reg
2575 addr=get_reg(i_regmap,-1);
57871462 2576 assert(addr>=0);
2577 int ftable=0;
2578 if(type==LOADB_STUB||type==LOADBU_STUB)
2579 ftable=(int)readmemb;
2580 if(type==LOADH_STUB||type==LOADHU_STUB)
2581 ftable=(int)readmemh;
2582 if(type==LOADW_STUB)
2583 ftable=(int)readmem;
24385cae 2584#ifndef FORCE32
57871462 2585 if(type==LOADD_STUB)
2586 ftable=(int)readmemd;
24385cae 2587#endif
2588 assert(ftable!=0);
57871462 2589 emit_writeword(rs,(int)&address);
2590 //emit_pusha();
2591 save_regs(reglist);
2592 ds=i_regs!=&regs[i];
2593 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2594 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2595 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2596 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2597 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2598 emit_shrimm(rs,16,1);
2599 int cc=get_reg(i_regmap,CCREG);
2600 if(cc<0) {
2601 emit_loadreg(CCREG,2);
2602 }
2603 emit_movimm(ftable,0);
2604 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2605 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2606 //emit_readword((int)&last_count,temp);
2607 //emit_add(cc,temp,cc);
2608 //emit_writeword(cc,(int)&Count);
2609 //emit_mov(15,14);
2610 emit_call((int)&indirect_jump_indexed);
2611 //emit_callreg(rs);
2612 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2613 // We really shouldn't need to update the count here,
2614 // but not doing so causes random crashes...
2615 emit_readword((int)&Count,HOST_TEMPREG);
2616 emit_readword((int)&next_interupt,2);
2617 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2618 emit_writeword(2,(int)&last_count);
2619 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2620 if(cc<0) {
2621 emit_storereg(CCREG,HOST_TEMPREG);
2622 }
2623 //emit_popa();
2624 restore_regs(reglist);
2625 //if((cc=get_reg(regmap,CCREG))>=0) {
2626 // emit_loadreg(CCREG,cc);
2627 //}
f18c0f46 2628 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2629 assert(rt>=0);
2630 if(type==LOADB_STUB)
2631 emit_movsbl((int)&readmem_dword,rt);
2632 if(type==LOADBU_STUB)
2633 emit_movzbl((int)&readmem_dword,rt);
2634 if(type==LOADH_STUB)
2635 emit_movswl((int)&readmem_dword,rt);
2636 if(type==LOADHU_STUB)
2637 emit_movzwl((int)&readmem_dword,rt);
2638 if(type==LOADW_STUB)
2639 emit_readword((int)&readmem_dword,rt);
2640 if(type==LOADD_STUB) {
2641 emit_readword((int)&readmem_dword,rt);
2642 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2643 }
57871462 2644 }
2645 emit_jmp(stubs[n][2]); // return address
2646}
2647
2648inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2649{
2650 int rs=get_reg(regmap,target);
2651 int rth=get_reg(regmap,target|64);
2652 int rt=get_reg(regmap,target);
2653 assert(rs>=0);
2654 assert(rt>=0);
2655 int ftable=0;
2656 if(type==LOADB_STUB||type==LOADBU_STUB)
2657 ftable=(int)readmemb;
2658 if(type==LOADH_STUB||type==LOADHU_STUB)
2659 ftable=(int)readmemh;
2660 if(type==LOADW_STUB)
2661 ftable=(int)readmem;
24385cae 2662#ifndef FORCE32
57871462 2663 if(type==LOADD_STUB)
2664 ftable=(int)readmemd;
24385cae 2665#endif
2666 assert(ftable!=0);
57871462 2667 emit_writeword(rs,(int)&address);
2668 //emit_pusha();
2669 save_regs(reglist);
2670 //emit_shrimm(rs,16,1);
2671 int cc=get_reg(regmap,CCREG);
2672 if(cc<0) {
2673 emit_loadreg(CCREG,2);
2674 }
2675 //emit_movimm(ftable,0);
2676 emit_movimm(((u_int *)ftable)[addr>>16],0);
2677 //emit_readword((int)&last_count,12);
2678 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2679 if((signed int)addr>=(signed int)0xC0000000) {
2680 // Pagefault address
2681 int ds=regmap!=regs[i].regmap;
2682 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2683 }
2684 //emit_add(12,2,2);
2685 //emit_writeword(2,(int)&Count);
2686 //emit_call(((u_int *)ftable)[addr>>16]);
2687 emit_call((int)&indirect_jump);
2688 // We really shouldn't need to update the count here,
2689 // but not doing so causes random crashes...
2690 emit_readword((int)&Count,HOST_TEMPREG);
2691 emit_readword((int)&next_interupt,2);
2692 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2693 emit_writeword(2,(int)&last_count);
2694 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2695 if(cc<0) {
2696 emit_storereg(CCREG,HOST_TEMPREG);
2697 }
2698 //emit_popa();
2699 restore_regs(reglist);
2700 if(type==LOADB_STUB)
2701 emit_movsbl((int)&readmem_dword,rt);
2702 if(type==LOADBU_STUB)
2703 emit_movzbl((int)&readmem_dword,rt);
2704 if(type==LOADH_STUB)
2705 emit_movswl((int)&readmem_dword,rt);
2706 if(type==LOADHU_STUB)
2707 emit_movzwl((int)&readmem_dword,rt);
2708 if(type==LOADW_STUB)
2709 emit_readword((int)&readmem_dword,rt);
2710 if(type==LOADD_STUB) {
2711 emit_readword((int)&readmem_dword,rt);
2712 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2713 }
2714}
2715
2716do_writestub(int n)
2717{
2718 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2719 literal_pool(256);
2720 set_jump_target(stubs[n][1],(int)out);
2721 int type=stubs[n][0];
2722 int i=stubs[n][3];
2723 int rs=stubs[n][4];
2724 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2725 u_int reglist=stubs[n][7];
2726 signed char *i_regmap=i_regs->regmap;
2727 int addr=get_reg(i_regmap,AGEN1+(i&1));
2728 int rth,rt,r;
2729 int ds;
b9b61529 2730 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2731 rth=get_reg(i_regmap,FTEMP|64);
2732 rt=get_reg(i_regmap,r=FTEMP);
2733 }else{
2734 rth=get_reg(i_regmap,rs2[i]|64);
2735 rt=get_reg(i_regmap,r=rs2[i]);
2736 }
2737 assert(rs>=0);
2738 assert(rt>=0);
2739 if(addr<0) addr=get_reg(i_regmap,-1);
2740 assert(addr>=0);
2741 int ftable=0;
2742 if(type==STOREB_STUB)
2743 ftable=(int)writememb;
2744 if(type==STOREH_STUB)
2745 ftable=(int)writememh;
2746 if(type==STOREW_STUB)
2747 ftable=(int)writemem;
24385cae 2748#ifndef FORCE32
57871462 2749 if(type==STORED_STUB)
2750 ftable=(int)writememd;
24385cae 2751#endif
2752 assert(ftable!=0);
57871462 2753 emit_writeword(rs,(int)&address);
2754 //emit_shrimm(rs,16,rs);
2755 //emit_movmem_indexedx4(ftable,rs,rs);
2756 if(type==STOREB_STUB)
2757 emit_writebyte(rt,(int)&byte);
2758 if(type==STOREH_STUB)
2759 emit_writehword(rt,(int)&hword);
2760 if(type==STOREW_STUB)
2761 emit_writeword(rt,(int)&word);
2762 if(type==STORED_STUB) {
3d624f89 2763#ifndef FORCE32
57871462 2764 emit_writeword(rt,(int)&dword);
2765 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2766#else
2767 printf("STORED_STUB\n");
2768#endif
57871462 2769 }
2770 //emit_pusha();
2771 save_regs(reglist);
2772 ds=i_regs!=&regs[i];
2773 int real_rs=get_reg(i_regmap,rs1[i]);
2774 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2775 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2776 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2777 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2778 emit_shrimm(rs,16,1);
2779 int cc=get_reg(i_regmap,CCREG);
2780 if(cc<0) {
2781 emit_loadreg(CCREG,2);
2782 }
2783 emit_movimm(ftable,0);
2784 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2785 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2786 //emit_readword((int)&last_count,temp);
2787 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2788 //emit_add(cc,temp,cc);
2789 //emit_writeword(cc,(int)&Count);
2790 emit_call((int)&indirect_jump_indexed);
2791 //emit_callreg(rs);
2792 emit_readword((int)&Count,HOST_TEMPREG);
2793 emit_readword((int)&next_interupt,2);
2794 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2795 emit_writeword(2,(int)&last_count);
2796 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2797 if(cc<0) {
2798 emit_storereg(CCREG,HOST_TEMPREG);
2799 }
2800 //emit_popa();
2801 restore_regs(reglist);
2802 //if((cc=get_reg(regmap,CCREG))>=0) {
2803 // emit_loadreg(CCREG,cc);
2804 //}
2805 emit_jmp(stubs[n][2]); // return address
2806}
2807
2808inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2809{
2810 int rs=get_reg(regmap,-1);
2811 int rth=get_reg(regmap,target|64);
2812 int rt=get_reg(regmap,target);
2813 assert(rs>=0);
2814 assert(rt>=0);
2815 int ftable=0;
2816 if(type==STOREB_STUB)
2817 ftable=(int)writememb;
2818 if(type==STOREH_STUB)
2819 ftable=(int)writememh;
2820 if(type==STOREW_STUB)
2821 ftable=(int)writemem;
24385cae 2822#ifndef FORCE32
57871462 2823 if(type==STORED_STUB)
2824 ftable=(int)writememd;
24385cae 2825#endif
2826 assert(ftable!=0);
57871462 2827 emit_writeword(rs,(int)&address);
2828 //emit_shrimm(rs,16,rs);
2829 //emit_movmem_indexedx4(ftable,rs,rs);
2830 if(type==STOREB_STUB)
2831 emit_writebyte(rt,(int)&byte);
2832 if(type==STOREH_STUB)
2833 emit_writehword(rt,(int)&hword);
2834 if(type==STOREW_STUB)
2835 emit_writeword(rt,(int)&word);
2836 if(type==STORED_STUB) {
3d624f89 2837#ifndef FORCE32
57871462 2838 emit_writeword(rt,(int)&dword);
2839 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2840#else
2841 printf("STORED_STUB\n");
2842#endif
57871462 2843 }
2844 //emit_pusha();
2845 save_regs(reglist);
2846 //emit_shrimm(rs,16,1);
2847 int cc=get_reg(regmap,CCREG);
2848 if(cc<0) {
2849 emit_loadreg(CCREG,2);
2850 }
2851 //emit_movimm(ftable,0);
2852 emit_movimm(((u_int *)ftable)[addr>>16],0);
2853 //emit_readword((int)&last_count,12);
2854 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2855 if((signed int)addr>=(signed int)0xC0000000) {
2856 // Pagefault address
2857 int ds=regmap!=regs[i].regmap;
2858 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2859 }
2860 //emit_add(12,2,2);
2861 //emit_writeword(2,(int)&Count);
2862 //emit_call(((u_int *)ftable)[addr>>16]);
2863 emit_call((int)&indirect_jump);
2864 emit_readword((int)&Count,HOST_TEMPREG);
2865 emit_readword((int)&next_interupt,2);
2866 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2867 emit_writeword(2,(int)&last_count);
2868 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2869 if(cc<0) {
2870 emit_storereg(CCREG,HOST_TEMPREG);
2871 }
2872 //emit_popa();
2873 restore_regs(reglist);
2874}
2875
2876do_unalignedwritestub(int n)
2877{
b7918751 2878 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2879 literal_pool(256);
57871462 2880 set_jump_target(stubs[n][1],(int)out);
b7918751 2881
2882 int i=stubs[n][3];
2883 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2884 int addr=stubs[n][5];
2885 u_int reglist=stubs[n][7];
2886 signed char *i_regmap=i_regs->regmap;
2887 int temp2=get_reg(i_regmap,FTEMP);
2888 int rt;
2889 int ds, real_rs;
2890 rt=get_reg(i_regmap,rs2[i]);
2891 assert(rt>=0);
2892 assert(addr>=0);
2893 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2894 reglist|=(1<<addr);
2895 reglist&=~(1<<temp2);
2896
2897 emit_andimm(addr,0xfffffffc,temp2);
2898 emit_writeword(temp2,(int)&address);
2899
2900 save_regs(reglist);
2901 ds=i_regs!=&regs[i];
2902 real_rs=get_reg(i_regmap,rs1[i]);
2903 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2904 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2905 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2906 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2907 emit_shrimm(addr,16,1);
2908 int cc=get_reg(i_regmap,CCREG);
2909 if(cc<0) {
2910 emit_loadreg(CCREG,2);
2911 }
2912 emit_movimm((u_int)readmem,0);
2913 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2914 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2915 emit_call((int)&indirect_jump_indexed);
2916 restore_regs(reglist);
2917
2918 emit_readword((int)&readmem_dword,temp2);
2919 int temp=addr; //hmh
2920 emit_shlimm(addr,3,temp);
2921 emit_andimm(temp,24,temp);
2922#ifdef BIG_ENDIAN_MIPS
2923 if (opcode[i]==0x2e) // SWR
2924#else
2925 if (opcode[i]==0x2a) // SWL
2926#endif
2927 emit_xorimm(temp,24,temp);
2928 emit_movimm(-1,HOST_TEMPREG);
55439448 2929 if (opcode[i]==0x2a) { // SWL
b7918751 2930 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2931 emit_orrshr(rt,temp,temp2);
2932 }else{
2933 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2934 emit_orrshl(rt,temp,temp2);
2935 }
2936 emit_readword((int)&address,addr);
2937 emit_writeword(temp2,(int)&word);
2938 //save_regs(reglist); // don't need to, no state changes
2939 emit_shrimm(addr,16,1);
2940 emit_movimm((u_int)writemem,0);
2941 //emit_call((int)&indirect_jump_indexed);
2942 emit_mov(15,14);
2943 emit_readword_dualindexedx4(0,1,15);
2944 emit_readword((int)&Count,HOST_TEMPREG);
2945 emit_readword((int)&next_interupt,2);
2946 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2947 emit_writeword(2,(int)&last_count);
2948 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2949 if(cc<0) {
2950 emit_storereg(CCREG,HOST_TEMPREG);
2951 }
2952 restore_regs(reglist);
57871462 2953 emit_jmp(stubs[n][2]); // return address
2954}
2955
// Debug helper: print the eight values passed in, in the order
// a,b,c,d,ebp,esi,edi, followed by the word located just below the first
// parameter in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object; what it
// yields depends on the calling convention - confirm before relying on it.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int below_first_arg=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_first_arg);
}
2960
2961do_invstub(int n)
2962{
2963 literal_pool(20);
2964 u_int reglist=stubs[n][3];
2965 set_jump_target(stubs[n][1],(int)out);
2966 save_regs(reglist);
2967 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2968 emit_call((int)&invalidate_addr);
2969 restore_regs(reglist);
2970 emit_jmp(stubs[n][2]); // return address
2971}
2972
2973int do_dirty_stub(int i)
2974{
2975 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2976 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2977 #ifdef PCSX
2978 addr=(u_int)source;
2979 #endif
57871462 2980 // Careful about the code output here, verify_dirty needs to parse it.
2981 #ifdef ARMv5_ONLY
ac545b3a 2982 emit_loadlp(addr,1);
57871462 2983 emit_loadlp((int)copy,2);
2984 emit_loadlp(slen*4,3);
2985 #else
ac545b3a 2986 emit_movw(addr&0x0000FFFF,1);
57871462 2987 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2988 emit_movt(addr&0xFFFF0000,1);
57871462 2989 emit_movt(((u_int)copy)&0xFFFF0000,2);
2990 emit_movw(slen*4,3);
2991 #endif
2992 emit_movimm(start+i*4,0);
2993 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2994 int entry=(int)out;
2995 load_regs_entry(i);
2996 if(entry==(int)out) entry=instr_addr[i];
2997 emit_jmp(instr_addr[i]);
2998 return entry;
2999}
3000
3001void do_dirty_stub_ds()
3002{
3003 // Careful about the code output here, verify_dirty needs to parse it.
3004 #ifdef ARMv5_ONLY
3005 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3006 emit_loadlp((int)copy,2);
3007 emit_loadlp(slen*4,3);
3008 #else
3009 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3010 emit_movw(((u_int)copy)&0x0000FFFF,2);
3011 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3012 emit_movt(((u_int)copy)&0xFFFF0000,2);
3013 emit_movw(slen*4,3);
3014 #endif
3015 emit_movimm(start+1,0);
3016 emit_call((int)&verify_code_ds);
3017}
3018
3019do_cop1stub(int n)
3020{
3021 literal_pool(256);
3022 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3023 set_jump_target(stubs[n][1],(int)out);
3024 int i=stubs[n][3];
3d624f89 3025// int rs=stubs[n][4];
57871462 3026 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3027 int ds=stubs[n][6];
3028 if(!ds) {
3029 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3030 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3031 }
3032 //else {printf("fp exception in delay slot\n");}
3033 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3034 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3035 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3036 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3037 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3038}
3039
3040/* TLB */
3041
3042int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3043{
3044 if(c) {
3045 if((signed int)addr>=(signed int)0xC0000000) {
3046 // address_generation already loaded the const
3047 emit_readword_dualindexedx4(FP,map,map);
3048 }
3049 else
3050 return -1; // No mapping
3051 }
3052 else {
3053 assert(s!=map);
3054 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3055 emit_addsr12(map,s,map);
3056 // Schedule this while we wait on the load
3057 //if(x) emit_xorimm(s,x,ar);
3058 if(shift>=0) emit_shlimm(s,3,shift);
3059 if(~a) emit_andimm(s,a,ar);
3060 emit_readword_dualindexedx4(FP,map,map);
3061 }
3062 return map;
3063}
3064int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3065{
3066 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3067 emit_test(map,map);
3068 *jaddr=(int)out;
3069 emit_js(0);
3070 }
3071 return map;
3072}
3073
3074int gen_tlb_addr_r(int ar, int map) {
3075 if(map>=0) {
3076 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3077 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3078 }
3079}
3080
3081int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3082{
3083 if(c) {
3084 if(addr<0x80800000||addr>=0xC0000000) {
3085 // address_generation already loaded the const
3086 emit_readword_dualindexedx4(FP,map,map);
3087 }
3088 else
3089 return -1; // No mapping
3090 }
3091 else {
3092 assert(s!=map);
3093 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3094 emit_addsr12(map,s,map);
3095 // Schedule this while we wait on the load
3096 //if(x) emit_xorimm(s,x,ar);
3097 emit_readword_dualindexedx4(FP,map,map);
3098 }
3099 return map;
3100}
3101int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3102{
3103 if(!c||addr<0x80800000||addr>=0xC0000000) {
3104 emit_testimm(map,0x40000000);
3105 *jaddr=(int)out;
3106 emit_jne(0);
3107 }
3108}
3109
3110int gen_tlb_addr_w(int ar, int map) {
3111 if(map>=0) {
3112 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3113 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3114 }
3115}
3116
3117// Generate the address of the memory_map entry, relative to dynarec_local
3118generate_map_const(u_int addr,int reg) {
3119 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3120 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3121}
3122
3123/* Special assem */
3124
3125void shift_assemble_arm(int i,struct regstat *i_regs)
3126{
3127 if(rt1[i]) {
3128 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3129 {
3130 signed char s,t,shift;
3131 t=get_reg(i_regs->regmap,rt1[i]);
3132 s=get_reg(i_regs->regmap,rs1[i]);
3133 shift=get_reg(i_regs->regmap,rs2[i]);
3134 if(t>=0){
3135 if(rs1[i]==0)
3136 {
3137 emit_zeroreg(t);
3138 }
3139 else if(rs2[i]==0)
3140 {
3141 assert(s>=0);
3142 if(s!=t) emit_mov(s,t);
3143 }
3144 else
3145 {
3146 emit_andimm(shift,31,HOST_TEMPREG);
3147 if(opcode2[i]==4) // SLLV
3148 {
3149 emit_shl(s,HOST_TEMPREG,t);
3150 }
3151 if(opcode2[i]==6) // SRLV
3152 {
3153 emit_shr(s,HOST_TEMPREG,t);
3154 }
3155 if(opcode2[i]==7) // SRAV
3156 {
3157 emit_sar(s,HOST_TEMPREG,t);
3158 }
3159 }
3160 }
3161 } else { // DSLLV/DSRLV/DSRAV
3162 signed char sh,sl,th,tl,shift;
3163 th=get_reg(i_regs->regmap,rt1[i]|64);
3164 tl=get_reg(i_regs->regmap,rt1[i]);
3165 sh=get_reg(i_regs->regmap,rs1[i]|64);
3166 sl=get_reg(i_regs->regmap,rs1[i]);
3167 shift=get_reg(i_regs->regmap,rs2[i]);
3168 if(tl>=0){
3169 if(rs1[i]==0)
3170 {
3171 emit_zeroreg(tl);
3172 if(th>=0) emit_zeroreg(th);
3173 }
3174 else if(rs2[i]==0)
3175 {
3176 assert(sl>=0);
3177 if(sl!=tl) emit_mov(sl,tl);
3178 if(th>=0&&sh!=th) emit_mov(sh,th);
3179 }
3180 else
3181 {
3182 // FIXME: What if shift==tl ?
3183 assert(shift!=tl);
3184 int temp=get_reg(i_regs->regmap,-1);
3185 int real_th=th;
3186 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3187 assert(sl>=0);
3188 assert(sh>=0);
3189 emit_andimm(shift,31,HOST_TEMPREG);
3190 if(opcode2[i]==0x14) // DSLLV
3191 {
3192 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3193 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3194 emit_orrshr(sl,HOST_TEMPREG,th);
3195 emit_andimm(shift,31,HOST_TEMPREG);
3196 emit_testimm(shift,32);
3197 emit_shl(sl,HOST_TEMPREG,tl);
3198 if(th>=0) emit_cmovne_reg(tl,th);
3199 emit_cmovne_imm(0,tl);
3200 }
3201 if(opcode2[i]==0x16) // DSRLV
3202 {
3203 assert(th>=0);
3204 emit_shr(sl,HOST_TEMPREG,tl);
3205 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3206 emit_orrshl(sh,HOST_TEMPREG,tl);
3207 emit_andimm(shift,31,HOST_TEMPREG);
3208 emit_testimm(shift,32);
3209 emit_shr(sh,HOST_TEMPREG,th);
3210 emit_cmovne_reg(th,tl);
3211 if(real_th>=0) emit_cmovne_imm(0,th);
3212 }
3213 if(opcode2[i]==0x17) // DSRAV
3214 {
3215 assert(th>=0);
3216 emit_shr(sl,HOST_TEMPREG,tl);
3217 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3218 if(real_th>=0) {
3219 assert(temp>=0);
3220 emit_sarimm(th,31,temp);
3221 }
3222 emit_orrshl(sh,HOST_TEMPREG,tl);
3223 emit_andimm(shift,31,HOST_TEMPREG);
3224 emit_testimm(shift,32);
3225 emit_sar(sh,HOST_TEMPREG,th);
3226 emit_cmovne_reg(th,tl);
3227 if(real_th>=0) emit_cmovne_reg(temp,th);
3228 }
3229 }
3230 }
3231 }
3232 }
3233}
3234#define shift_assemble shift_assemble_arm
3235
// Assemble an unaligned load for instruction i: LWL/LWR (opcode 0x22/0x26)
// or LDL/LDR (opcode 0x1A/0x1B).
//
//   i      - instruction index (indexes rt1[], rs1[], imm[], opcode[],
//            constmap[], ccadj[])
//   i_regs - register state for this instruction; regmap and wasconst are read
//
// Nothing is emitted unless the low word of the target register (tl) is
// mapped to a host register.
3236void loadlr_assemble_arm(int i,struct regstat *i_regs)
3237{
3238 int s,th,tl,temp,temp2,addr,map=-1;
3239 int offset;
3240 int jaddr=0;
3241 int memtarget,c=0;
3242 u_int hr,reglist=0;
// Host registers: th/tl = target high/low word, s = base register,
// temp = scratch (mapped as -1), temp2 = FTEMP.
3243 th=get_reg(i_regs->regmap,rt1[i]|64);
3244 tl=get_reg(i_regs->regmap,rt1[i]);
3245 s=get_reg(i_regs->regmap,rs1[i]);
3246 temp=get_reg(i_regs->regmap,-1);
3247 temp2=get_reg(i_regs->regmap,FTEMP);
// No address-generation register is expected to be mapped for this
// instruction; addr is reassigned a few lines below.
3248 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3249 assert(addr<0);
3250 offset=imm[i];
// reglist: bitmask of host registers that currently hold a mapping, plus
// temp; it is handed to add_stub()/inline_readstub() below.
3251 for(hr=0;hr<HOST_REGS;hr++) {
3252 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3253 }
// NOTE(review): temp is not checked for >=0 before being used as a shift count.
3254 reglist|=1<<temp;
// c is still 0 here (it is only assigned in the block that follows), so this
// condition effectively tests offset||s<0.
3255 if(offset||s<0||c) addr=temp2;
3256 else addr=s;
// c: bit s of wasconst. memtarget: the constant address compares (signed)
// below 0x80000000+RAM_SIZE, or - with the TLB - at or above 0xC0000000.
// memtarget is only read further down when c is non-zero, and c can only
// become non-zero inside this block.
3257 if(s>=0) {
3258 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3259 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3260 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3261 }
3262 if(tl>=0) {
3263 //assert(tl>=0);
3264 //assert(rt1[i]);
3265 if(!using_tlb) {
3266 if(!c) {
// Run-time address: temp is derived from addr shifted left by 3 and temp2
// from addr masked down to a word (LWL/LWR) or doubleword (LDL/LDR) boundary
// (emit_* operand order presumably "sources..., destination" - confirm
// against the emitters). The compare against RAM_SIZE is followed by a
// branch whose location is kept in jaddr for add_stub().
3267 emit_shlimm(addr,3,temp);
3268 if (opcode[i]==0x22||opcode[i]==0x26) {
3269 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3270 }else{
3271 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3272 }
4cb76aa4 3273 emit_cmpimm(addr,RAM_SIZE);
57871462 3274 jaddr=(int)out;
3275 emit_jno(0);
3276 }
3277 else {
// Constant address: the bit offset within the word/doubleword is known at
// assembly time and loaded as an immediate.
3278 if (opcode[i]==0x22||opcode[i]==0x26) {
3279 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3280 }else{
3281 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3282 }
3283 }
3284 }else{ // using tlb
// a: alignment mask passed to do_tlb_r(); -1 when the address is constant.
3285 int a;
3286 if(c) {
3287 a=-1;
3288 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3289 a=0xFFFFFFFC; // LWL/LWR
3290 }else{
3291 a=0xFFFFFFF8; // LDL/LDR
3292 }
3293 map=get_reg(i_regs->regmap,TLREG);
3294 assert(map>=0);
// NOTE(review): constmap[i][s] is evaluated here (and for do_tlb_r_branch
// below) even when s is negative - confirm the callees ignore the value
// when c is 0.
3295 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3296 if(c) {
3297 if (opcode[i]==0x22||opcode[i]==0x26) {
3298 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3299 }else{
3300 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3301 }
3302 }
3303 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3304 }
3305 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Fetch the aligned word into temp2: directly when the address is not
// constant or is a constant memory target (registering a stub if a branch
// was left at jaddr), otherwise through an inline read stub.
3306 if(!c||memtarget) {
3307 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3308 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3309 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3310 }
3311 else
3312 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
// temp is reduced to the byte offset in bits (0/8/16/24); which of LWL/LWR
// gets it flipped with xor 24 depends on BIG_ENDIAN_MIPS.
3313 emit_andimm(temp,24,temp);
2002a1db 3314#ifdef BIG_ENDIAN_MIPS
3315 if (opcode[i]==0x26) // LWR
3316#else
3317 if (opcode[i]==0x22) // LWL
3318#endif
3319 emit_xorimm(temp,24,temp);
// Merge: shift the loaded word by temp, clear the matching bits of tl using
// an all-ones mask shifted the same way, then OR the two together.
57871462 3320 emit_movimm(-1,HOST_TEMPREG);
3321 if (opcode[i]==0x26) {
3322 emit_shr(temp2,temp,temp2);
3323 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3324 }else{
3325 emit_shl(temp2,temp,temp2);
3326 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3327 }
3328 emit_or(temp2,tl,tl);
3329 //emit_storereg(rt1[i],tl); // DEBUG
3330 }
3331 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3332 // FIXME: little endian
// temp2h/temp2 receive the aligned doubleword. Bit 5 of temp (testimm 32)
// sets the condition used by the conditional moves/bic/orr that follow; the
// remaining offset is masked to 0..24.
// NOTE(review): th and temp2h are used without a >=0 check in this path.
57871462 3333 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3334 if(!c||memtarget) {
3335 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3336 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3337 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3338 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3339 }
3340 else
3341 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3342 emit_testimm(temp,32);
3343 emit_andimm(temp,24,temp);
3344 if (opcode[i]==0x1A) { // LDL
3345 emit_rsbimm(temp,32,HOST_TEMPREG);
3346 emit_shl(temp2h,temp,temp2h);
3347 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3348 emit_movimm(-1,HOST_TEMPREG);
3349 emit_shl(temp2,temp,temp2);
3350 emit_cmove_reg(temp2h,th);
3351 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3352 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3353 emit_orreq(temp2,tl,tl);
3354 emit_orrne(temp2,th,th);
3355 }
3356 if (opcode[i]==0x1B) { // LDR
3357 emit_xorimm(temp,24,temp);
3358 emit_rsbimm(temp,32,HOST_TEMPREG);
3359 emit_shr(temp2,temp,temp2);
3360 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3361 emit_movimm(-1,HOST_TEMPREG);
3362 emit_shr(temp2h,temp,temp2h);
3363 emit_cmovne_reg(temp2,tl);
3364 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3365 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3366 emit_orrne(temp2h,th,th);
3367 emit_orreq(temp2h,tl,tl);
3368 }
3369 }
3370 }
3371}
// Generic name the rest of the recompiler uses for this backend's implementation.
3372#define loadlr_assemble loadlr_assemble_arm
3373
3374void cop0_assemble(int i,struct regstat *i_regs)
3375{
3376 if(opcode2[i]==0) // MFC0
3377 {
3378 signed char t=get_reg(i_regs->regmap,rt1[i]);
3379 char copr=(source[i]>>11)&0x1f;
3380 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3381 if(t>=0&&rt1[i]!=0) {
7139f3c8 3382#ifdef MUPEN64
57871462 3383 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3384 emit_movimm((source[i]>>11)&0x1f,1);
3385 emit_writeword(0,(int)&PC);
3386 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3387 if(copr==9) {
3388 emit_readword((int)&last_count,ECX);
3389 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3390 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3391 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3392 emit_writeword(HOST_CCREG,(int)&Count);
3393 }
3394 emit_call((int)MFC0);
3395 emit_readword((int)&readmem_dword,t);
7139f3c8 3396#else
3397 emit_readword((int)&reg_cop0+copr*4,t);
3398#endif
57871462 3399 }
3400 }
3401 else if(opcode2[i]==4) // MTC0
3402 {
3403 signed char s=get_reg(i_regs->regmap,rs1[i]);
3404 char copr=(source[i]>>11)&0x1f;
3405 assert(s>=0);
3406 emit_writeword(s,(int)&readmem_dword);
3407 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3408#ifdef MUPEN64 /// FIXME
57871462 3409 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3410 emit_movimm((source[i]>>11)&0x1f,1);
3411 emit_writeword(0,(int)&PC);
3412 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3413#endif
7139f3c8 3414#ifdef PCSX
3415 emit_movimm(source[i],0);
3416 emit_writeword(0,(int)&psxRegs.code);
3417#endif
3418 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3419 emit_readword((int)&last_count,ECX);
3420 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3421 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3422 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3423 emit_writeword(HOST_CCREG,(int)&Count);
3424 }
3425 // What a mess. The status register (12) can enable interrupts,
3426 // so needs a special case to handle a pending interrupt.
3427 // The interrupt must be taken immediately, because a subsequent
3428 // instruction might disable interrupts again.
7139f3c8 3429 if(copr==12||copr==13) {
57871462 3430 emit_movimm(start+i*4+4,0);
3431 emit_movimm(0,1);
3432 emit_writeword(0,(int)&pcaddr);
3433 emit_writeword(1,(int)&pending_exception);
3434 }
3435 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3436 //else
3437 emit_call((int)MTC0);
7139f3c8 3438 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3439 emit_readword((int)&Count,HOST_CCREG);
3440 emit_readword((int)&next_interupt,ECX);
3441 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3442 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3443 emit_writeword(ECX,(int)&last_count);
3444 emit_storereg(CCREG,HOST_CCREG);
3445 }
7139f3c8 3446 if(copr==12||copr==13) {
57871462 3447 assert(!is_delayslot);
3448 emit_readword((int)&pending_exception,14);
3449 }
3450 emit_loadreg(rs1[i],s);
3451 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3452 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3453 if(copr==12||copr==13) {
57871462 3454 emit_test(14,14);
3455 emit_jne((int)&do_interrupt);
3456 }
3457 cop1_usable=0;
3458 }
3459 else
3460 {
3461 assert(opcode2[i]==0x10);
3d624f89 3462#ifndef DISABLE_TLB
57871462 3463 if((source[i]&0x3f)==0x01) // TLBR
3464 emit_call((int)TLBR);
3465 if((source[i]&0x3f)==0x02) // TLBWI
3466 emit_call((int)TLBWI_new);
3467 if((source[i]&0x3f)==0x06) { // TLBWR
3468 // The TLB entry written by TLBWR is dependent on the count,
3469 // so update the cycle count
3470 emit_readword((int)&last_count,ECX);
3471 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3472 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3473 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3474 emit_writeword(HOST_CCREG,(int)&Count);
3475 emit_call((int)TLBWR_new);
3476 }
3477 if((source[i]&0x3f)==0x08) // TLBP
3478 emit_call((int)TLBP);
3d624f89 3479#endif