drc: merge Ari64's patch: 14_dont_save_or_restore_temporary
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// State that is defined outside this file and only declared here.
// Uses visible in this file: cycle_count is the memory slot used for CCREG by
// emit_loadreg/emit_storereg; dynarec_local is the base from which fp-relative
// load/store offsets are computed; memory_map is indexed by (address>>12) in
// verify_dirty/get_bounds.
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
// fake_pc is only declared when building with MUPEN64 defined.
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
// 1048576 entries == one entry per 4 KB page of a 32-bit address space.
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
// Array of 1<<(TARGET_SIZE_2-17) words. NOTE(review): presumably a bitmap of
// translation-cache regions awaiting an instruction-cache flush (32 bits per
// word would give one bit per 4 KB if TARGET_SIZE_2 is log2 of the cache
// size) -- confirm against the code that sets and consumes it.
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
//
// alloc_reg: give guest register `reg` a host register in cur->regmap for
// the instruction at index i. Steps, in the order coded below:
//   1. return at once if reg is flagged in cur->u or is already mapped;
//   2. take the preferred host register if it is free or holds a value
//      flagged as unneeded in cur->u / cur->uu;
//   3. unmap at most one unneeded register, then take a free host register,
//      first avoiding ones whose previous mapping was an operand of
//      instruction i-1;
//   4. otherwise evict a mapping chosen from the hsn[] ranking.
// Every path that installs a new mapping clears that host register's bit in
// cur->dirty and cur->isconst. Exits the process if nothing can be evicted.
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
// Default preference is derived from the low 3 bits of the guest register;
// CCREG, PTEMP and FTEMP have fixed preferences.
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
// Preferred register is occupied: reuse it if its current content is
// unneeded (values < 64 are checked against cur->u, values >= 64 against
// cur->uu).
334 r=cur->regmap[preferred_reg];
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
// Note: the loop stops after the first mapping it clears (break).
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn()
// (defined elsewhere). The loops below evict registers with the highest
// hsn value first.
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass (hsn 10 down to 3): skips operands of instruction i-1 and only
// touches HOST_CCREG when j < hsn[CCREG].
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Prefer replacing the upper half (r+64) before the lower half (r).
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
// Fallback pass over every hsn value: no exclusion of recent operands and
// no special treatment of HOST_CCREG.
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
// alloc_reg64: allocate host registers for both halves of guest register
// `reg` at instruction index i. The lower half is handled by alloc_reg();
// the upper half is stored in cur->regmap as reg|64 and goes through the
// same sequence of attempts as alloc_reg (preferred register, unneeded
// register, any free register, eviction by hsn[] ranking). The upper half is
// skipped when reg is flagged in cur->uu or is already mapped.
// Exits the process if nothing can be evicted.
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
// Default preference for the upper half is host register 8 or 9.
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
500 r=cur->regmap[preferred_reg];
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
// Note: unlike alloc_reg this scan runs from the highest host register
// downwards; it also stops after the first mapping it clears.
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn()
// (defined elsewhere); the highest hsn values are evicted first.
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass (hsn 10 down to 3): skips operands of instruction i-1 and only
// touches HOST_CCREG when j < hsn[CCREG].
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
// Fallback pass over every hsn value with no exclusions.
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
//
// Order of attempts, as coded below: return if `reg` is already mapped; take
// a free host register (scanning from the highest number down); take over a
// host register whose content is unneeded both now (cur->u / cur->uu) and at
// instruction i-1 (unneeded_reg[i-1] / unneeded_reg_upper[i-1]); otherwise
// evict by hsn[] ranking. Exits the process if nothing can be evicted.
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
// No preferred host register: -1 is passed to lsn() below and never used as
// an index in this function.
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every guest register and is then updated by lsn()
// (defined elsewhere); the highest hsn values are evicted first.
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass (hsn 10 down to 3): skips operands of instruction i-1.
// Note: the HOST_CCREG guard here is hsn[CCREG]>2, whereas alloc_reg and
// alloc_reg64 test j<hsn[CCREG].
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
// Fallback pass over every hsn value with no exclusions.
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
765
766 // see if it's already allocated (and dealloc it)
767 for(n=0;n<HOST_REGS;n++)
768 {
769 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
770 }
771
772 cur->regmap[hr]=reg;
773 cur->dirty&=~(1<<hr);
774 cur->isconst&=~(1<<hr);
775}
776
777// Alloc cycle count into dedicated register
778alloc_cc(struct regstat *cur,int i)
779{
780 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
781}
782
783/* Special alloc */
784
785
786/* Assembler */
787
/* Printable names of the 16 ARM core registers, indexed by register number
 * (used by the assem_debug traces). r11, r13, r14 and r15 are shown by
 * their conventional aliases. */
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
805
806void output_byte(u_char byte)
807{
808 *(out++)=byte;
809}
810void output_modrm(u_char mod,u_char rm,u_char ext)
811{
812 assert(mod<4);
813 assert(rm<8);
814 assert(ext<8);
815 u_char byte=(mod<<6)|(ext<<3)|rm;
816 *(out++)=byte;
817}
818void output_sib(u_char scale,u_char index,u_char base)
819{
820 assert(scale<4);
821 assert(index<8);
822 assert(base<8);
823 u_char byte=(scale<<6)|(index<<3)|base;
824 *(out++)=byte;
825}
826void output_w32(u_int word)
827{
828 *((u_int *)out)=word;
829 out+=4;
830}
/* Build the register fields of an instruction word: rn in bits 16-19,
 * rd in bits 12-15, rm in bits 0-3. Each register number must be < 16. */
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rn<<16;
  fields|=rd<<12;
  fields|=rm;
  return fields;
}
/* Build the rn/rd fields plus a rotated 8-bit immediate operand.
 * imm must be < 256 and shift must be even; the rotate field stored in
 * bits 8-11 is ((32-shift)&30)/2, i.e. the operand is imm shifted left by
 * `shift` bits. */
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
/* Try to express imm as an 8-bit value with an even rotation.
 *
 * On success stores the 12-bit operand field (rotate in bits 8-11, value in
 * bits 0-7) in *encoded and returns 1. Returns 0, with *encoded set to 0,
 * when no such encoding exists. Zero always encodes as 0. */
u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 861void genimm_checked(u_int imm,u_int *encoded)
862{
863 u_int ret=genimm(imm,encoded);
864 assert(ret);
865}
57871462 866u_int genjmp(u_int addr)
867{
868 int offset=addr-(int)out-8;
e80343e2 869 if(offset<-33554432||offset>=33554432) {
870 if (addr>2) {
871 printf("genjmp: out of range: %08x\n", offset);
872 exit(1);
873 }
874 return 0;
875 }
57871462 876 return ((u_int)offset>>2)&0xffffff;
877}
878
879void emit_mov(int rs,int rt)
880{
881 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
883}
884
885void emit_movs(int rs,int rt)
886{
887 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
888 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
889}
890
891void emit_add(int rs1,int rs2,int rt)
892{
893 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
895}
896
897void emit_adds(int rs1,int rs2,int rt)
898{
899 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
901}
902
903void emit_adcs(int rs1,int rs2,int rt)
904{
905 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
906 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
907}
908
909void emit_sbc(int rs1,int rs2,int rt)
910{
911 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
913}
914
915void emit_sbcs(int rs1,int rs2,int rt)
916{
917 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
919}
920
921void emit_neg(int rs, int rt)
922{
923 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
924 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
925}
926
927void emit_negs(int rs, int rt)
928{
929 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
930 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
931}
932
933void emit_sub(int rs1,int rs2,int rt)
934{
935 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
937}
938
939void emit_subs(int rs1,int rs2,int rt)
940{
941 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
942 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
943}
944
945void emit_zeroreg(int rt)
946{
947 assem_debug("mov %s,#0\n",regname[rt]);
948 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
949}
950
790ee18e 951void emit_loadlp(u_int imm,u_int rt)
952{
953 add_literal((int)out,imm);
954 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
955 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
956}
957void emit_movw(u_int imm,u_int rt)
958{
959 assert(imm<65536);
960 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
961 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
962}
963void emit_movt(u_int imm,u_int rt)
964{
965 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
966 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
967}
968void emit_movimm(u_int imm,u_int rt)
969{
970 u_int armval;
971 if(genimm(imm,&armval)) {
972 assem_debug("mov %s,#%d\n",regname[rt],imm);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
974 }else if(genimm(~imm,&armval)) {
975 assem_debug("mvn %s,#%d\n",regname[rt],imm);
976 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
977 }else if(imm<65536) {
978 #ifdef ARMv5_ONLY
979 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
980 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
981 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
982 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
983 #else
984 emit_movw(imm,rt);
985 #endif
986 }else{
987 #ifdef ARMv5_ONLY
988 emit_loadlp(imm,rt);
989 #else
990 emit_movw(imm&0x0000FFFF,rt);
991 emit_movt(imm&0xFFFF0000,rt);
992 #endif
993 }
994}
995void emit_pcreladdr(u_int rt)
996{
997 assem_debug("add %s,pc,#?\n",regname[rt]);
998 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
999}
1000
57871462 1001void emit_loadreg(int r, int hr)
1002{
3d624f89 1003#ifdef FORCE32
1004 if(r&64) {
1005 printf("64bit load in 32bit mode!\n");
7f2607ea 1006 assert(0);
1007 return;
3d624f89 1008 }
1009#endif
57871462 1010 if((r&63)==0)
1011 emit_zeroreg(hr);
1012 else {
3d624f89 1013 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1014 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1015 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1016 if(r==CCREG) addr=(int)&cycle_count;
1017 if(r==CSREG) addr=(int)&Status;
1018 if(r==FSREG) addr=(int)&FCR31;
1019 if(r==INVCP) addr=(int)&invc_ptr;
1020 u_int offset = addr-(u_int)&dynarec_local;
1021 assert(offset<4096);
1022 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1023 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1024 }
1025}
1026void emit_storereg(int r, int hr)
1027{
3d624f89 1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit store in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
1034#endif
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1044}
1045
1046void emit_test(int rs, int rt)
1047{
1048 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1049 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1050}
1051
1052void emit_testimm(int rs,int imm)
1053{
1054 u_int armval;
1055 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1056 genimm_checked(imm,&armval);
57871462 1057 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1058}
1059
b9b61529 1060void emit_testeqimm(int rs,int imm)
1061{
1062 u_int armval;
1063 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1064 genimm_checked(imm,&armval);
b9b61529 1065 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1066}
1067
57871462 1068void emit_not(int rs,int rt)
1069{
1070 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1071 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1072}
1073
b9b61529 1074void emit_mvnmi(int rs,int rt)
1075{
1076 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1077 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1078}
1079
57871462 1080void emit_and(u_int rs1,u_int rs2,u_int rt)
1081{
1082 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1083 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1084}
1085
1086void emit_or(u_int rs1,u_int rs2,u_int rt)
1087{
1088 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1089 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1090}
1091void emit_or_and_set_flags(int rs1,int rs2,int rt)
1092{
1093 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
f70d384d 1097void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1098{
1099 assert(rs<16);
1100 assert(rt<16);
1101 assert(imm<32);
1102 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1103 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1104}
1105
576bbd8f 1106void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
57871462 1115void emit_xor(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1119}
1120
57871462 1121void emit_addimm(u_int rs,int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 if(imm!=0) {
1126 assert(imm>-65536&&imm<65536);
1127 u_int armval;
1128 if(genimm(imm,&armval)) {
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(genimm(-imm,&armval)) {
1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1133 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1134 }else if(imm<0) {
1135 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1136 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1137 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1138 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1139 }else{
1140 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1141 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1142 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1143 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1144 }
1145 }
1146 else if(rs!=rt) emit_mov(rs,rt);
1147}
1148
1149void emit_addimm_and_set_flags(int imm,int rt)
1150{
1151 assert(imm>-65536&&imm<65536);
1152 u_int armval;
1153 if(genimm(imm,&armval)) {
1154 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1155 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1156 }else if(genimm(-imm,&armval)) {
1157 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1158 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1159 }else if(imm<0) {
1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1161 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1163 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
1165 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1166 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1167 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1168 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1169 }
1170}
1171void emit_addimm_no_flags(u_int imm,u_int rt)
1172{
1173 emit_addimm(rt,imm,rt);
1174}
1175
1176void emit_addnop(u_int r)
1177{
1178 assert(r<16);
1179 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1180 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1181}
1182
1183void emit_adcimm(u_int rs,int imm,u_int rt)
1184{
1185 u_int armval;
cfbd3c6e 1186 genimm_checked(imm,&armval);
57871462 1187 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1188 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1189}
1190/*void emit_sbcimm(int imm,u_int rt)
1191{
1192 u_int armval;
cfbd3c6e 1193 genimm_checked(imm,&armval);
57871462 1194 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1195 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1196}*/
1197void emit_sbbimm(int imm,u_int rt)
1198{
1199 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1200 assert(rt<8);
1201 if(imm<128&&imm>=-128) {
1202 output_byte(0x83);
1203 output_modrm(3,rt,3);
1204 output_byte(imm);
1205 }
1206 else
1207 {
1208 output_byte(0x81);
1209 output_modrm(3,rt,3);
1210 output_w32(imm);
1211 }
1212}
1213void emit_rscimm(int rs,int imm,u_int rt)
1214{
1215 assert(0);
1216 u_int armval;
cfbd3c6e 1217 genimm_checked(imm,&armval);
57871462 1218 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1219 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1220}
1221
1222void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1223{
1224 // TODO: if(genimm(imm,&armval)) ...
1225 // else
1226 emit_movimm(imm,HOST_TEMPREG);
1227 emit_adds(HOST_TEMPREG,rsl,rtl);
1228 emit_adcimm(rsh,0,rth);
1229}
1230
1231void emit_sbb(int rs1,int rs2)
1232{
1233 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1234 output_byte(0x19);
1235 output_modrm(3,rs1,rs2);
1236}
1237
1238void emit_andimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
790ee18e 1241 if(imm==0) {
1242 emit_zeroreg(rt);
1243 }else if(genimm(imm,&armval)) {
57871462 1244 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1246 }else if(genimm(~imm,&armval)) {
1247 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1248 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1249 }else if(imm==65535) {
1250 #ifdef ARMv5_ONLY
1251 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1252 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1253 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1254 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1255 #else
1256 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1257 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1258 #endif
1259 }else{
1260 assert(imm>0&&imm<65535);
1261 #ifdef ARMv5_ONLY
1262 assem_debug("mov r14,#%d\n",imm&0xFF00);
1263 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1264 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1265 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1266 #else
1267 emit_movw(imm,HOST_TEMPREG);
1268 #endif
1269 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1270 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1271 }
1272}
1273
1274void emit_orimm(int rs,int imm,int rt)
1275{
1276 u_int armval;
790ee18e 1277 if(imm==0) {
1278 if(rs!=rt) emit_mov(rs,rt);
1279 }else if(genimm(imm,&armval)) {
57871462 1280 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1281 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1282 }else{
1283 assert(imm>0&&imm<65536);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1287 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1288 }
1289}
1290
1291void emit_xorimm(int rs,int imm,int rt)
1292{
57871462 1293 u_int armval;
790ee18e 1294 if(imm==0) {
1295 if(rs!=rt) emit_mov(rs,rt);
1296 }else if(genimm(imm,&armval)) {
57871462 1297 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1298 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1299 }else{
514ed0d9 1300 assert(imm>0&&imm<65536);
57871462 1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1304 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1305 }
1306}
1307
1308void emit_shlimm(int rs,u_int imm,int rt)
1309{
1310 assert(imm>0);
1311 assert(imm<32);
1312 //if(imm==1) ...
1313 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1315}
1316
1317void emit_shrimm(int rs,u_int imm,int rt)
1318{
1319 assert(imm>0);
1320 assert(imm<32);
1321 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1323}
1324
1325void emit_sarimm(int rs,u_int imm,int rt)
1326{
1327 assert(imm>0);
1328 assert(imm<32);
1329 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1331}
1332
1333void emit_rorimm(int rs,u_int imm,int rt)
1334{
1335 assert(imm>0);
1336 assert(imm<32);
1337 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1338 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1339}
1340
1341void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1342{
1343 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1344 assert(imm>0);
1345 assert(imm<32);
1346 //if(imm==1) ...
1347 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1350 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1351}
1352
1353void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1354{
1355 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1356 assert(imm>0);
1357 assert(imm<32);
1358 //if(imm==1) ...
1359 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1361 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1362 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1363}
1364
b9b61529 1365void emit_signextend16(int rs,int rt)
1366{
1367 #ifdef ARMv5_ONLY
1368 emit_shlimm(rs,16,rt);
1369 emit_sarimm(rt,16,rt);
1370 #else
1371 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1372 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1373 #endif
1374}
1375
57871462 1376void emit_shl(u_int rs,u_int shift,u_int rt)
1377{
1378 assert(rs<16);
1379 assert(rt<16);
1380 assert(shift<16);
1381 //if(imm==1) ...
1382 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1383 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1384}
1385void emit_shr(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1391 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1392}
1393void emit_sar(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1400}
1401void emit_shlcl(int r)
1402{
1403 assem_debug("shl %%%s,%%cl\n",regname[r]);
1404 assert(0);
1405}
1406void emit_shrcl(int r)
1407{
1408 assem_debug("shr %%%s,%%cl\n",regname[r]);
1409 assert(0);
1410}
1411void emit_sarcl(int r)
1412{
1413 assem_debug("sar %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416
1417void emit_shldcl(int r1,int r2)
1418{
1419 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1420 assert(0);
1421}
1422void emit_shrdcl(int r1,int r2)
1423{
1424 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1425 assert(0);
1426}
1427void emit_orrshl(u_int rs,u_int shift,u_int rt)
1428{
1429 assert(rs<16);
1430 assert(rt<16);
1431 assert(shift<16);
1432 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1434}
1435void emit_orrshr(u_int rs,u_int shift,u_int rt)
1436{
1437 assert(rs<16);
1438 assert(rt<16);
1439 assert(shift<16);
1440 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1441 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1442}
1443
1444void emit_cmpimm(int rs,int imm)
1445{
1446 u_int armval;
1447 if(genimm(imm,&armval)) {
1448 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1449 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1450 }else if(genimm(-imm,&armval)) {
1451 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1452 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1453 }else if(imm>0) {
1454 assert(imm<65536);
1455 #ifdef ARMv5_ONLY
1456 emit_movimm(imm,HOST_TEMPREG);
1457 #else
1458 emit_movw(imm,HOST_TEMPREG);
1459 #endif
1460 assem_debug("cmp %s,r14\n",regname[rs]);
1461 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1462 }else{
1463 assert(imm>-65536);
1464 #ifdef ARMv5_ONLY
1465 emit_movimm(-imm,HOST_TEMPREG);
1466 #else
1467 emit_movw(-imm,HOST_TEMPREG);
1468 #endif
1469 assem_debug("cmn %s,r14\n",regname[rs]);
1470 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1471 }
1472}
1473
1474void emit_cmovne(u_int *addr,int rt)
1475{
1476 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1477 assert(0);
1478}
1479void emit_cmovl(u_int *addr,int rt)
1480{
1481 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1482 assert(0);
1483}
1484void emit_cmovs(u_int *addr,int rt)
1485{
1486 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovne_imm(int imm,int rt)
1490{
1491 assem_debug("movne %s,#%d\n",regname[rt],imm);
1492 u_int armval;
cfbd3c6e 1493 genimm_checked(imm,&armval);
57871462 1494 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1495}
1496void emit_cmovl_imm(int imm,int rt)
1497{
1498 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1499 u_int armval;
cfbd3c6e 1500 genimm_checked(imm,&armval);
57871462 1501 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1502}
1503void emit_cmovb_imm(int imm,int rt)
1504{
1505 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1506 u_int armval;
cfbd3c6e 1507 genimm_checked(imm,&armval);
57871462 1508 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1509}
1510void emit_cmovs_imm(int imm,int rt)
1511{
1512 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1513 u_int armval;
cfbd3c6e 1514 genimm_checked(imm,&armval);
57871462 1515 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1516}
1517void emit_cmove_reg(int rs,int rt)
1518{
1519 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1520 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1521}
1522void emit_cmovne_reg(int rs,int rt)
1523{
1524 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1526}
1527void emit_cmovl_reg(int rs,int rt)
1528{
1529 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovs_reg(int rs,int rt)
1533{
1534 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1536}
1537
// Set rt to 1 if rs < imm (signed compare, LT condition), else 0.
// rt is cleared before the compare unless it aliases rs; in that case the
// clear is deferred until after rs has been compared.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs < imm (unsigned compare, CC condition), else 0.
// rt is cleared before the compare unless it aliases rs; in that case the
// clear is deferred until after rs has been compared.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// 64-bit "set if less than immediate" (signed) on the pair rsh:rsl, result
// in rt.  The low-word result from emit_slti32 is computed first and then
// overridden from the high word: it is forced to 0 when the high word does
// not match the immediate's sign extension (0 or -1), and to 1 when the high
// word is negative / less than -1.  rsh must not alias rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  const int nonneg=(imm>=0);
  if(nonneg) emit_test(rsh,rsh);
  else emit_cmpimm(rsh,-1);
  emit_cmovne_imm(0,rt);
  if(nonneg) emit_cmovs_imm(1,rt);
  else emit_cmovl_imm(1,rt);
}
// 64-bit "set if less than immediate" (unsigned) on the pair rsh:rsl, result
// in rt.  The low-word result from emit_sltiu32 is overridden when the high
// word differs from the immediate's sign extension: to 0 for a non-negative
// imm (high word non-zero), to 1 for a negative imm (high word not -1).
// rsh must not alias rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  const int nonneg=(imm>=0);
  if(nonneg) emit_test(rsh,rsh);
  else emit_cmpimm(rsh,-1);
  emit_cmovne_imm(nonneg?0:1,rt);
}
1584
1585void emit_cmp(int rs,int rt)
1586{
1587 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1588 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1589}
// Set rt to 1 if rs > 0 (signed), else 0.
// Implemented as: compare rs with 1, preload 1, replace with 0 on LT.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Normalise rs to a boolean in rt: any non-zero value becomes 1.
// When rs and rt differ, rs is first copied with emit_movs; when they are
// the same register only a flag-setting test is needed.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if the 64-bit value rsh:rsl is greater than zero, else 0.
// Starts from the 32-bit result for the low word, then overrides it from the
// high word: 1 if the high word is non-zero, 0 if it is negative.
// NOTE(review): when rsh is zero the result is whatever emit_set_gz32 gave
// for rsl, which is a signed test of the low word -- confirm this is the
// intended behaviour for low words with bit 31 set.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// Set rt to 1 if the 64-bit value rsh:rsl is non-zero.
// The two halves are ORed into rt with flags set; a non-zero result is then
// replaced by 1 (a zero result is already 0).
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (signed compare, LT condition), else 0.
// rt is cleared before the compare unless it aliases one of the sources; in
// that case the clear is deferred until after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (unsigned compare, CC condition), else 0.
// rt is cleared before the compare unless it aliases one of the sources; in
// that case the clear is deferred until after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1634void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1635{
1636 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1637 assert(u1!=rt);
1638 assert(u2!=rt);
1639 emit_cmp(l1,l2);
1640 emit_movimm(0,rt);
1641 emit_sbcs(u1,u2,HOST_TEMPREG);
1642 emit_cmovl_imm(1,rt);
1643}
1644void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1645{
1646 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1647 assert(u1!=rt);
1648 assert(u2!=rt);
1649 emit_cmp(l1,l2);
1650 emit_movimm(0,rt);
1651 emit_sbcs(u1,u2,HOST_TEMPREG);
1652 emit_cmovb_imm(1,rt);
1653}
1654
1655void emit_call(int a)
1656{
1657 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1658 u_int offset=genjmp(a);
1659 output_w32(0xeb000000|offset);
1660}
1661void emit_jmp(int a)
1662{
1663 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1664 u_int offset=genjmp(a);
1665 output_w32(0xea000000|offset);
1666}
// Emit "bne a": branch to a under the NE condition.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a": branch to a under the EQ condition.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a": branch to a under the MI (negative) condition.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a": branch to a under the PL (non-negative) condition.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a": branch to a under the LT (signed less-than) condition.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a": branch to a under the GE (signed greater-or-equal) condition.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a": branch to a under the VC (no overflow) condition.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a": branch to a under the CS (carry set) condition.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a": branch to a under the CC (carry clear) condition.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1721
// Unimplemented placeholder: traces an x86-style "push $imm" and then fails
// an assert(0).  No code is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented placeholder: traces "pusha" and then fails an assert(0).
// No code is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented placeholder: traces "popa" and then fails an assert(0).
// No code is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1737void emit_pushreg(u_int r)
1738{
1739 assem_debug("push %%%s\n",regname[r]);
1740 assert(0);
1741}
1742void emit_popreg(u_int r)
1743{
1744 assem_debug("pop %%%s\n",regname[r]);
1745 assert(0);
1746}
1747void emit_callreg(u_int r)
1748{
1749 assem_debug("call *%%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_jmpreg(u_int r)
1753{
1754 assem_debug("mov pc,%s\n",regname[r]);
1755 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1756}
1757
1758void emit_readword_indexed(int offset, int rs, int rt)
1759{
1760 assert(offset>-4096&&offset<4096);
1761 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1762 if(offset>=0) {
1763 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1764 }else{
1765 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1766 }
1767}
1768void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1769{
1770 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1771 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1772}
// 32-bit load that optionally goes through a map register.
// map<0 means no map: a plain immediate-offset load is emitted.  Otherwise
// the address is rs + map*4 and addr must be 0.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map>=0) {
    assert(addr==0);
    emit_readword_dualindexedx4(rs, map, rt);
    return;
  }
  emit_readword_indexed(addr, rs, rt);
}
// 64-bit load as two 32-bit loads: the word at addr goes to rh (skipped when
// rh<0) and the word at addr+4 goes to rl.
// In the mapped case (map>=0) the map register itself is incremented by 1
// between the two loads, which the x4-scaled addressing turns into +4 bytes;
// map is therefore modified.  rh must not alias rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1793void emit_movsbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-256&&offset<256);
1796 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1799 }else{
1800 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1801 }
1802}
1803void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1804{
1805 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1806 else {
1807 if(addr==0) {
1808 emit_shlimm(map,2,map);
1809 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1810 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1811 }else{
1812 assert(addr>-256&&addr<256);
1813 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1814 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1815 emit_movsbl_indexed(addr, rt, rt);
1816 }
1817 }
1818}
1819void emit_movswl_indexed(int offset, int rs, int rt)
1820{
1821 assert(offset>-256&&offset<256);
1822 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1823 if(offset>=0) {
1824 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1825 }else{
1826 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1827 }
1828}
1829void emit_movzbl_indexed(int offset, int rs, int rt)
1830{
1831 assert(offset>-4096&&offset<4096);
1832 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1833 if(offset>=0) {
1834 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1835 }else{
1836 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1837 }
1838}
1839void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1843}
// Zero-extending byte load that optionally goes through a map register.
// map<0: plain immediate-offset load.  Otherwise the byte is read from
// base + map*4, where base is rs itself when addr==0 and rs+addr (computed
// into rt) when it is not.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1856void emit_movzwl_indexed(int offset, int rs, int rt)
1857{
1858 assert(offset>-256&&offset<256);
1859 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1860 if(offset>=0) {
1861 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1862 }else{
1863 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1864 }
1865}
1866void emit_readword(int addr, int rt)
1867{
1868 u_int offset = addr-(u_int)&dynarec_local;
1869 assert(offset<4096);
1870 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1871 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1872}
1873void emit_movsbl(int addr, int rt)
1874{
1875 u_int offset = addr-(u_int)&dynarec_local;
1876 assert(offset<256);
1877 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1878 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1879}
1880void emit_movswl(int addr, int rt)
1881{
1882 u_int offset = addr-(u_int)&dynarec_local;
1883 assert(offset<256);
1884 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1885 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1886}
1887void emit_movzbl(int addr, int rt)
1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<4096);
1891 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1892 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1893}
1894void emit_movzwl(int addr, int rt)
1895{
1896 u_int offset = addr-(u_int)&dynarec_local;
1897 assert(offset<256);
1898 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1899 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1900}
1901void emit_movzwl_reg(int rs, int rt)
1902{
1903 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1904 assert(0);
1905}
1906
1907void emit_xchg(int rs, int rt)
1908{
1909 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1910 assert(0);
1911}
1912void emit_writeword_indexed(int rt, int offset, int rs)
1913{
1914 assert(offset>-4096&&offset<4096);
1915 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1916 if(offset>=0) {
1917 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1918 }else{
1919 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1920 }
1921}
1922void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1923{
1924 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1926}
// 32-bit store that optionally goes through a map register.
// map<0 means no map: a plain immediate-offset store is emitted.  Otherwise
// the address is rs + map*4 and addr must be 0.  temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map>=0) {
    assert(addr==0);
    emit_writeword_dualindexedx4(rt, rs, map);
    return;
  }
  emit_writeword_indexed(rt, addr, rs);
}
// 64-bit store as two 32-bit stores: rh to addr and rl to addr+4.
// Unmapped case (map<0): rh is skipped when negative.
// Mapped case: rh must be valid.  When temp is a separate register it
// receives map+1 and is used as the map for the second store; when temp is
// the same register as rs, rs itself is advanced by 4 before the second
// store (so rs is modified).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1951void emit_writehword_indexed(int rt, int offset, int rs)
1952{
1953 assert(offset>-256&&offset<256);
1954 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1955 if(offset>=0) {
1956 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1957 }else{
1958 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1959 }
1960}
1961void emit_writebyte_indexed(int rt, int offset, int rs)
1962{
1963 assert(offset>-4096&&offset<4096);
1964 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1967 }else{
1968 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1969 }
1970}
1971void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1972{
1973 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1974 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1975}
// Byte store that optionally goes through a map register.
// map<0: plain immediate-offset store.  Otherwise the byte is written to
// base + map*4, where base is rs itself when addr==0 and rs+addr (computed
// into temp) when it is not.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1988void emit_writeword(int rt, int addr)
1989{
1990 u_int offset = addr-(u_int)&dynarec_local;
1991 assert(offset<4096);
1992 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1993 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1994}
1995void emit_writehword(int rt, int addr)
1996{
1997 u_int offset = addr-(u_int)&dynarec_local;
1998 assert(offset<256);
1999 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2000 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2001}
2002void emit_writebyte(int rt, int addr)
2003{
2004 u_int offset = addr-(u_int)&dynarec_local;
2005 assert(offset<4096);
74426039 2006 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2007 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2008}
// Unimplemented placeholder: traces an x86-style "movl $imm,addr" and then
// fails an assert(0).  No code is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented placeholder: traces an x86-style "movb $imm,addr" and then
// fails an assert(0).  No code is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2019
2020void emit_mul(int rs)
2021{
2022 assem_debug("mul %%%s\n",regname[rs]);
2023 assert(0);
2024}
2025void emit_imul(int rs)
2026{
2027 assem_debug("imul %%%s\n",regname[rs]);
2028 assert(0);
2029}
2030void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2031{
2032 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2033 assert(rs1<16);
2034 assert(rs2<16);
2035 assert(hi<16);
2036 assert(lo<16);
2037 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2038}
2039void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2040{
2041 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2042 assert(rs1<16);
2043 assert(rs2<16);
2044 assert(hi<16);
2045 assert(lo<16);
2046 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2047}
2048
2049void emit_div(int rs)
2050{
2051 assem_debug("div %%%s\n",regname[rs]);
2052 assert(0);
2053}
2054void emit_idiv(int rs)
2055{
2056 assem_debug("idiv %%%s\n",regname[rs]);
2057 assert(0);
2058}
// Unimplemented placeholder: traces "cdq" and then fails an assert(0).
// No code is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2064
2065void emit_clz(int rs,int rt)
2066{
2067 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2068 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2069}
2070
2071void emit_subcs(int rs1,int rs2,int rt)
2072{
2073 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2074 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2075}
2076
2077void emit_shrcc_imm(int rs,u_int imm,int rt)
2078{
2079 assert(imm>0);
2080 assert(imm<32);
2081 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2082 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2083}
2084
2085void emit_negmi(int rs, int rt)
2086{
2087 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2088 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2089}
2090
2091void emit_negsmi(int rs, int rt)
2092{
2093 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2094 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2095}
2096
2097void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2098{
2099 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2100 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2101}
2102
2103void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2104{
2105 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2106 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2107}
2108
2109void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2110{
2111 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2112 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2113}
2114
2115void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2116{
2117 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2118 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2119}
2120
2121void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2122{
2123 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2124 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2125}
2126
2127void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2128{
2129 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2130 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2131}
2132
2133void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2134{
2135 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2136 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2137}
2138
2139void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2140{
2141 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2142 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2143}
2144
2145void emit_teq(int rs, int rt)
2146{
2147 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2148 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2149}
2150
2151void emit_rsbimm(int rs, int imm, int rt)
2152{
2153 u_int armval;
cfbd3c6e 2154 genimm_checked(imm,&armval);
57871462 2155 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2156 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2157}
2158
2159// Load 2 immediates optimizing for small code size
2160void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2161{
2162 emit_movimm(imm1,rt1);
2163 u_int armval;
2164 if(genimm(imm2-imm1,&armval)) {
2165 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2166 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2167 }else if(genimm(imm1-imm2,&armval)) {
2168 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2169 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2170 }
2171 else emit_movimm(imm2,rt2);
2172}
2173
2174// Conditionally select one of two immediates, optimizing for small code size
2175// This will only be called if HAVE_CMOV_IMM is defined
2176void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2177{
2178 u_int armval;
2179 if(genimm(imm2-imm1,&armval)) {
2180 emit_movimm(imm1,rt);
2181 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2182 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2183 }else if(genimm(imm1-imm2,&armval)) {
2184 emit_movimm(imm1,rt);
2185 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2186 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2187 }
2188 else {
2189 #ifdef ARMv5_ONLY
2190 emit_movimm(imm1,rt);
2191 add_literal((int)out,imm2);
2192 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2193 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2194 #else
2195 emit_movw(imm1&0x0000FFFF,rt);
2196 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2197 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2198 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2199 }
2200 emit_movt(imm1&0xFFFF0000,rt);
2201 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2202 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2203 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2204 }
2205 #endif
2206 }
2207}
2208
// special case for checking invalid_code
// Not implemented in this backend: any call trips the assertion below.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2214
2215// special case for checking invalid_code
2216void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2217{
2218 assert(imm<128&&imm>=0);
2219 assert(r>=0&&r<16);
2220 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2221 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2222 emit_cmpimm(HOST_TEMPREG,imm);
2223}
2224
2225// special case for tlb mapping
2226void emit_addsr12(int rs1,int rs2,int rt)
2227{
2228 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2229 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2230}
2231
0bbd1454 2232void emit_callne(int a)
2233{
2234 assem_debug("blne %x\n",a);
2235 u_int offset=genjmp(a);
2236 output_w32(0x1b000000|offset);
2237}
2238
// Used to preload hash table entries
// NOTE(review): the emitted sequence (bytes 0x0F 0x18, a ModRM byte, then a
// 32-bit address) looks like an x86 encoding rather than an ARM instruction;
// presumably a leftover from the x86 backend - confirm it is never called.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2248void emit_prefetchreg(int r)
2249{
2250 assem_debug("pld %s\n",regname[r]);
2251 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2252}
2253
2254// Special case for mini_ht
2255void emit_ldreq_indexed(int rs, u_int offset, int rt)
2256{
2257 assert(offset<4096);
2258 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2259 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2260}
2261
2262void emit_flds(int r,int sr)
2263{
2264 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2265 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2266}
2267
2268void emit_vldr(int r,int vr)
2269{
2270 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2271 output_w32(0xed900b00|(vr<<12)|(r<<16));
2272}
2273
2274void emit_fsts(int sr,int r)
2275{
2276 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2277 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2278}
2279
2280void emit_vstr(int vr,int r)
2281{
2282 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2283 output_w32(0xed800b00|(vr<<12)|(r<<16));
2284}
2285
// Emit "ftosizs s<d>,s<s>": single-precision source s, single-register
// destination d. Only bits 0-3 of each register number are encoded.
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeebd0ac0|dst_bits|src_bits);
}
2291
// Emit "ftosizd s<d>,d<s>": double-precision source s (d0-d7 encodable),
// single-register destination d.
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=s&7;
  output_w32(0xeebd0bc0|dst_bits|src_bits);
}
2297
// Emit "fsitos s<d>,s<s>": both operands are single-precision registers;
// only bits 0-3 of each register number are encoded.
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80ac0|dst_bits|src_bits);
}
2303
// Emit "fsitod d<d>,s<s>": single-register source s, double-precision
// destination d (d0-d7 encodable).
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80bc0|dst_bits|src_bits);
}
2309
// Emit "fcvtds d<d>,s<s>": single-precision source s, double-precision
// destination d (d0-d7 encodable).
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  const int dst_bits=(d&7)<<12;
  const int src_bits=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb70ac0|dst_bits|src_bits);
}
2315
// Emit "fcvtsd s<d>,d<s>": double-precision source s (d0-d7 encodable),
// single-precision destination d.
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int src_bits=s&7;
  output_w32(0xeeb70bc0|dst_bits|src_bits);
}
2321
// Emit "fsqrts s<d>,s<s>": single-precision square root. Both operands are
// encoded as single-precision registers (bits 0-3 of each number), so the
// debug mnemonic prints both with the "s" prefix.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2327
// Emit "fsqrtd d<d>,d<s>": double-precision square root. Both operands are
// encoded as double-precision registers (d0-d7), so the debug mnemonic
// prints both with the "d" prefix.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2333
// Emit "fabss s<d>,s<s>": single-precision absolute value. Both operands
// are encoded as single-precision registers, so the debug mnemonic prints
// both with the "s" prefix.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2339
// Emit "fabsd d<d>,d<s>": double-precision absolute value. Both operands
// are encoded as double-precision registers (d0-d7), so the debug mnemonic
// prints both with the "d" prefix.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2345
// Emit "fnegs s<d>,s<s>": single-precision negate. Both operands are
// encoded as single-precision registers, so the debug mnemonic prints both
// with the "s" prefix.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2351
// Emit "fnegd d<d>,d<s>": double-precision negate. Both operands are
// encoded as double-precision registers (d0-d7), so the debug mnemonic
// prints both with the "d" prefix.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2357
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision add. Only bits 0-3 of
// each register number are encoded.
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a00|dst_bits|lhs_bits|rhs_bits);
}
2363
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision add (d0-d7 encodable).
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int lhs_bits=(s1&7)<<16;
  const int rhs_bits=s2&7;
  output_w32(0xee300b00|dst_bits|lhs_bits|rhs_bits);
}
2369
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision subtract. Only bits 0-3
// of each register number are encoded.
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a40|dst_bits|lhs_bits|rhs_bits);
}
2375
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision subtract (d0-d7 encodable).
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int lhs_bits=(s1&7)<<16;
  const int rhs_bits=s2&7;
  output_w32(0xee300b40|dst_bits|lhs_bits|rhs_bits);
}
2381
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision multiply. Only bits 0-3
// of each register number are encoded.
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee200a00|dst_bits|lhs_bits|rhs_bits);
}
2387
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision multiply (d0-d7 encodable).
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int lhs_bits=(s1&7)<<16;
  const int rhs_bits=s2&7;
  output_w32(0xee200b00|dst_bits|lhs_bits|rhs_bits);
}
2393
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision divide. Only bits 0-3 of
// each register number are encoded.
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  const int dst_bits=((d&14)<<11)|((d&1)<<22);
  const int lhs_bits=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_bits=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee800a00|dst_bits|lhs_bits|rhs_bits);
}
2399
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision divide (d0-d7 encodable).
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  const int dst_bits=(d&7)<<12;
  const int lhs_bits=(s1&7)<<16;
  const int rhs_bits=s2&7;
  output_w32(0xee800b00|dst_bits|lhs_bits|rhs_bits);
}
2405
2406void emit_fcmps(int x,int y)
2407{
2408 assem_debug("fcmps s14, s15\n");
2409 output_w32(0xeeb47a67);
2410}
2411
2412void emit_fcmpd(int x,int y)
2413{
2414 assem_debug("fcmpd d6, d7\n");
2415 output_w32(0xeeb46b47);
2416}
2417
2418void emit_fmstat()
2419{
2420 assem_debug("fmstat\n");
2421 output_w32(0xeef1fa10);
2422}
2423
2424void emit_bicne_imm(int rs,int imm,int rt)
2425{
2426 u_int armval;
cfbd3c6e 2427 genimm_checked(imm,&armval);
57871462 2428 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2429 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2430}
2431
2432void emit_biccs_imm(int rs,int imm,int rt)
2433{
2434 u_int armval;
cfbd3c6e 2435 genimm_checked(imm,&armval);
57871462 2436 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2437 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2438}
2439
2440void emit_bicvc_imm(int rs,int imm,int rt)
2441{
2442 u_int armval;
cfbd3c6e 2443 genimm_checked(imm,&armval);
57871462 2444 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2445 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2446}
2447
2448void emit_bichi_imm(int rs,int imm,int rt)
2449{
2450 u_int armval;
cfbd3c6e 2451 genimm_checked(imm,&armval);
57871462 2452 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2453 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2454}
2455
2456void emit_orrvs_imm(int rs,int imm,int rt)
2457{
2458 u_int armval;
cfbd3c6e 2459 genimm_checked(imm,&armval);
57871462 2460 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2461 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2462}
2463
b9b61529 2464void emit_orrne_imm(int rs,int imm,int rt)
2465{
2466 u_int armval;
cfbd3c6e 2467 genimm_checked(imm,&armval);
b9b61529 2468 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2469 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2470}
2471
2472void emit_andne_imm(int rs,int imm,int rt)
2473{
2474 u_int armval;
cfbd3c6e 2475 genimm_checked(imm,&armval);
b9b61529 2476 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2477 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2478}
2479
57871462 2480void emit_jno_unlikely(int a)
2481{
2482 //emit_jno(a);
2483 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2484 output_w32(0x72800000|rd_rn_rm(15,15,0));
2485}
2486
2487// Save registers before function call
2488void save_regs(u_int reglist)
2489{
2490 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2491 if(!reglist) return;
2492 assem_debug("stmia fp,{");
2493 if(reglist&1) assem_debug("r0, ");
2494 if(reglist&2) assem_debug("r1, ");
2495 if(reglist&4) assem_debug("r2, ");
2496 if(reglist&8) assem_debug("r3, ");
2497 if(reglist&0x1000) assem_debug("r12");
2498 assem_debug("}\n");
2499 output_w32(0xe88b0000|reglist);
2500}
2501// Restore registers after function call
2502void restore_regs(u_int reglist)
2503{
2504 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2505 if(!reglist) return;
2506 assem_debug("ldmia fp,{");
2507 if(reglist&1) assem_debug("r0, ");
2508 if(reglist&2) assem_debug("r1, ");
2509 if(reglist&4) assem_debug("r2, ");
2510 if(reglist&8) assem_debug("r3, ");
2511 if(reglist&0x1000) assem_debug("r12");
2512 assem_debug("}\n");
2513 output_w32(0xe89b0000|reglist);
2514}
2515
2516// Write back consts using r14 so we don't disturb the other registers
2517void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2518{
2519 int hr;
2520 for(hr=0;hr<HOST_REGS;hr++) {
2521 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2522 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2523 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2524 int value=constmap[i][hr];
2525 if(value==0) {
2526 emit_zeroreg(HOST_TEMPREG);
2527 }
2528 else {
2529 emit_movimm(value,HOST_TEMPREG);
2530 }
2531 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2532#ifndef FORCE32
57871462 2533 if((i_is32>>i_regmap[hr])&1) {
2534 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2535 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2536 }
24385cae 2537#endif
57871462 2538 }
2539 }
2540 }
2541 }
2542}
2543
2544/* Stubs/epilogue */
2545
2546void literal_pool(int n)
2547{
2548 if(!literalcount) return;
2549 if(n) {
2550 if((int)out-literals[0][0]<4096-n) return;
2551 }
2552 u_int *ptr;
2553 int i;
2554 for(i=0;i<literalcount;i++)
2555 {
2556 ptr=(u_int *)literals[i][0];
2557 u_int offset=(u_int)out-(u_int)ptr-8;
2558 assert(offset<4096);
2559 assert(!(offset&3));
2560 *ptr|=offset;
2561 output_w32(literals[i][1]);
2562 }
2563 literalcount=0;
2564}
2565
2566void literal_pool_jumpover(int n)
2567{
2568 if(!literalcount) return;
2569 if(n) {
2570 if((int)out-literals[0][0]<4096-n) return;
2571 }
2572 int jaddr=(int)out;
2573 emit_jmp(0);
2574 literal_pool(0);
2575 set_jump_target(jaddr,(int)out);
2576}
2577
2578emit_extjump2(int addr, int target, int linker)
2579{
2580 u_char *ptr=(u_char *)addr;
2581 assert((ptr[3]&0x0e)==0xa);
2582 emit_loadlp(target,0);
2583 emit_loadlp(addr,1);
24385cae 2584 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2585 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2586//DEBUG >
2587#ifdef DEBUG_CYCLE_COUNT
2588 emit_readword((int)&last_count,ECX);
2589 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2590 emit_readword((int)&next_interupt,ECX);
2591 emit_writeword(HOST_CCREG,(int)&Count);
2592 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2593 emit_writeword(ECX,(int)&last_count);
2594#endif
2595//DEBUG <
2596 emit_jmp(linker);
2597}
2598
2599emit_extjump(int addr, int target)
2600{
2601 emit_extjump2(addr, target, (int)dyna_linker);
2602}
2603emit_extjump_ds(int addr, int target)
2604{
2605 emit_extjump2(addr, target, (int)dyna_linker_ds);
2606}
2607
cbbab9cd 2608#ifdef PCSX
2609#include "pcsxmem_inline.c"
2610#endif
2611
57871462 2612do_readstub(int n)
2613{
2614 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2615 literal_pool(256);
2616 set_jump_target(stubs[n][1],(int)out);
2617 int type=stubs[n][0];
2618 int i=stubs[n][3];
2619 int rs=stubs[n][4];
2620 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2621 u_int reglist=stubs[n][7];
2622 signed char *i_regmap=i_regs->regmap;
2623 int addr=get_reg(i_regmap,AGEN1+(i&1));
2624 int rth,rt;
2625 int ds;
b9b61529 2626 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2627 rth=get_reg(i_regmap,FTEMP|64);
2628 rt=get_reg(i_regmap,FTEMP);
2629 }else{
2630 rth=get_reg(i_regmap,rt1[i]|64);
2631 rt=get_reg(i_regmap,rt1[i]);
2632 }
2633 assert(rs>=0);
57871462 2634 if(addr<0) addr=rt;
535d208a 2635 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2636 assert(addr>=0);
2637 int ftable=0;
2638 if(type==LOADB_STUB||type==LOADBU_STUB)
2639 ftable=(int)readmemb;
2640 if(type==LOADH_STUB||type==LOADHU_STUB)
2641 ftable=(int)readmemh;
2642 if(type==LOADW_STUB)
2643 ftable=(int)readmem;
24385cae 2644#ifndef FORCE32
57871462 2645 if(type==LOADD_STUB)
2646 ftable=(int)readmemd;
24385cae 2647#endif
2648 assert(ftable!=0);
57871462 2649 emit_writeword(rs,(int)&address);
2650 //emit_pusha();
2651 save_regs(reglist);
97a238a6 2652#ifndef PCSX
57871462 2653 ds=i_regs!=&regs[i];
2654 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2655 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2656 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2657 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2658 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2659#endif
57871462 2660 emit_shrimm(rs,16,1);
2661 int cc=get_reg(i_regmap,CCREG);
2662 if(cc<0) {
2663 emit_loadreg(CCREG,2);
2664 }
2665 emit_movimm(ftable,0);
2666 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2667#ifndef PCSX
57871462 2668 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2669#endif
57871462 2670 //emit_readword((int)&last_count,temp);
2671 //emit_add(cc,temp,cc);
2672 //emit_writeword(cc,(int)&Count);
2673 //emit_mov(15,14);
2674 emit_call((int)&indirect_jump_indexed);
2675 //emit_callreg(rs);
2676 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2677#ifndef PCSX
57871462 2678 // We really shouldn't need to update the count here,
2679 // but not doing so causes random crashes...
2680 emit_readword((int)&Count,HOST_TEMPREG);
2681 emit_readword((int)&next_interupt,2);
2682 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2683 emit_writeword(2,(int)&last_count);
2684 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2685 if(cc<0) {
2686 emit_storereg(CCREG,HOST_TEMPREG);
2687 }
f51dc36c 2688#endif
57871462 2689 //emit_popa();
2690 restore_regs(reglist);
2691 //if((cc=get_reg(regmap,CCREG))>=0) {
2692 // emit_loadreg(CCREG,cc);
2693 //}
f18c0f46 2694 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2695 assert(rt>=0);
2696 if(type==LOADB_STUB)
2697 emit_movsbl((int)&readmem_dword,rt);
2698 if(type==LOADBU_STUB)
2699 emit_movzbl((int)&readmem_dword,rt);
2700 if(type==LOADH_STUB)
2701 emit_movswl((int)&readmem_dword,rt);
2702 if(type==LOADHU_STUB)
2703 emit_movzwl((int)&readmem_dword,rt);
2704 if(type==LOADW_STUB)
2705 emit_readword((int)&readmem_dword,rt);
2706 if(type==LOADD_STUB) {
2707 emit_readword((int)&readmem_dword,rt);
2708 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2709 }
57871462 2710 }
2711 emit_jmp(stubs[n][2]); // return address
2712}
2713
2714inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2715{
2716 int rs=get_reg(regmap,target);
2717 int rth=get_reg(regmap,target|64);
2718 int rt=get_reg(regmap,target);
535d208a 2719 if(rs<0) rs=get_reg(regmap,-1);
57871462 2720 assert(rs>=0);
57871462 2721 int ftable=0;
2722 if(type==LOADB_STUB||type==LOADBU_STUB)
2723 ftable=(int)readmemb;
2724 if(type==LOADH_STUB||type==LOADHU_STUB)
2725 ftable=(int)readmemh;
2726 if(type==LOADW_STUB)
2727 ftable=(int)readmem;
24385cae 2728#ifndef FORCE32
57871462 2729 if(type==LOADD_STUB)
2730 ftable=(int)readmemd;
24385cae 2731#endif
2732 assert(ftable!=0);
cbbab9cd 2733#ifdef PCSX
2734 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2735 return;
2736#endif
fd99c415 2737 if(target==0)
2738 emit_movimm(addr,rs);
57871462 2739 emit_writeword(rs,(int)&address);
2740 //emit_pusha();
2741 save_regs(reglist);
0c1fe38b 2742#ifndef PCSX
2743 if((signed int)addr>=(signed int)0xC0000000) {
2744 // Theoretically we can have a pagefault here, if the TLB has never
2745 // been enabled and the address is outside the range 80000000..BFFFFFFF
2746 // Write out the registers so the pagefault can be handled. This is
2747 // a very rare case and likely represents a bug.
2748 int ds=regmap!=regs[i].regmap;
2749 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2750 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2751 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2752 }
2753#endif
57871462 2754 //emit_shrimm(rs,16,1);
2755 int cc=get_reg(regmap,CCREG);
2756 if(cc<0) {
2757 emit_loadreg(CCREG,2);
2758 }
2759 //emit_movimm(ftable,0);
2760 emit_movimm(((u_int *)ftable)[addr>>16],0);
2761 //emit_readword((int)&last_count,12);
2762 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2763#ifndef PCSX
57871462 2764 if((signed int)addr>=(signed int)0xC0000000) {
2765 // Pagefault address
2766 int ds=regmap!=regs[i].regmap;
2767 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2768 }
f51dc36c 2769#endif
57871462 2770 //emit_add(12,2,2);
2771 //emit_writeword(2,(int)&Count);
2772 //emit_call(((u_int *)ftable)[addr>>16]);
2773 emit_call((int)&indirect_jump);
f51dc36c 2774#ifndef PCSX
57871462 2775 // We really shouldn't need to update the count here,
2776 // but not doing so causes random crashes...
2777 emit_readword((int)&Count,HOST_TEMPREG);
2778 emit_readword((int)&next_interupt,2);
2779 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2780 emit_writeword(2,(int)&last_count);
2781 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2782 if(cc<0) {
2783 emit_storereg(CCREG,HOST_TEMPREG);
2784 }
f51dc36c 2785#endif
57871462 2786 //emit_popa();
2787 restore_regs(reglist);
fd99c415 2788 if(rt>=0) {
2789 if(type==LOADB_STUB)
2790 emit_movsbl((int)&readmem_dword,rt);
2791 if(type==LOADBU_STUB)
2792 emit_movzbl((int)&readmem_dword,rt);
2793 if(type==LOADH_STUB)
2794 emit_movswl((int)&readmem_dword,rt);
2795 if(type==LOADHU_STUB)
2796 emit_movzwl((int)&readmem_dword,rt);
2797 if(type==LOADW_STUB)
2798 emit_readword((int)&readmem_dword,rt);
2799 if(type==LOADD_STUB) {
2800 emit_readword((int)&readmem_dword,rt);
2801 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2802 }
57871462 2803 }
2804}
2805
2806do_writestub(int n)
2807{
2808 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2809 literal_pool(256);
2810 set_jump_target(stubs[n][1],(int)out);
2811 int type=stubs[n][0];
2812 int i=stubs[n][3];
2813 int rs=stubs[n][4];
2814 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2815 u_int reglist=stubs[n][7];
2816 signed char *i_regmap=i_regs->regmap;
2817 int addr=get_reg(i_regmap,AGEN1+(i&1));
2818 int rth,rt,r;
2819 int ds;
b9b61529 2820 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2821 rth=get_reg(i_regmap,FTEMP|64);
2822 rt=get_reg(i_regmap,r=FTEMP);
2823 }else{
2824 rth=get_reg(i_regmap,rs2[i]|64);
2825 rt=get_reg(i_regmap,r=rs2[i]);
2826 }
2827 assert(rs>=0);
2828 assert(rt>=0);
2829 if(addr<0) addr=get_reg(i_regmap,-1);
2830 assert(addr>=0);
2831 int ftable=0;
2832 if(type==STOREB_STUB)
2833 ftable=(int)writememb;
2834 if(type==STOREH_STUB)
2835 ftable=(int)writememh;
2836 if(type==STOREW_STUB)
2837 ftable=(int)writemem;
24385cae 2838#ifndef FORCE32
57871462 2839 if(type==STORED_STUB)
2840 ftable=(int)writememd;
24385cae 2841#endif
2842 assert(ftable!=0);
57871462 2843 emit_writeword(rs,(int)&address);
2844 //emit_shrimm(rs,16,rs);
2845 //emit_movmem_indexedx4(ftable,rs,rs);
2846 if(type==STOREB_STUB)
2847 emit_writebyte(rt,(int)&byte);
2848 if(type==STOREH_STUB)
2849 emit_writehword(rt,(int)&hword);
2850 if(type==STOREW_STUB)
2851 emit_writeword(rt,(int)&word);
2852 if(type==STORED_STUB) {
3d624f89 2853#ifndef FORCE32
57871462 2854 emit_writeword(rt,(int)&dword);
2855 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2856#else
2857 printf("STORED_STUB\n");
2858#endif
57871462 2859 }
2860 //emit_pusha();
2861 save_regs(reglist);
97a238a6 2862#ifndef PCSX
57871462 2863 ds=i_regs!=&regs[i];
2864 int real_rs=get_reg(i_regmap,rs1[i]);
2865 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2866 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2867 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2868 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2869#endif
57871462 2870 emit_shrimm(rs,16,1);
2871 int cc=get_reg(i_regmap,CCREG);
2872 if(cc<0) {
2873 emit_loadreg(CCREG,2);
2874 }
2875 emit_movimm(ftable,0);
2876 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2877#ifndef PCSX
57871462 2878 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2879#endif
57871462 2880 //emit_readword((int)&last_count,temp);
2881 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2882 //emit_add(cc,temp,cc);
2883 //emit_writeword(cc,(int)&Count);
2884 emit_call((int)&indirect_jump_indexed);
2885 //emit_callreg(rs);
2886 emit_readword((int)&Count,HOST_TEMPREG);
2887 emit_readword((int)&next_interupt,2);
2888 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2889 emit_writeword(2,(int)&last_count);
2890 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2891 if(cc<0) {
2892 emit_storereg(CCREG,HOST_TEMPREG);
2893 }
2894 //emit_popa();
2895 restore_regs(reglist);
2896 //if((cc=get_reg(regmap,CCREG))>=0) {
2897 // emit_loadreg(CCREG,cc);
2898 //}
2899 emit_jmp(stubs[n][2]); // return address
2900}
2901
2902inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2903{
2904 int rs=get_reg(regmap,-1);
2905 int rth=get_reg(regmap,target|64);
2906 int rt=get_reg(regmap,target);
2907 assert(rs>=0);
2908 assert(rt>=0);
cbbab9cd 2909#ifdef PCSX
2910 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2911 return;
2912#endif
57871462 2913 int ftable=0;
2914 if(type==STOREB_STUB)
2915 ftable=(int)writememb;
2916 if(type==STOREH_STUB)
2917 ftable=(int)writememh;
2918 if(type==STOREW_STUB)
2919 ftable=(int)writemem;
24385cae 2920#ifndef FORCE32
57871462 2921 if(type==STORED_STUB)
2922 ftable=(int)writememd;
24385cae 2923#endif
2924 assert(ftable!=0);
57871462 2925 emit_writeword(rs,(int)&address);
2926 //emit_shrimm(rs,16,rs);
2927 //emit_movmem_indexedx4(ftable,rs,rs);
2928 if(type==STOREB_STUB)
2929 emit_writebyte(rt,(int)&byte);
2930 if(type==STOREH_STUB)
2931 emit_writehword(rt,(int)&hword);
2932 if(type==STOREW_STUB)
2933 emit_writeword(rt,(int)&word);
2934 if(type==STORED_STUB) {
3d624f89 2935#ifndef FORCE32
57871462 2936 emit_writeword(rt,(int)&dword);
2937 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2938#else
2939 printf("STORED_STUB\n");
2940#endif
57871462 2941 }
2942 //emit_pusha();
2943 save_regs(reglist);
0c1fe38b 2944#ifndef PCSX
2945 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2946 if((signed int)addr>=(signed int)0xC0000000) {
2947 // Theoretically we can have a pagefault here, if the TLB has never
2948 // been enabled and the address is outside the range 80000000..BFFFFFFF
2949 // Write out the registers so the pagefault can be handled. This is
2950 // a very rare case and likely represents a bug.
2951 int ds=regmap!=regs[i].regmap;
2952 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2953 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2954 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2955 }
2956#endif
57871462 2957 //emit_shrimm(rs,16,1);
2958 int cc=get_reg(regmap,CCREG);
2959 if(cc<0) {
2960 emit_loadreg(CCREG,2);
2961 }
2962 //emit_movimm(ftable,0);
2963 emit_movimm(((u_int *)ftable)[addr>>16],0);
2964 //emit_readword((int)&last_count,12);
2965 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2966#ifndef PCSX
57871462 2967 if((signed int)addr>=(signed int)0xC0000000) {
2968 // Pagefault address
2969 int ds=regmap!=regs[i].regmap;
2970 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2971 }
f51dc36c 2972#endif
57871462 2973 //emit_add(12,2,2);
2974 //emit_writeword(2,(int)&Count);
2975 //emit_call(((u_int *)ftable)[addr>>16]);
2976 emit_call((int)&indirect_jump);
2977 emit_readword((int)&Count,HOST_TEMPREG);
2978 emit_readword((int)&next_interupt,2);
2979 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2980 emit_writeword(2,(int)&last_count);
2981 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2982 if(cc<0) {
2983 emit_storereg(CCREG,HOST_TEMPREG);
2984 }
2985 //emit_popa();
2986 restore_regs(reglist);
2987}
2988
2989do_unalignedwritestub(int n)
2990{
b7918751 2991 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2992 literal_pool(256);
57871462 2993 set_jump_target(stubs[n][1],(int)out);
b7918751 2994
2995 int i=stubs[n][3];
2996 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2997 int addr=stubs[n][5];
2998 u_int reglist=stubs[n][7];
2999 signed char *i_regmap=i_regs->regmap;
3000 int temp2=get_reg(i_regmap,FTEMP);
3001 int rt;
3002 int ds, real_rs;
3003 rt=get_reg(i_regmap,rs2[i]);
3004 assert(rt>=0);
3005 assert(addr>=0);
3006 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3007 reglist|=(1<<addr);
3008 reglist&=~(1<<temp2);
3009
3010 emit_andimm(addr,0xfffffffc,temp2);
3011 emit_writeword(temp2,(int)&address);
3012
3013 save_regs(reglist);
97a238a6 3014#ifndef PCSX
b7918751 3015 ds=i_regs!=&regs[i];
3016 real_rs=get_reg(i_regmap,rs1[i]);
3017 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3018 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3019 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3020 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3021#endif
b7918751 3022 emit_shrimm(addr,16,1);
3023 int cc=get_reg(i_regmap,CCREG);
3024 if(cc<0) {
3025 emit_loadreg(CCREG,2);
3026 }
3027 emit_movimm((u_int)readmem,0);
3028 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3029#ifndef PCSX
3030 // pagefault address
3031 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3032#endif
b7918751 3033 emit_call((int)&indirect_jump_indexed);
3034 restore_regs(reglist);
3035
3036 emit_readword((int)&readmem_dword,temp2);
3037 int temp=addr; //hmh
3038 emit_shlimm(addr,3,temp);
3039 emit_andimm(temp,24,temp);
3040#ifdef BIG_ENDIAN_MIPS
3041 if (opcode[i]==0x2e) // SWR
3042#else
3043 if (opcode[i]==0x2a) // SWL
3044#endif
3045 emit_xorimm(temp,24,temp);
3046 emit_movimm(-1,HOST_TEMPREG);
55439448 3047 if (opcode[i]==0x2a) { // SWL
b7918751 3048 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3049 emit_orrshr(rt,temp,temp2);
3050 }else{
3051 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3052 emit_orrshl(rt,temp,temp2);
3053 }
3054 emit_readword((int)&address,addr);
3055 emit_writeword(temp2,(int)&word);
3056 //save_regs(reglist); // don't need to, no state changes
3057 emit_shrimm(addr,16,1);
3058 emit_movimm((u_int)writemem,0);
3059 //emit_call((int)&indirect_jump_indexed);
3060 emit_mov(15,14);
3061 emit_readword_dualindexedx4(0,1,15);
3062 emit_readword((int)&Count,HOST_TEMPREG);
3063 emit_readword((int)&next_interupt,2);
3064 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3065 emit_writeword(2,(int)&last_count);
3066 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3067 if(cc<0) {
3068 emit_storereg(CCREG,HOST_TEMPREG);
3069 }
3070 restore_regs(reglist);
57871462 3071 emit_jmp(stubs[n][2]); // return address
3072}
3073
// Debug helper: print the eight arguments plus the int stored just below
// the first argument.
// NOTE(review): (&edi)[-1] reads outside the parameter object; presumably
// an x86 stack-layout trick - confirm before relying on the last value.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int below_first_arg=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_first_arg);
}
3078
3079do_invstub(int n)
3080{
3081 literal_pool(20);
3082 u_int reglist=stubs[n][3];
3083 set_jump_target(stubs[n][1],(int)out);
3084 save_regs(reglist);
3085 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3086 emit_call((int)&invalidate_addr);
3087 restore_regs(reglist);
3088 emit_jmp(stubs[n][2]); // return address
3089}
3090
3091int do_dirty_stub(int i)
3092{
3093 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3094 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3095 #ifdef PCSX
3096 addr=(u_int)source;
3097 #endif
57871462 3098 // Careful about the code output here, verify_dirty needs to parse it.
3099 #ifdef ARMv5_ONLY
ac545b3a 3100 emit_loadlp(addr,1);
57871462 3101 emit_loadlp((int)copy,2);
3102 emit_loadlp(slen*4,3);
3103 #else
ac545b3a 3104 emit_movw(addr&0x0000FFFF,1);
57871462 3105 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3106 emit_movt(addr&0xFFFF0000,1);
57871462 3107 emit_movt(((u_int)copy)&0xFFFF0000,2);
3108 emit_movw(slen*4,3);
3109 #endif
3110 emit_movimm(start+i*4,0);
3111 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3112 int entry=(int)out;
3113 load_regs_entry(i);
3114 if(entry==(int)out) entry=instr_addr[i];
3115 emit_jmp(instr_addr[i]);
3116 return entry;
3117}
3118
3119void do_dirty_stub_ds()
3120{
3121 // Careful about the code output here, verify_dirty needs to parse it.
3122 #ifdef ARMv5_ONLY
3123 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3124 emit_loadlp((int)copy,2);
3125 emit_loadlp(slen*4,3);
3126 #else
3127 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3128 emit_movw(((u_int)copy)&0x0000FFFF,2);
3129 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3130 emit_movt(((u_int)copy)&0xFFFF0000,2);
3131 emit_movw(slen*4,3);
3132 #endif
3133 emit_movimm(start+1,0);
3134 emit_call((int)&verify_code_ds);
3135}
3136
3137do_cop1stub(int n)
3138{
3139 literal_pool(256);
3140 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3141 set_jump_target(stubs[n][1],(int)out);
3142 int i=stubs[n][3];
3d624f89 3143// int rs=stubs[n][4];
57871462 3144 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3145 int ds=stubs[n][6];
3146 if(!ds) {
3147 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3148 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3149 }
3150 //else {printf("fp exception in delay slot\n");}
3151 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3152 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3153 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3154 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3155 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3156}
3157
3158/* TLB */
3159
3160int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3161{
3162 if(c) {
3163 if((signed int)addr>=(signed int)0xC0000000) {
3164 // address_generation already loaded the const
3165 emit_readword_dualindexedx4(FP,map,map);
3166 }
3167 else
3168 return -1; // No mapping
3169 }
3170 else {
3171 assert(s!=map);
3172 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3173 emit_addsr12(map,s,map);
3174 // Schedule this while we wait on the load
3175 //if(x) emit_xorimm(s,x,ar);
3176 if(shift>=0) emit_shlimm(s,3,shift);
3177 if(~a) emit_andimm(s,a,ar);
3178 emit_readword_dualindexedx4(FP,map,map);
3179 }
3180 return map;
3181}
3182int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3183{
3184 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3185 emit_test(map,map);
3186 *jaddr=(int)out;
3187 emit_js(0);
3188 }
3189 return map;
3190}
3191
3192int gen_tlb_addr_r(int ar, int map) {
3193 if(map>=0) {
3194 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3195 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3196 }
3197}
3198
3199int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3200{
3201 if(c) {
3202 if(addr<0x80800000||addr>=0xC0000000) {
3203 // address_generation already loaded the const
3204 emit_readword_dualindexedx4(FP,map,map);
3205 }
3206 else
3207 return -1; // No mapping
3208 }
3209 else {
3210 assert(s!=map);
3211 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3212 emit_addsr12(map,s,map);
3213 // Schedule this while we wait on the load
3214 //if(x) emit_xorimm(s,x,ar);
3215 emit_readword_dualindexedx4(FP,map,map);
3216 }
3217 return map;
3218}
3219int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3220{
3221 if(!c||addr<0x80800000||addr>=0xC0000000) {
3222 emit_testimm(map,0x40000000);
3223 *jaddr=(int)out;
3224 emit_jne(0);
3225 }
3226}
3227
3228int gen_tlb_addr_w(int ar, int map) {
3229 if(map>=0) {
3230 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3231 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3232 }
3233}
3234
3235// Generate the address of the memory_map entry, relative to dynarec_local
3236generate_map_const(u_int addr,int reg) {
3237 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3238 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3239}
3240
3241/* Special assem */
3242
3243void shift_assemble_arm(int i,struct regstat *i_regs)
3244{
3245 if(rt1[i]) {
3246 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3247 {
3248 signed char s,t,shift;
3249 t=get_reg(i_regs->regmap,rt1[i]);
3250 s=get_reg(i_regs->regmap,rs1[i]);
3251 shift=get_reg(i_regs->regmap,rs2[i]);
3252 if(t>=0){
3253 if(rs1[i]==0)
3254 {
3255 emit_zeroreg(t);
3256 }
3257 else if(rs2[i]==0)
3258 {
3259 assert(s>=0);
3260 if(s!=t) emit_mov(s,t);
3261 }
3262 else
3263 {
3264 emit_andimm(shift,31,HOST_TEMPREG);
3265 if(opcode2[i]==4) // SLLV
3266 {
3267 emit_shl(s,HOST_TEMPREG,t);
3268 }
3269 if(opcode2[i]==6) // SRLV
3270 {
3271 emit_shr(s,HOST_TEMPREG,t);
3272 }
3273 if(opcode2[i]==7) // SRAV
3274 {
3275 emit_sar(s,HOST_TEMPREG,t);
3276 }
3277 }
3278 }
3279 } else { // DSLLV/DSRLV/DSRAV
3280 signed char sh,sl,th,tl,shift;
3281 th=get_reg(i_regs->regmap,rt1[i]|64);
3282 tl=get_reg(i_regs->regmap,rt1[i]);
3283 sh=get_reg(i_regs->regmap,rs1[i]|64);
3284 sl=get_reg(i_regs->regmap,rs1[i]);
3285 shift=get_reg(i_regs->regmap,rs2[i]);
3286 if(tl>=0){
3287 if(rs1[i]==0)
3288 {
3289 emit_zeroreg(tl);
3290 if(th>=0) emit_zeroreg(th);
3291 }
3292 else if(rs2[i]==0)
3293 {
3294 assert(sl>=0);
3295 if(sl!=tl) emit_mov(sl,tl);
3296 if(th>=0&&sh!=th) emit_mov(sh,th);
3297 }
3298 else
3299 {
3300 // FIXME: What if shift==tl ?
3301 assert(shift!=tl);
3302 int temp=get_reg(i_regs->regmap,-1);
3303 int real_th=th;
3304 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3305 assert(sl>=0);
3306 assert(sh>=0);
3307 emit_andimm(shift,31,HOST_TEMPREG);
3308 if(opcode2[i]==0x14) // DSLLV
3309 {
3310 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3311 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3312 emit_orrshr(sl,HOST_TEMPREG,th);
3313 emit_andimm(shift,31,HOST_TEMPREG);
3314 emit_testimm(shift,32);
3315 emit_shl(sl,HOST_TEMPREG,tl);
3316 if(th>=0) emit_cmovne_reg(tl,th);
3317 emit_cmovne_imm(0,tl);
3318 }
3319 if(opcode2[i]==0x16) // DSRLV
3320 {
3321 assert(th>=0);
3322 emit_shr(sl,HOST_TEMPREG,tl);
3323 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3324 emit_orrshl(sh,HOST_TEMPREG,tl);
3325 emit_andimm(shift,31,HOST_TEMPREG);
3326 emit_testimm(shift,32);
3327 emit_shr(sh,HOST_TEMPREG,th);
3328 emit_cmovne_reg(th,tl);
3329 if(real_th>=0) emit_cmovne_imm(0,th);
3330 }
3331 if(opcode2[i]==0x17) // DSRAV
3332 {
3333 assert(th>=0);
3334 emit_shr(sl,HOST_TEMPREG,tl);
3335 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3336 if(real_th>=0) {
3337 assert(temp>=0);
3338 emit_sarimm(th,31,temp);
3339 }
3340 emit_orrshl(sh,HOST_TEMPREG,tl);
3341 emit_andimm(shift,31,HOST_TEMPREG);
3342 emit_testimm(shift,32);
3343 emit_sar(sh,HOST_TEMPREG,th);
3344 emit_cmovne_reg(th,tl);
3345 if(real_th>=0) emit_cmovne_reg(temp,th);
3346 }
3347 }
3348 }
3349 }
3350 }
3351}
3352#define shift_assemble shift_assemble_arm
3353
3354void loadlr_assemble_arm(int i,struct regstat *i_regs)
3355{
3356 int s,th,tl,temp,temp2,addr,map=-1;
3357 int offset;
3358 int jaddr=0;
af4ee1fe 3359 int memtarget=0,c=0;
57871462 3360 u_int hr,reglist=0;
3361 th=get_reg(i_regs->regmap,rt1[i]|64);
3362 tl=get_reg(i_regs->regmap,rt1[i]);
3363 s=get_reg(i_regs->regmap,rs1[i]);
3364 temp=get_reg(i_regs->regmap,-1);
3365 temp2=get_reg(i_regs->regmap,FTEMP);
3366 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3367 assert(addr<0);
3368 offset=imm[i];
3369 for(hr=0;hr<HOST_REGS;hr++) {
3370 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3371 }
3372 reglist|=1<<temp;
3373 if(offset||s<0||c) addr=temp2;
3374 else addr=s;
3375 if(s>=0) {
3376 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3377 if(c) {
3378 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3379 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3380 }
57871462 3381 }
535d208a 3382 if(!using_tlb) {
3383 if(!c) {
3384 #ifdef RAM_OFFSET
3385 map=get_reg(i_regs->regmap,ROREG);
3386 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3387 #endif
3388 emit_shlimm(addr,3,temp);
3389 if (opcode[i]==0x22||opcode[i]==0x26) {
3390 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3391 }else{
535d208a 3392 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3393 }
535d208a 3394 emit_cmpimm(addr,RAM_SIZE);
3395 jaddr=(int)out;
3396 emit_jno(0);
3397 }
3398 else {
3399 if (opcode[i]==0x22||opcode[i]==0x26) {
3400 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3401 }else{
3402 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3403 }
57871462 3404 }
535d208a 3405 }else{ // using tlb
3406 int a;
3407 if(c) {
3408 a=-1;
3409 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3410 a=0xFFFFFFFC; // LWL/LWR
3411 }else{
3412 a=0xFFFFFFF8; // LDL/LDR
3413 }
3414 map=get_reg(i_regs->regmap,TLREG);
3415 assert(map>=0);
ea3d2e6e 3416 reglist&=~(1<<map);
535d208a 3417 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3418 if(c) {
3419 if (opcode[i]==0x22||opcode[i]==0x26) {
3420 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3421 }else{
3422 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3423 }
535d208a 3424 }
3425 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3426 }
3427 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3428 if(!c||memtarget) {
3429 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3430 emit_readword_indexed_tlb(0,temp2,map,temp2);
3431 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3432 }
3433 else
3434 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3435 if(rt1[i]) {
3436 assert(tl>=0);
57871462 3437 emit_andimm(temp,24,temp);
2002a1db 3438#ifdef BIG_ENDIAN_MIPS
3439 if (opcode[i]==0x26) // LWR
3440#else
3441 if (opcode[i]==0x22) // LWL