drc: merge Ari64's patch: 19_arm_typos
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
f76eeef9 178void *kill_pointer(void *stub)
57871462 179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 186 return i_ptr;
57871462 187}
188
// Given a stub whose second word is a pc-relative literal load, fetch the
// branch instruction address stored in that literal and return the address
// that branch currently jumps to (branch address + 8 + signed word offset*4).
//
// The 24-bit offset is sign-extended by shifting as unsigned first and only
// then converting to int, as verify_dirty() does; left-shifting the (negative)
// signed instruction word directly is undefined behaviour.
int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *load = (int *)((char *)stub + 4);
  assert((*load & 0x0ff00000) == 0x05900000);
  u_int lit_off = *load & 0xfff;
  int **literal = (int **)((char *)load + lit_off + 8);
  int *branch = *literal;
  assert((*branch & 0x0f000000) == 0x0a000000);
  int disp = (int)((u_int)*branch << 8) >> 6;
  return (int)((u_int)branch + (u_int)disp + 8);
}
200
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
//
// The dirty stub starts with 4 (ARMv5_ONLY) or 6 setup words, optionally one
// more word, then a `bl`. The clean entry is the word after the `bl`; if that
// word is an unconditional branch, its destination is returned instead.
//
// The branch offset is sign-extended via an unsigned shift (as in
// verify_dirty()) because left-shifting a negative int is undefined.
u_int get_clean_addr(int addr)
{
  u_int *ptr = (u_int *)addr;
  #ifdef ARMv5_ONLY
  ptr += 4;
  #else
  ptr += 6;
  #endif
  if ((*ptr & 0xFF000000) != 0xeb000000) ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  ptr++;
  if ((*ptr & 0xFF000000) == 0xea000000) {
    return (int)ptr + ((int)(*ptr << 8) >> 6) + 8; // follow jump
  }
  return (u_int)ptr;
}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
334 r=cur->regmap[preferred_reg];
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
500 r=cur->regmap[preferred_reg];
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
f776eb14 765 int dirty=0;
57871462 766
767 // see if it's already allocated (and dealloc it)
768 for(n=0;n<HOST_REGS;n++)
769 {
f776eb14 770 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
771 dirty=(cur->dirty>>n)&1;
772 cur->regmap[n]=-1;
773 }
57871462 774 }
775
776 cur->regmap[hr]=reg;
777 cur->dirty&=~(1<<hr);
f776eb14 778 cur->dirty|=dirty<<hr;
57871462 779 cur->isconst&=~(1<<hr);
780}
781
782// Alloc cycle count into dedicated register
783alloc_cc(struct regstat *cur,int i)
784{
785 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
786}
787
788/* Special alloc */
789
790
791/* Assembler */
792
// Printable names of the 16 ARM registers, indexed by register number
// (used by the assem_debug() traces).
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
810
811void output_byte(u_char byte)
812{
813 *(out++)=byte;
814}
815void output_modrm(u_char mod,u_char rm,u_char ext)
816{
817 assert(mod<4);
818 assert(rm<8);
819 assert(ext<8);
820 u_char byte=(mod<<6)|(ext<<3)|rm;
821 *(out++)=byte;
822}
823void output_sib(u_char scale,u_char index,u_char base)
824{
825 assert(scale<4);
826 assert(index<8);
827 assert(base<8);
828 u_char byte=(scale<<6)|(index<<3)|base;
829 *(out++)=byte;
830}
831void output_w32(u_int word)
832{
833 *((u_int *)out)=word;
834 out+=4;
835}
// Pack three register numbers into their instruction fields:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);
  u_int fields = rn << 16;
  fields |= rd << 12;
  fields |= rm;
  return fields;
}
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// (an even bit count). The shift is stored as ((32-shift)&30)<<7, i.e. the
// rotate field holds (32-shift)/2.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);
  const u_int rotate_bits = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_bits | imm;
}
// Try to express `imm` as an 8-bit value rotated by an even amount.
// On success stores rotate-field | imm8 in *encoded and returns 1; otherwise
// returns 0 with *encoded left at 0. Zero always encodes as 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded = 0;
  if (imm == 0) return 1;
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate the candidate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
cfbd3c6e 866void genimm_checked(u_int imm,u_int *encoded)
867{
868 u_int ret=genimm(imm,encoded);
869 assert(ret);
870}
57871462 871u_int genjmp(u_int addr)
872{
873 int offset=addr-(int)out-8;
e80343e2 874 if(offset<-33554432||offset>=33554432) {
875 if (addr>2) {
876 printf("genjmp: out of range: %08x\n", offset);
877 exit(1);
878 }
879 return 0;
880 }
57871462 881 return ((u_int)offset>>2)&0xffffff;
882}
883
884void emit_mov(int rs,int rt)
885{
886 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
887 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
888}
889
890void emit_movs(int rs,int rt)
891{
892 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
893 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
894}
895
896void emit_add(int rs1,int rs2,int rt)
897{
898 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
899 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
900}
901
902void emit_adds(int rs1,int rs2,int rt)
903{
904 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
906}
907
908void emit_adcs(int rs1,int rs2,int rt)
909{
910 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
912}
913
914void emit_sbc(int rs1,int rs2,int rt)
915{
916 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
918}
919
920void emit_sbcs(int rs1,int rs2,int rt)
921{
922 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
923 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
924}
925
926void emit_neg(int rs, int rt)
927{
928 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
929 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
930}
931
932void emit_negs(int rs, int rt)
933{
934 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
935 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
936}
937
938void emit_sub(int rs1,int rs2,int rt)
939{
940 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
942}
943
944void emit_subs(int rs1,int rs2,int rt)
945{
946 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
947 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
948}
949
950void emit_zeroreg(int rt)
951{
952 assem_debug("mov %s,#0\n",regname[rt]);
953 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
954}
955
790ee18e 956void emit_loadlp(u_int imm,u_int rt)
957{
958 add_literal((int)out,imm);
959 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
960 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
961}
962void emit_movw(u_int imm,u_int rt)
963{
964 assert(imm<65536);
965 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
966 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
967}
968void emit_movt(u_int imm,u_int rt)
969{
970 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
971 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
972}
973void emit_movimm(u_int imm,u_int rt)
974{
975 u_int armval;
976 if(genimm(imm,&armval)) {
977 assem_debug("mov %s,#%d\n",regname[rt],imm);
978 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
979 }else if(genimm(~imm,&armval)) {
980 assem_debug("mvn %s,#%d\n",regname[rt],imm);
981 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
982 }else if(imm<65536) {
983 #ifdef ARMv5_ONLY
984 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
985 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
986 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
987 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
988 #else
989 emit_movw(imm,rt);
990 #endif
991 }else{
992 #ifdef ARMv5_ONLY
993 emit_loadlp(imm,rt);
994 #else
995 emit_movw(imm&0x0000FFFF,rt);
996 emit_movt(imm&0xFFFF0000,rt);
997 #endif
998 }
999}
1000void emit_pcreladdr(u_int rt)
1001{
1002 assem_debug("add %s,pc,#?\n",regname[rt]);
1003 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1004}
1005
57871462 1006void emit_loadreg(int r, int hr)
1007{
3d624f89 1008#ifdef FORCE32
1009 if(r&64) {
1010 printf("64bit load in 32bit mode!\n");
7f2607ea 1011 assert(0);
1012 return;
3d624f89 1013 }
1014#endif
57871462 1015 if((r&63)==0)
1016 emit_zeroreg(hr);
1017 else {
3d624f89 1018 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1019 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1020 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1021 if(r==CCREG) addr=(int)&cycle_count;
1022 if(r==CSREG) addr=(int)&Status;
1023 if(r==FSREG) addr=(int)&FCR31;
1024 if(r==INVCP) addr=(int)&invc_ptr;
1025 u_int offset = addr-(u_int)&dynarec_local;
1026 assert(offset<4096);
1027 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1028 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1029 }
1030}
1031void emit_storereg(int r, int hr)
1032{
3d624f89 1033#ifdef FORCE32
1034 if(r&64) {
1035 printf("64bit store in 32bit mode!\n");
7f2607ea 1036 assert(0);
1037 return;
3d624f89 1038 }
1039#endif
1040 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1041 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1042 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1043 if(r==CCREG) addr=(int)&cycle_count;
1044 if(r==FSREG) addr=(int)&FCR31;
1045 u_int offset = addr-(u_int)&dynarec_local;
1046 assert(offset<4096);
1047 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1048 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1049}
1050
1051void emit_test(int rs, int rt)
1052{
1053 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1054 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1055}
1056
1057void emit_testimm(int rs,int imm)
1058{
1059 u_int armval;
5a05d80c 1060 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1061 genimm_checked(imm,&armval);
57871462 1062 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1063}
1064
b9b61529 1065void emit_testeqimm(int rs,int imm)
1066{
1067 u_int armval;
1068 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1069 genimm_checked(imm,&armval);
b9b61529 1070 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1071}
1072
57871462 1073void emit_not(int rs,int rt)
1074{
1075 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1076 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1077}
1078
b9b61529 1079void emit_mvnmi(int rs,int rt)
1080{
1081 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1082 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1083}
1084
57871462 1085void emit_and(u_int rs1,u_int rs2,u_int rt)
1086{
1087 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1088 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1089}
1090
1091void emit_or(u_int rs1,u_int rs2,u_int rt)
1092{
1093 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1095}
1096void emit_or_and_set_flags(int rs1,int rs2,int rt)
1097{
1098 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1100}
1101
f70d384d 1102void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1103{
1104 assert(rs<16);
1105 assert(rt<16);
1106 assert(imm<32);
1107 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1108 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1109}
1110
576bbd8f 1111void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1112{
1113 assert(rs<16);
1114 assert(rt<16);
1115 assert(imm<32);
1116 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1117 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1118}
1119
57871462 1120void emit_xor(u_int rs1,u_int rs2,u_int rt)
1121{
1122 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1124}
1125
57871462 1126void emit_addimm(u_int rs,int imm,u_int rt)
1127{
1128 assert(rs<16);
1129 assert(rt<16);
1130 if(imm!=0) {
1131 assert(imm>-65536&&imm<65536);
1132 u_int armval;
1133 if(genimm(imm,&armval)) {
1134 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1135 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1136 }else if(genimm(-imm,&armval)) {
1137 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1138 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1139 }else if(imm<0) {
1140 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1141 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1142 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1143 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1144 }else{
1145 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1146 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1147 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1148 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1149 }
1150 }
1151 else if(rs!=rt) emit_mov(rs,rt);
1152}
1153
1154void emit_addimm_and_set_flags(int imm,int rt)
1155{
1156 assert(imm>-65536&&imm<65536);
1157 u_int armval;
1158 if(genimm(imm,&armval)) {
1159 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1160 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1161 }else if(genimm(-imm,&armval)) {
1162 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1163 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1164 }else if(imm<0) {
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1166 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1168 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1169 }else{
1170 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1171 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1172 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1173 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1174 }
1175}
1176void emit_addimm_no_flags(u_int imm,u_int rt)
1177{
1178 emit_addimm(rt,imm,rt);
1179}
1180
1181void emit_addnop(u_int r)
1182{
1183 assert(r<16);
1184 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1185 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1186}
1187
1188void emit_adcimm(u_int rs,int imm,u_int rt)
1189{
1190 u_int armval;
cfbd3c6e 1191 genimm_checked(imm,&armval);
57871462 1192 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1193 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1194}
1195/*void emit_sbcimm(int imm,u_int rt)
1196{
1197 u_int armval;
cfbd3c6e 1198 genimm_checked(imm,&armval);
57871462 1199 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1200 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1201}*/
1202void emit_sbbimm(int imm,u_int rt)
1203{
1204 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1205 assert(rt<8);
1206 if(imm<128&&imm>=-128) {
1207 output_byte(0x83);
1208 output_modrm(3,rt,3);
1209 output_byte(imm);
1210 }
1211 else
1212 {
1213 output_byte(0x81);
1214 output_modrm(3,rt,3);
1215 output_w32(imm);
1216 }
1217}
1218void emit_rscimm(int rs,int imm,u_int rt)
1219{
1220 assert(0);
1221 u_int armval;
cfbd3c6e 1222 genimm_checked(imm,&armval);
57871462 1223 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1226
1227void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1228{
1229 // TODO: if(genimm(imm,&armval)) ...
1230 // else
1231 emit_movimm(imm,HOST_TEMPREG);
1232 emit_adds(HOST_TEMPREG,rsl,rtl);
1233 emit_adcimm(rsh,0,rth);
1234}
1235
1236void emit_sbb(int rs1,int rs2)
1237{
1238 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1239 output_byte(0x19);
1240 output_modrm(3,rs1,rs2);
1241}
1242
1243void emit_andimm(int rs,int imm,int rt)
1244{
1245 u_int armval;
790ee18e 1246 if(imm==0) {
1247 emit_zeroreg(rt);
1248 }else if(genimm(imm,&armval)) {
57871462 1249 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1250 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1251 }else if(genimm(~imm,&armval)) {
1252 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1254 }else if(imm==65535) {
1255 #ifdef ARMv5_ONLY
1256 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1257 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1258 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1260 #else
1261 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1262 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1263 #endif
1264 }else{
1265 assert(imm>0&&imm<65535);
1266 #ifdef ARMv5_ONLY
1267 assem_debug("mov r14,#%d\n",imm&0xFF00);
1268 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1269 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1270 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1271 #else
1272 emit_movw(imm,HOST_TEMPREG);
1273 #endif
1274 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1275 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1276 }
1277}
1278
1279void emit_orimm(int rs,int imm,int rt)
1280{
1281 u_int armval;
790ee18e 1282 if(imm==0) {
1283 if(rs!=rt) emit_mov(rs,rt);
1284 }else if(genimm(imm,&armval)) {
57871462 1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1287 }else{
1288 assert(imm>0&&imm<65536);
1289 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1291 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1292 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1293 }
1294}
1295
1296void emit_xorimm(int rs,int imm,int rt)
1297{
57871462 1298 u_int armval;
790ee18e 1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
57871462 1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
514ed0d9 1305 assert(imm>0&&imm<65536);
57871462 1306 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
1313void emit_shlimm(int rs,u_int imm,int rt)
1314{
1315 assert(imm>0);
1316 assert(imm<32);
1317 //if(imm==1) ...
1318 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1319 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1320}
1321
1322void emit_shrimm(int rs,u_int imm,int rt)
1323{
1324 assert(imm>0);
1325 assert(imm<32);
1326 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1327 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1328}
1329
1330void emit_sarimm(int rs,u_int imm,int rt)
1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1335 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1336}
1337
1338void emit_rorimm(int rs,u_int imm,int rt)
1339{
1340 assert(imm>0);
1341 assert(imm<32);
1342 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1343 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1344}
1345
1346void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1347{
1348 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1349 assert(imm>0);
1350 assert(imm<32);
1351 //if(imm==1) ...
1352 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1354 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1355 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1356}
1357
1358void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1359{
1360 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1361 assert(imm>0);
1362 assert(imm<32);
1363 //if(imm==1) ...
1364 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1365 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1366 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1367 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1368}
1369
b9b61529 1370void emit_signextend16(int rs,int rt)
1371{
1372 #ifdef ARMv5_ONLY
1373 emit_shlimm(rs,16,rt);
1374 emit_sarimm(rt,16,rt);
1375 #else
1376 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1377 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1378 #endif
1379}
1380
57871462 1381void emit_shl(u_int rs,u_int shift,u_int rt)
1382{
1383 assert(rs<16);
1384 assert(rt<16);
1385 assert(shift<16);
1386 //if(imm==1) ...
1387 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1388 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1389}
1390void emit_shr(u_int rs,u_int shift,u_int rt)
1391{
1392 assert(rs<16);
1393 assert(rt<16);
1394 assert(shift<16);
1395 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1397}
1398void emit_sar(u_int rs,u_int shift,u_int rt)
1399{
1400 assert(rs<16);
1401 assert(rt<16);
1402 assert(shift<16);
1403 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1404 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1405}
1406void emit_shlcl(int r)
1407{
1408 assem_debug("shl %%%s,%%cl\n",regname[r]);
1409 assert(0);
1410}
1411void emit_shrcl(int r)
1412{
1413 assem_debug("shr %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416void emit_sarcl(int r)
1417{
1418 assem_debug("sar %%%s,%%cl\n",regname[r]);
1419 assert(0);
1420}
1421
1422void emit_shldcl(int r1,int r2)
1423{
1424 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1425 assert(0);
1426}
1427void emit_shrdcl(int r1,int r2)
1428{
1429 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1430 assert(0);
1431}
1432void emit_orrshl(u_int rs,u_int shift,u_int rt)
1433{
1434 assert(rs<16);
1435 assert(rt<16);
1436 assert(shift<16);
1437 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1438 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1439}
1440void emit_orrshr(u_int rs,u_int shift,u_int rt)
1441{
1442 assert(rs<16);
1443 assert(rt<16);
1444 assert(shift<16);
1445 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1446 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1447}
1448
1449void emit_cmpimm(int rs,int imm)
1450{
1451 u_int armval;
1452 if(genimm(imm,&armval)) {
5a05d80c 1453 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1454 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1455 }else if(genimm(-imm,&armval)) {
5a05d80c 1456 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1457 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1458 }else if(imm>0) {
1459 assert(imm<65536);
1460 #ifdef ARMv5_ONLY
1461 emit_movimm(imm,HOST_TEMPREG);
1462 #else
1463 emit_movw(imm,HOST_TEMPREG);
1464 #endif
1465 assem_debug("cmp %s,r14\n",regname[rs]);
1466 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1467 }else{
1468 assert(imm>-65536);
1469 #ifdef ARMv5_ONLY
1470 emit_movimm(-imm,HOST_TEMPREG);
1471 #else
1472 emit_movw(-imm,HOST_TEMPREG);
1473 #endif
1474 assem_debug("cmn %s,r14\n",regname[rs]);
1475 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1476 }
1477}
1478
1479void emit_cmovne(u_int *addr,int rt)
1480{
1481 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1482 assert(0);
1483}
1484void emit_cmovl(u_int *addr,int rt)
1485{
1486 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovs(u_int *addr,int rt)
1490{
1491 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1492 assert(0);
1493}
1494void emit_cmovne_imm(int imm,int rt)
1495{
1496 assem_debug("movne %s,#%d\n",regname[rt],imm);
1497 u_int armval;
cfbd3c6e 1498 genimm_checked(imm,&armval);
57871462 1499 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1500}
1501void emit_cmovl_imm(int imm,int rt)
1502{
1503 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1504 u_int armval;
cfbd3c6e 1505 genimm_checked(imm,&armval);
57871462 1506 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1507}
1508void emit_cmovb_imm(int imm,int rt)
1509{
1510 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1511 u_int armval;
cfbd3c6e 1512 genimm_checked(imm,&armval);
57871462 1513 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1514}
1515void emit_cmovs_imm(int imm,int rt)
1516{
1517 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1518 u_int armval;
cfbd3c6e 1519 genimm_checked(imm,&armval);
57871462 1520 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1521}
1522void emit_cmove_reg(int rs,int rt)
1523{
1524 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1526}
1527void emit_cmovne_reg(int rs,int rt)
1528{
1529 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovl_reg(int rs,int rt)
1533{
1534 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1536}
1537void emit_cmovs_reg(int rs,int rt)
1538{
1539 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1540 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1541}
1542
/*
 * rt = (rs < imm) using the "lt" condition.  rt is cleared before the
 * compare when it is a different register from rs, and after it when they
 * are the same, so the compare always sees the original rs.
 */
void emit_slti32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);
  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * rt = (rs < imm) using the "cc" condition.  rt is cleared before the
 * compare when it is a different register from rs, and after it when they
 * are the same, so the compare always sees the original rs.
 */
void emit_sltiu32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);
  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
/*
 * Set-less-than of the 64-bit pair (rsh:rsl) against imm, result in rt.
 * Starts from the 32-bit result on the low word, then corrects it from the
 * high word: tested against itself when imm >= 0, compared with -1 when
 * imm < 0.  rsh must not be rt.
 */
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
/*
 * "cc"-condition set-less-than of the 64-bit pair (rsh:rsl) against imm.
 * Starts from the 32-bit result on the low word; a non-zero high word forces
 * 0 when imm >= 0, and a high word different from -1 forces 1 when imm < 0.
 * rsh must not be rt.
 */
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1589
1590void emit_cmp(int rs,int rt)
1591{
1592 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1593 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1594}
/* rt = 1, then replaced by 0 under "lt" after comparing rs with 1 (i.e. rt = rs > 0). */
void emit_set_gz32(int rs, int rt)
{
  /* Compare first: rt may alias rs. */
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/*
 * Normalise rs to a 0/1 value in rt: flags come from a flag-setting move
 * (rs != rt) or a self-test (rs == rt), then rt is set to 1 under "ne".
 */
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
/*
 * 64-bit "greater than zero" into rt: begins with the 32-bit result for the
 * low word, then overrides it from the high word (1 if non-zero, 0 if "mi").
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/* 64-bit "non-zero" into rt: flag-setting OR of both halves, then rt = 1 under "ne". */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
/*
 * rt = (rs1 < rs2) using the "lt" condition.  rt is cleared before the
 * compare only when it aliases neither source; otherwise it is cleared
 * after the compare.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliases_src = (rs1 == rt || rs2 == rt);
  if (!aliases_src) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliases_src) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
/*
 * rt = (rs1 < rs2) using the "cc" condition.  rt is cleared before the
 * compare only when it aliases neither source; otherwise it is cleared
 * after the compare.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliases_src = (rs1 == rt || rs2 == rt);
  if (!aliases_src) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliases_src) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1639void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1640{
1641 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1642 assert(u1!=rt);
1643 assert(u2!=rt);
1644 emit_cmp(l1,l2);
1645 emit_movimm(0,rt);
1646 emit_sbcs(u1,u2,HOST_TEMPREG);
1647 emit_cmovl_imm(1,rt);
1648}
1649void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1650{
1651 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1652 assert(u1!=rt);
1653 assert(u2!=rt);
1654 emit_cmp(l1,l2);
1655 emit_movimm(0,rt);
1656 emit_sbcs(u1,u2,HOST_TEMPREG);
1657 emit_cmovb_imm(1,rt);
1658}
1659
1660void emit_call(int a)
1661{
1662 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1663 u_int offset=genjmp(a);
1664 output_w32(0xeb000000|offset);
1665}
1666void emit_jmp(int a)
1667{
1668 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1669 u_int offset=genjmp(a);
1670 output_w32(0xea000000|offset);
1671}
1672void emit_jne(int a)
1673{
1674 assem_debug("bne %x\n",a);
1675 u_int offset=genjmp(a);
1676 output_w32(0x1a000000|offset);
1677}
1678void emit_jeq(int a)
1679{
1680 assem_debug("beq %x\n",a);
1681 u_int offset=genjmp(a);
1682 output_w32(0x0a000000|offset);
1683}
1684void emit_js(int a)
1685{
1686 assem_debug("bmi %x\n",a);
1687 u_int offset=genjmp(a);
1688 output_w32(0x4a000000|offset);
1689}
1690void emit_jns(int a)
1691{
1692 assem_debug("bpl %x\n",a);
1693 u_int offset=genjmp(a);
1694 output_w32(0x5a000000|offset);
1695}
1696void emit_jl(int a)
1697{
1698 assem_debug("blt %x\n",a);
1699 u_int offset=genjmp(a);
1700 output_w32(0xba000000|offset);
1701}
1702void emit_jge(int a)
1703{
1704 assem_debug("bge %x\n",a);
1705 u_int offset=genjmp(a);
1706 output_w32(0xaa000000|offset);
1707}
1708void emit_jno(int a)
1709{
1710 assem_debug("bvc %x\n",a);
1711 u_int offset=genjmp(a);
1712 output_w32(0x7a000000|offset);
1713}
1714void emit_jc(int a)
1715{
1716 assem_debug("bcs %x\n",a);
1717 u_int offset=genjmp(a);
1718 output_w32(0x2a000000|offset);
1719}
1720void emit_jcc(int a)
1721{
1722 assem_debug("bcc %x\n",a);
1723 u_int offset=genjmp(a);
1724 output_w32(0x3a000000|offset);
1725}
1726
/* Unimplemented here: logs an x86-style mnemonic, then hits assert(0). */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* Unimplemented here: logs an x86-style mnemonic, then hits assert(0). */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented here: logs an x86-style mnemonic, then hits assert(0). */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1742void emit_pushreg(u_int r)
1743{
1744 assem_debug("push %%%s\n",regname[r]);
1745 assert(0);
1746}
1747void emit_popreg(u_int r)
1748{
1749 assem_debug("pop %%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_callreg(u_int r)
1753{
1754 assem_debug("call *%%%s\n",regname[r]);
1755 assert(0);
1756}
1757void emit_jmpreg(u_int r)
1758{
1759 assem_debug("mov pc,%s\n",regname[r]);
1760 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1761}
1762
1763void emit_readword_indexed(int offset, int rs, int rt)
1764{
1765 assert(offset>-4096&&offset<4096);
1766 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1767 if(offset>=0) {
1768 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1769 }else{
1770 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1771 }
1772}
1773void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1774{
1775 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1776 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1777}
/*
 * Word load that optionally goes through a map register: map < 0 selects the
 * plain base+offset form; otherwise addr must be 0 and the scaled
 * register-indexed form is used.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * 64-bit load as two word loads; the high word (at addr) is skipped when
 * rh < 0.  Without a map the low word is read from addr+4.  With a map the
 * map register itself is incremented by 1 between the two loads, so it is
 * modified by the emitted code; rh must differ from rs in that case.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if (map >= 0) {
    assert(rh != rs);
    if (rh >= 0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map, 1, map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if (rh >= 0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr + 4, rs, rl);
}
1798void emit_movsbl_indexed(int offset, int rs, int rt)
1799{
1800 assert(offset>-256&&offset<256);
1801 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1802 if(offset>=0) {
1803 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1804 }else{
1805 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1806 }
1807}
1808void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1809{
1810 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1811 else {
1812 if(addr==0) {
1813 emit_shlimm(map,2,map);
1814 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1815 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1816 }else{
1817 assert(addr>-256&&addr<256);
1818 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1819 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1820 emit_movsbl_indexed(addr, rt, rt);
1821 }
1822 }
1823}
1824void emit_movswl_indexed(int offset, int rs, int rt)
1825{
1826 assert(offset>-256&&offset<256);
1827 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1828 if(offset>=0) {
1829 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1830 }else{
1831 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1832 }
1833}
1834void emit_movzbl_indexed(int offset, int rs, int rt)
1835{
1836 assert(offset>-4096&&offset<4096);
1837 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1838 if(offset>=0) {
1839 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1840 }else{
1841 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1842 }
1843}
1844void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1845{
1846 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1847 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1848}
/*
 * Zero-extending byte load, optionally through a map register.  With a map
 * and a non-zero addr, rs+addr is first formed in rt, which then serves as
 * the base of the scaled register-indexed load.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1861void emit_movzwl_indexed(int offset, int rs, int rt)
1862{
1863 assert(offset>-256&&offset<256);
1864 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1867 }else{
1868 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1869 }
1870}
1871void emit_readword(int addr, int rt)
1872{
1873 u_int offset = addr-(u_int)&dynarec_local;
1874 assert(offset<4096);
1875 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1876 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1877}
1878void emit_movsbl(int addr, int rt)
1879{
1880 u_int offset = addr-(u_int)&dynarec_local;
1881 assert(offset<256);
1882 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1883 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1884}
1885void emit_movswl(int addr, int rt)
1886{
1887 u_int offset = addr-(u_int)&dynarec_local;
1888 assert(offset<256);
1889 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1890 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1891}
1892void emit_movzbl(int addr, int rt)
1893{
1894 u_int offset = addr-(u_int)&dynarec_local;
1895 assert(offset<4096);
1896 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1897 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1898}
1899void emit_movzwl(int addr, int rt)
1900{
1901 u_int offset = addr-(u_int)&dynarec_local;
1902 assert(offset<256);
1903 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1904 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1905}
1906void emit_movzwl_reg(int rs, int rt)
1907{
1908 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1909 assert(0);
1910}
1911
1912void emit_xchg(int rs, int rt)
1913{
1914 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1915 assert(0);
1916}
1917void emit_writeword_indexed(int rt, int offset, int rs)
1918{
1919 assert(offset>-4096&&offset<4096);
1920 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1921 if(offset>=0) {
1922 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1923 }else{
1924 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1925 }
1926}
1927void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1928{
1929 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1930 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1931}
/*
 * Word store that optionally goes through a map register: map < 0 selects
 * the plain base+offset form; otherwise addr must be 0 and the scaled
 * register-indexed form is used.  `temp` is not used by this function.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * 64-bit store as two word stores.
 *   map < 0   : high word (skipped when rh < 0) at addr, low word at addr+4.
 *   map >= 0  : rh is required.  When temp is a separate register it
 *               receives map+1 and indexes the low-word store; when temp
 *               aliases rs, rs itself is advanced by 4 after the high-word
 *               store and the original map is reused.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh >= 0);
  if (temp == rs) {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  } else {
    emit_addimm(map, 1, temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
1956void emit_writehword_indexed(int rt, int offset, int rs)
1957{
1958 assert(offset>-256&&offset<256);
1959 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1960 if(offset>=0) {
1961 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1962 }else{
1963 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1964 }
1965}
1966void emit_writebyte_indexed(int rt, int offset, int rs)
1967{
1968 assert(offset>-4096&&offset<4096);
1969 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1970 if(offset>=0) {
1971 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1972 }else{
1973 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1974 }
1975}
1976void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1977{
1978 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1979 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1980}
/*
 * Byte store, optionally through a map register.  With a map and a non-zero
 * addr, rs+addr is first formed in temp, which then serves as the base of
 * the scaled register-indexed store.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
1993void emit_writeword(int rt, int addr)
1994{
1995 u_int offset = addr-(u_int)&dynarec_local;
1996 assert(offset<4096);
1997 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1998 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1999}
2000void emit_writehword(int rt, int addr)
2001{
2002 u_int offset = addr-(u_int)&dynarec_local;
2003 assert(offset<256);
2004 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2005 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2006}
2007void emit_writebyte(int rt, int addr)
2008{
2009 u_int offset = addr-(u_int)&dynarec_local;
2010 assert(offset<4096);
74426039 2011 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2012 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2013}
/* Unimplemented here: logs an x86-style mnemonic, then hits assert(0). */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* Unimplemented here: logs an x86-style mnemonic, then hits assert(0). */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
2024
2025void emit_mul(int rs)
2026{
2027 assem_debug("mul %%%s\n",regname[rs]);
2028 assert(0);
2029}
2030void emit_imul(int rs)
2031{
2032 assem_debug("imul %%%s\n",regname[rs]);
2033 assert(0);
2034}
2035void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2036{
2037 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2038 assert(rs1<16);
2039 assert(rs2<16);
2040 assert(hi<16);
2041 assert(lo<16);
2042 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2043}
2044void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2045{
2046 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2047 assert(rs1<16);
2048 assert(rs2<16);
2049 assert(hi<16);
2050 assert(lo<16);
2051 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2052}
2053
2054void emit_div(int rs)
2055{
2056 assem_debug("div %%%s\n",regname[rs]);
2057 assert(0);
2058}
2059void emit_idiv(int rs)
2060{
2061 assem_debug("idiv %%%s\n",regname[rs]);
2062 assert(0);
2063}
/* Unimplemented here: logs an x86-style mnemonic, then hits assert(0). */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2069
2070void emit_clz(int rs,int rt)
2071{
2072 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2073 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2074}
2075
2076void emit_subcs(int rs1,int rs2,int rt)
2077{
2078 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2079 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2080}
2081
2082void emit_shrcc_imm(int rs,u_int imm,int rt)
2083{
2084 assert(imm>0);
2085 assert(imm<32);
2086 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2087 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2088}
2089
2090void emit_negmi(int rs, int rt)
2091{
2092 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2093 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2094}
2095
2096void emit_negsmi(int rs, int rt)
2097{
2098 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2099 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2100}
2101
2102void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2103{
2104 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2105 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2106}
2107
2108void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2109{
2110 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2111 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2112}
2113
2114void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2115{
2116 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2117 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2118}
2119
2120void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2121{
2122 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2123 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2124}
2125
2126void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2127{
2128 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2129 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2130}
2131
2132void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2133{
2134 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2135 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2136}
2137
2138void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2139{
2140 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2141 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2142}
2143
2144void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2145{
2146 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2147 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2148}
2149
2150void emit_teq(int rs, int rt)
2151{
2152 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2153 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2154}
2155
2156void emit_rsbimm(int rs, int imm, int rt)
2157{
2158 u_int armval;
cfbd3c6e 2159 genimm_checked(imm,&armval);
57871462 2160 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2161 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2162}
2163
2164// Load 2 immediates optimizing for small code size
2165void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2166{
2167 emit_movimm(imm1,rt1);
2168 u_int armval;
2169 if(genimm(imm2-imm1,&armval)) {
2170 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2171 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2172 }else if(genimm(imm1-imm2,&armval)) {
2173 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2174 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2175 }
2176 else emit_movimm(imm2,rt2);
2177}
2178
2179// Conditionally select one of two immediates, optimizing for small code size
2180// This will only be called if HAVE_CMOV_IMM is defined
2181void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2182{
2183 u_int armval;
2184 if(genimm(imm2-imm1,&armval)) {
2185 emit_movimm(imm1,rt);
2186 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2187 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2188 }else if(genimm(imm1-imm2,&armval)) {
2189 emit_movimm(imm1,rt);
2190 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2191 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2192 }
2193 else {
2194 #ifdef ARMv5_ONLY
2195 emit_movimm(imm1,rt);
2196 add_literal((int)out,imm2);
2197 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2198 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2199 #else
2200 emit_movw(imm1&0x0000FFFF,rt);
2201 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2202 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2203 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2204 }
2205 emit_movt(imm1&0xFFFF0000,rt);
2206 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2207 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2208 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2209 }
2210 #endif
2211 }
2212}
2213
// special case for checking invalid_code
// Not implemented in this backend: reaching it trips assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2219
2220// special case for checking invalid_code
2221void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2222{
2223 assert(imm<128&&imm>=0);
2224 assert(r>=0&&r<16);
2225 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2226 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2227 emit_cmpimm(HOST_TEMPREG,imm);
2228}
2229
2230// special case for tlb mapping
2231void emit_addsr12(int rs1,int rs2,int rt)
2232{
2233 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2234 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2235}
2236
0bbd1454 2237void emit_callne(int a)
2238{
2239 assem_debug("blne %x\n",a);
2240 u_int offset=genjmp(a);
2241 output_w32(0x1b000000|offset);
2242}
2243
// Used to preload hash table entries
// NOTE(review): this writes the bytes 0F 18, a modrm byte and a 32-bit
// address, which looks like an x86 encoding carried over into this ARM
// file -- confirm whether anything still calls it.
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2253void emit_prefetchreg(int r)
2254{
2255 assem_debug("pld %s\n",regname[r]);
2256 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2257}
2258
2259// Special case for mini_ht
2260void emit_ldreq_indexed(int rs, u_int offset, int rt)
2261{
2262 assert(offset<4096);
2263 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2264 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2265}
2266
2267void emit_flds(int r,int sr)
2268{
2269 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2270 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2271}
2272
2273void emit_vldr(int r,int vr)
2274{
2275 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2276 output_w32(0xed900b00|(vr<<12)|(r<<16));
2277}
2278
2279void emit_fsts(int sr,int r)
2280{
2281 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2282 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2283}
2284
2285void emit_vstr(int vr,int r)
2286{
2287 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2288 output_w32(0xed800b00|(vr<<12)|(r<<16));
2289}
2290
// Emit "ftosizs s<d>,s<s>".
void emit_ftosizs(int s,int d)
{
  assem_debug("ftosizs s%d,s%d\n",d,s);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeebd0ac0|dst_field|src_field);
}
2296
// Emit "ftosizd s<d>,d<s>".
void emit_ftosizd(int s,int d)
{
  assem_debug("ftosizd s%d,d%d\n",d,s);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=s&7;
  output_w32(0xeebd0bc0|dst_field|src_field);
}
2302
// Emit "fsitos s<d>,s<s>".
void emit_fsitos(int s,int d)
{
  assem_debug("fsitos s%d,s%d\n",d,s);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80ac0|dst_field|src_field);
}
2308
// Emit "fsitod d<d>,s<s>".
void emit_fsitod(int s,int d)
{
  assem_debug("fsitod d%d,s%d\n",d,s);
  int dst_field=(d&7)<<12;
  int src_field=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb80bc0|dst_field|src_field);
}
2314
// Emit "fcvtds d<d>,s<s>".
void emit_fcvtds(int s,int d)
{
  assem_debug("fcvtds d%d,s%d\n",d,s);
  int dst_field=(d&7)<<12;
  int src_field=((s&14)>>1)|((s&1)<<5);
  output_w32(0xeeb70ac0|dst_field|src_field);
}
2320
// Emit "fcvtsd s<d>,d<s>".
void emit_fcvtsd(int s,int d)
{
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=s&7;
  output_w32(0xeeb70bc0|dst_field|src_field);
}
2326
// Emit "fsqrts s<d>,s<s>".
// s: source register number, d: destination register number. Both are
// encoded with the single-precision field layout, so the debug line names
// both as s-registers (it previously printed the destination as d<d>).
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2332
// Emit "fsqrtd d<d>,d<s>".
// s: source register number, d: destination register number. Both are
// encoded as double-precision registers, so the debug line names both as
// d-registers (it previously printed the destination as s<d>).
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2338
// Emit "fabss s<d>,s<s>".
// s: source register number, d: destination register number. Both are
// encoded with the single-precision field layout, so the debug line names
// both as s-registers (it previously printed the destination as d<d>).
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2344
// Emit "fabsd d<d>,d<s>".
// s: source register number, d: destination register number. Both are
// encoded as double-precision registers, so the debug line names both as
// d-registers (it previously printed the destination as s<d>).
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2350
// Emit "fnegs s<d>,s<s>".
// s: source register number, d: destination register number. Both are
// encoded with the single-precision field layout, so the debug line names
// both as s-registers (it previously printed the destination as d<d>).
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2356
// Emit "fnegd d<d>,d<s>".
// s: source register number, d: destination register number. Both are
// encoded as double-precision registers, so the debug line names both as
// d-registers (it previously printed the destination as s<d>).
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2362
// Emit "fadds s<d>,s<s1>,s<s2>".
void emit_fadds(int s1,int s2,int d)
{
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a00|dst_field|lhs_field|rhs_field);
}
2368
// Emit "faddd d<d>,d<s1>,d<s2>".
void emit_faddd(int s1,int s2,int d)
{
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  output_w32(0xee300b00|dst_field|lhs_field|rhs_field);
}
2374
// Emit "fsubs s<d>,s<s1>,s<s2>".
void emit_fsubs(int s1,int s2,int d)
{
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee300a40|dst_field|lhs_field|rhs_field);
}
2380
// Emit "fsubd d<d>,d<s1>,d<s2>".
void emit_fsubd(int s1,int s2,int d)
{
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  output_w32(0xee300b40|dst_field|lhs_field|rhs_field);
}
2386
// Emit "fmuls s<d>,s<s1>,s<s2>".
void emit_fmuls(int s1,int s2,int d)
{
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee200a00|dst_field|lhs_field|rhs_field);
}
2392
// Emit "fmuld d<d>,d<s1>,d<s2>".
void emit_fmuld(int s1,int s2,int d)
{
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  output_w32(0xee200b00|dst_field|lhs_field|rhs_field);
}
2398
// Emit "fdivs s<d>,s<s1>,s<s2>".
void emit_fdivs(int s1,int s2,int d)
{
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  output_w32(0xee800a00|dst_field|lhs_field|rhs_field);
}
2404
// Emit "fdivd d<d>,d<s1>,d<s2>".
void emit_fdivd(int s1,int s2,int d)
{
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  output_w32(0xee800b00|dst_field|lhs_field|rhs_field);
}
2410
2411void emit_fcmps(int x,int y)
2412{
2413 assem_debug("fcmps s14, s15\n");
2414 output_w32(0xeeb47a67);
2415}
2416
2417void emit_fcmpd(int x,int y)
2418{
2419 assem_debug("fcmpd d6, d7\n");
2420 output_w32(0xeeb46b47);
2421}
2422
2423void emit_fmstat()
2424{
2425 assem_debug("fmstat\n");
2426 output_w32(0xeef1fa10);
2427}
2428
2429void emit_bicne_imm(int rs,int imm,int rt)
2430{
2431 u_int armval;
cfbd3c6e 2432 genimm_checked(imm,&armval);
57871462 2433 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2434 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
2437void emit_biccs_imm(int rs,int imm,int rt)
2438{
2439 u_int armval;
cfbd3c6e 2440 genimm_checked(imm,&armval);
57871462 2441 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2442 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2443}
2444
2445void emit_bicvc_imm(int rs,int imm,int rt)
2446{
2447 u_int armval;
cfbd3c6e 2448 genimm_checked(imm,&armval);
57871462 2449 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2450 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2451}
2452
2453void emit_bichi_imm(int rs,int imm,int rt)
2454{
2455 u_int armval;
cfbd3c6e 2456 genimm_checked(imm,&armval);
57871462 2457 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2458 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2459}
2460
2461void emit_orrvs_imm(int rs,int imm,int rt)
2462{
2463 u_int armval;
cfbd3c6e 2464 genimm_checked(imm,&armval);
57871462 2465 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2466 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2467}
2468
b9b61529 2469void emit_orrne_imm(int rs,int imm,int rt)
2470{
2471 u_int armval;
cfbd3c6e 2472 genimm_checked(imm,&armval);
b9b61529 2473 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2474 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2475}
2476
2477void emit_andne_imm(int rs,int imm,int rt)
2478{
2479 u_int armval;
cfbd3c6e 2480 genimm_checked(imm,&armval);
b9b61529 2481 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2482 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2483}
2484
57871462 2485void emit_jno_unlikely(int a)
2486{
2487 //emit_jno(a);
2488 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2489 output_w32(0x72800000|rd_rn_rm(15,15,0));
2490}
2491
2492// Save registers before function call
2493void save_regs(u_int reglist)
2494{
2495 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2496 if(!reglist) return;
2497 assem_debug("stmia fp,{");
2498 if(reglist&1) assem_debug("r0, ");
2499 if(reglist&2) assem_debug("r1, ");
2500 if(reglist&4) assem_debug("r2, ");
2501 if(reglist&8) assem_debug("r3, ");
2502 if(reglist&0x1000) assem_debug("r12");
2503 assem_debug("}\n");
2504 output_w32(0xe88b0000|reglist);
2505}
2506// Restore registers after function call
2507void restore_regs(u_int reglist)
2508{
2509 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2510 if(!reglist) return;
2511 assem_debug("ldmia fp,{");
2512 if(reglist&1) assem_debug("r0, ");
2513 if(reglist&2) assem_debug("r1, ");
2514 if(reglist&4) assem_debug("r2, ");
2515 if(reglist&8) assem_debug("r3, ");
2516 if(reglist&0x1000) assem_debug("r12");
2517 assem_debug("}\n");
2518 output_w32(0xe89b0000|reglist);
2519}
2520
2521// Write back consts using r14 so we don't disturb the other registers
2522void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2523{
2524 int hr;
2525 for(hr=0;hr<HOST_REGS;hr++) {
2526 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2527 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2528 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2529 int value=constmap[i][hr];
2530 if(value==0) {
2531 emit_zeroreg(HOST_TEMPREG);
2532 }
2533 else {
2534 emit_movimm(value,HOST_TEMPREG);
2535 }
2536 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2537#ifndef FORCE32
57871462 2538 if((i_is32>>i_regmap[hr])&1) {
2539 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2540 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2541 }
24385cae 2542#endif
57871462 2543 }
2544 }
2545 }
2546 }
2547}
2548
2549/* Stubs/epilogue */
2550
2551void literal_pool(int n)
2552{
2553 if(!literalcount) return;
2554 if(n) {
2555 if((int)out-literals[0][0]<4096-n) return;
2556 }
2557 u_int *ptr;
2558 int i;
2559 for(i=0;i<literalcount;i++)
2560 {
2561 ptr=(u_int *)literals[i][0];
2562 u_int offset=(u_int)out-(u_int)ptr-8;
2563 assert(offset<4096);
2564 assert(!(offset&3));
2565 *ptr|=offset;
2566 output_w32(literals[i][1]);
2567 }
2568 literalcount=0;
2569}
2570
2571void literal_pool_jumpover(int n)
2572{
2573 if(!literalcount) return;
2574 if(n) {
2575 if((int)out-literals[0][0]<4096-n) return;
2576 }
2577 int jaddr=(int)out;
2578 emit_jmp(0);
2579 literal_pool(0);
2580 set_jump_target(jaddr,(int)out);
2581}
2582
2583emit_extjump2(int addr, int target, int linker)
2584{
2585 u_char *ptr=(u_char *)addr;
2586 assert((ptr[3]&0x0e)==0xa);
2587 emit_loadlp(target,0);
2588 emit_loadlp(addr,1);
24385cae 2589 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2590 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2591//DEBUG >
2592#ifdef DEBUG_CYCLE_COUNT
2593 emit_readword((int)&last_count,ECX);
2594 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2595 emit_readword((int)&next_interupt,ECX);
2596 emit_writeword(HOST_CCREG,(int)&Count);
2597 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2598 emit_writeword(ECX,(int)&last_count);
2599#endif
2600//DEBUG <
2601 emit_jmp(linker);
2602}
2603
2604emit_extjump(int addr, int target)
2605{
2606 emit_extjump2(addr, target, (int)dyna_linker);
2607}
2608emit_extjump_ds(int addr, int target)
2609{
2610 emit_extjump2(addr, target, (int)dyna_linker_ds);
2611}
2612
cbbab9cd 2613#ifdef PCSX
2614#include "pcsxmem_inline.c"
2615#endif
2616
// Emit the out-of-line slow path for load stub n.
// The emitted code stores the address register to `address`, saves the
// registers in reglist, calls indirect_jump_indexed with r0 = handler table
// for the access size and r1 = address>>16, restores the registers, moves
// the result from readmem_dword into the destination host register and
// jumps back to stubs[n][2].
// Fields of stubs[n] as used here: [0] stub type, [1] location of the branch
// to retarget at this stub, [2] return address, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [6] value used in the cycle adjustment, [7] register list to save.
57871462 2617do_readstub(int n)
2618{
2619 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2620 literal_pool(256);
2621 set_jump_target(stubs[n][1],(int)out);
2622 int type=stubs[n][0];
2623 int i=stubs[n][3];
2624 int rs=stubs[n][4];
2625 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2626 u_int reglist=stubs[n][7];
2627 signed char *i_regmap=i_regs->regmap;
2628 int addr=get_reg(i_regmap,AGEN1+(i&1));
2629 int rth,rt;
2630 int ds;
// C1LS/C2LS/LOADLR instructions receive their result in FTEMP; everything
// else loads straight into the instruction's rt1.
b9b61529 2631 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2632 rth=get_reg(i_regmap,FTEMP|64);
2633 rt=get_reg(i_regmap,FTEMP);
2634 }else{
2635 rth=get_reg(i_regmap,rt1[i]|64);
2636 rt=get_reg(i_regmap,rt1[i]);
2637 }
2638 assert(rs>=0);
// Fallbacks when no address-generation register is mapped.
57871462 2639 if(addr<0) addr=rt;
535d208a 2640 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2641 assert(addr>=0);
// Select the handler table matching the access size.
2642 int ftable=0;
2643 if(type==LOADB_STUB||type==LOADBU_STUB)
2644 ftable=(int)readmemb;
2645 if(type==LOADH_STUB||type==LOADHU_STUB)
2646 ftable=(int)readmemh;
2647 if(type==LOADW_STUB)
2648 ftable=(int)readmem;
24385cae 2649#ifndef FORCE32
57871462 2650 if(type==LOADD_STUB)
2651 ftable=(int)readmemd;
24385cae 2652#endif
2653 assert(ftable!=0);
57871462 2654 emit_writeword(rs,(int)&address);
2655 //emit_pusha();
2656 save_regs(reglist);
97a238a6 2657#ifndef PCSX
// ds is set when the stub was created with a regstat other than regs[i].
57871462 2658 ds=i_regs!=&regs[i];
2659 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2660 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2661 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2662 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2663 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2664#endif
// Call setup: r1 = address>>16, r0 = handler table,
// r2 = CCREG value + 2*stubs[n][6]+2.
57871462 2665 emit_shrimm(rs,16,1);
2666 int cc=get_reg(i_regmap,CCREG);
2667 if(cc<0) {
2668 emit_loadreg(CCREG,2);
2669 }
2670 emit_movimm(ftable,0);
2671 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2672#ifndef PCSX
57871462 2673 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2674#endif
57871462 2675 //emit_readword((int)&last_count,temp);
2676 //emit_add(cc,temp,cc);
2677 //emit_writeword(cc,(int)&Count);
2678 //emit_mov(15,14);
2679 emit_call((int)&indirect_jump_indexed);
2680 //emit_callreg(rs);
2681 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2682#ifndef PCSX
57871462 2683 // We really shouldn't need to update the count here,
2684 // but not doing so causes random crashes...
2685 emit_readword((int)&Count,HOST_TEMPREG);
2686 emit_readword((int)&next_interupt,2);
2687 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2688 emit_writeword(2,(int)&last_count);
2689 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2690 if(cc<0) {
2691 emit_storereg(CCREG,HOST_TEMPREG);
2692 }
f51dc36c 2693#endif
57871462 2694 //emit_popa();
2695 restore_regs(reglist);
2696 //if((cc=get_reg(regmap,CCREG))>=0) {
2697 // emit_loadreg(CCREG,cc);
2698 //}
// Fetch the result from readmem_dword with the extension matching the stub
// type; skipped for ordinary loads whose target is unmapped or guest reg 0.
f18c0f46 2699 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2700 assert(rt>=0);
2701 if(type==LOADB_STUB)
2702 emit_movsbl((int)&readmem_dword,rt);
2703 if(type==LOADBU_STUB)
2704 emit_movzbl((int)&readmem_dword,rt);
2705 if(type==LOADH_STUB)
2706 emit_movswl((int)&readmem_dword,rt);
2707 if(type==LOADHU_STUB)
2708 emit_movzwl((int)&readmem_dword,rt);
2709 if(type==LOADW_STUB)
2710 emit_readword((int)&readmem_dword,rt);
2711 if(type==LOADD_STUB) {
2712 emit_readword((int)&readmem_dword,rt);
2713 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2714 }
57871462 2715 }
2716 emit_jmp(stubs[n][2]); // return address
2717}
2718
// Emit, inline, a handler-based read from the address `addr`, which is known
// while generating code. The handler is looked up at generation time
// (ftable[addr>>16]) and called through indirect_jump; the result is then
// moved from readmem_dword into the host register mapped to `target`.
//   type     LOAD*_STUB access kind
//   i        instruction index
//   regmap   host register mapping in effect
//   target   guest register receiving the value
//   adj      used in the cycle adjustment, CLOCK_DIVIDER*(adj+1)
//   reglist  registers to preserve around the call
2719inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2720{
2721 int rs=get_reg(regmap,target);
2722 int rth=get_reg(regmap,target|64);
2723 int rt=get_reg(regmap,target);
535d208a 2724 if(rs<0) rs=get_reg(regmap,-1);
57871462 2725 assert(rs>=0);
57871462 2726 int ftable=0;
2727 if(type==LOADB_STUB||type==LOADBU_STUB)
2728 ftable=(int)readmemb;
2729 if(type==LOADH_STUB||type==LOADHU_STUB)
2730 ftable=(int)readmemh;
2731 if(type==LOADW_STUB)
2732 ftable=(int)readmem;
24385cae 2733#ifndef FORCE32
57871462 2734 if(type==LOADD_STUB)
2735 ftable=(int)readmemd;
24385cae 2736#endif
2737 assert(ftable!=0);
cbbab9cd 2738#ifdef PCSX
// If pcsx_direct_read reports it handled the access, nothing more is emitted.
2739 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2740 return;
2741#endif
// With target==0 the address is materialised into rs here.
fd99c415 2742 if(target==0)
2743 emit_movimm(addr,rs);
57871462 2744 emit_writeword(rs,(int)&address);
2745 //emit_pusha();
2746 save_regs(reglist);
0c1fe38b 2747#ifndef PCSX
2748 if((signed int)addr>=(signed int)0xC0000000) {
2749 // Theoretically we can have a pagefault here, if the TLB has never
2750 // been enabled and the address is outside the range 80000000..BFFFFFFF
2751 // Write out the registers so the pagefault can be handled. This is
2752 // a very rare case and likely represents a bug.
2753 int ds=regmap!=regs[i].regmap;
2754 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2755 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2756 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2757 }
2758#endif
57871462 2759 //emit_shrimm(rs,16,1);
2760 int cc=get_reg(regmap,CCREG);
2761 if(cc<0) {
2762 emit_loadreg(CCREG,2);
2763 }
2764 //emit_movimm(ftable,0);
// r0 = handler for this address; r2 = CCREG value + CLOCK_DIVIDER*(adj+1).
2765 emit_movimm(((u_int *)ftable)[addr>>16],0);
2766 //emit_readword((int)&last_count,12);
2767 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2768#ifndef PCSX
57871462 2769 if((signed int)addr>=(signed int)0xC0000000) {
2770 // Pagefault address
2771 int ds=regmap!=regs[i].regmap;
2772 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2773 }
f51dc36c 2774#endif
57871462 2775 //emit_add(12,2,2);
2776 //emit_writeword(2,(int)&Count);
2777 //emit_call(((u_int *)ftable)[addr>>16]);
2778 emit_call((int)&indirect_jump);
f51dc36c 2779#ifndef PCSX
57871462 2780 // We really shouldn't need to update the count here,
2781 // but not doing so causes random crashes...
2782 emit_readword((int)&Count,HOST_TEMPREG);
2783 emit_readword((int)&next_interupt,2);
2784 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2785 emit_writeword(2,(int)&last_count);
2786 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2787 if(cc<0) {
2788 emit_storereg(CCREG,HOST_TEMPREG);
2789 }
f51dc36c 2790#endif
57871462 2791 //emit_popa();
2792 restore_regs(reglist);
// Fetch the result with the extension matching the access type, if the
// target has a host register.
fd99c415 2793 if(rt>=0) {
2794 if(type==LOADB_STUB)
2795 emit_movsbl((int)&readmem_dword,rt);
2796 if(type==LOADBU_STUB)
2797 emit_movzbl((int)&readmem_dword,rt);
2798 if(type==LOADH_STUB)
2799 emit_movswl((int)&readmem_dword,rt);
2800 if(type==LOADHU_STUB)
2801 emit_movzwl((int)&readmem_dword,rt);
2802 if(type==LOADW_STUB)
2803 emit_readword((int)&readmem_dword,rt);
2804 if(type==LOADD_STUB) {
2805 emit_readword((int)&readmem_dword,rt);
2806 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2807 }
57871462 2808 }
2809}
2810
// Emit the out-of-line slow path for store stub n.
// The emitted code stores the address register to `address` and the value
// to byte/hword/word/dword according to the stub type, saves the registers
// in reglist, calls indirect_jump_indexed with r0 = handler table and
// r1 = address>>16, recomputes the cycle count register from Count and
// next_interupt, restores the registers and jumps back to stubs[n][2].
// Fields of stubs[n] as used here: [0] stub type, [1] location of the branch
// to retarget at this stub, [2] return address, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [6] value used in the cycle adjustment, [7] register list to save.
2811do_writestub(int n)
2812{
2813 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2814 literal_pool(256);
2815 set_jump_target(stubs[n][1],(int)out);
2816 int type=stubs[n][0];
2817 int i=stubs[n][3];
2818 int rs=stubs[n][4];
2819 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2820 u_int reglist=stubs[n][7];
2821 signed char *i_regmap=i_regs->regmap;
2822 int addr=get_reg(i_regmap,AGEN1+(i&1));
2823 int rth,rt,r;
2824 int ds;
// C1LS/C2LS stores take their value from FTEMP, others from rs2 of the
// instruction; r remembers which guest register was chosen.
b9b61529 2825 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2826 rth=get_reg(i_regmap,FTEMP|64);
2827 rt=get_reg(i_regmap,r=FTEMP);
2828 }else{
2829 rth=get_reg(i_regmap,rs2[i]|64);
2830 rt=get_reg(i_regmap,r=rs2[i]);
2831 }
2832 assert(rs>=0);
2833 assert(rt>=0);
2834 if(addr<0) addr=get_reg(i_regmap,-1);
2835 assert(addr>=0);
// Select the handler table matching the access size.
2836 int ftable=0;
2837 if(type==STOREB_STUB)
2838 ftable=(int)writememb;
2839 if(type==STOREH_STUB)
2840 ftable=(int)writememh;
2841 if(type==STOREW_STUB)
2842 ftable=(int)writemem;
24385cae 2843#ifndef FORCE32
57871462 2844 if(type==STORED_STUB)
2845 ftable=(int)writememd;
24385cae 2846#endif
2847 assert(ftable!=0);
57871462 2848 emit_writeword(rs,(int)&address);
2849 //emit_shrimm(rs,16,rs);
2850 //emit_movmem_indexedx4(ftable,rs,rs);
2851 if(type==STOREB_STUB)
2852 emit_writebyte(rt,(int)&byte);
2853 if(type==STOREH_STUB)
2854 emit_writehword(rt,(int)&hword);
2855 if(type==STOREW_STUB)
2856 emit_writeword(rt,(int)&word);
2857 if(type==STORED_STUB) {
3d624f89 2858#ifndef FORCE32
57871462 2859 emit_writeword(rt,(int)&dword);
// When r is guest register 0 the low-half register is reused for the upper word.
2860 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2861#else
2862 printf("STORED_STUB\n");
2863#endif
57871462 2864 }
2865 //emit_pusha();
2866 save_regs(reglist);
97a238a6 2867#ifndef PCSX
// ds is set when the stub was created with a regstat other than regs[i].
57871462 2868 ds=i_regs!=&regs[i];
2869 int real_rs=get_reg(i_regmap,rs1[i]);
2870 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2871 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2872 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2873 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2874#endif
// Call setup: r1 = address>>16, r0 = handler table,
// r2 = CCREG value + 2*stubs[n][6]+2.
57871462 2875 emit_shrimm(rs,16,1);
2876 int cc=get_reg(i_regmap,CCREG);
2877 if(cc<0) {
2878 emit_loadreg(CCREG,2);
2879 }
2880 emit_movimm(ftable,0);
2881 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2882#ifndef PCSX
57871462 2883 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2884#endif
57871462 2885 //emit_readword((int)&last_count,temp);
2886 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2887 //emit_add(cc,temp,cc);
2888 //emit_writeword(cc,(int)&Count);
2889 emit_call((int)&indirect_jump_indexed);
2890 //emit_callreg(rs);
// After the call: last_count = next_interupt, and the cycle count register
// (or CCREG's stored value) = Count - (2*stubs[n][6]+2) - next_interupt.
2891 emit_readword((int)&Count,HOST_TEMPREG);
2892 emit_readword((int)&next_interupt,2);
2893 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2894 emit_writeword(2,(int)&last_count);
2895 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2896 if(cc<0) {
2897 emit_storereg(CCREG,HOST_TEMPREG);
2898 }
2899 //emit_popa();
2900 restore_regs(reglist);
2901 //if((cc=get_reg(regmap,CCREG))>=0) {
2902 // emit_loadreg(CCREG,cc);
2903 //}
2904 emit_jmp(stubs[n][2]); // return address
2905}
2906
// Emit, inline, a handler-based write to the address `addr`, which is known
// while generating code. The handler is looked up at generation time
// (ftable[addr>>16]) and called through indirect_jump after the address and
// value have been stored to `address` and byte/hword/word/dword.
//   type     STORE*_STUB access kind
//   i        instruction index
//   regmap   host register mapping in effect
//   target   guest register supplying the value
//   adj      used in the cycle adjustment, CLOCK_DIVIDER*(adj+1)
//   reglist  registers to preserve around the call
2907inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2908{
2909 int rs=get_reg(regmap,-1);
2910 int rth=get_reg(regmap,target|64);
2911 int rt=get_reg(regmap,target);
2912 assert(rs>=0);
2913 assert(rt>=0);
cbbab9cd 2914#ifdef PCSX
// If pcsx_direct_write reports it handled the access, nothing more is emitted.
2915 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2916 return;
2917#endif
57871462 2918 int ftable=0;
2919 if(type==STOREB_STUB)
2920 ftable=(int)writememb;
2921 if(type==STOREH_STUB)
2922 ftable=(int)writememh;
2923 if(type==STOREW_STUB)
2924 ftable=(int)writemem;
24385cae 2925#ifndef FORCE32
57871462 2926 if(type==STORED_STUB)
2927 ftable=(int)writememd;
24385cae 2928#endif
2929 assert(ftable!=0);
57871462 2930 emit_writeword(rs,(int)&address);
2931 //emit_shrimm(rs,16,rs);
2932 //emit_movmem_indexedx4(ftable,rs,rs);
2933 if(type==STOREB_STUB)
2934 emit_writebyte(rt,(int)&byte);
2935 if(type==STOREH_STUB)
2936 emit_writehword(rt,(int)&hword);
2937 if(type==STOREW_STUB)
2938 emit_writeword(rt,(int)&word);
2939 if(type==STORED_STUB) {
3d624f89 2940#ifndef FORCE32
57871462 2941 emit_writeword(rt,(int)&dword);
// When target is guest register 0 the low-half register is reused for the upper word.
2942 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2943#else
2944 printf("STORED_STUB\n");
2945#endif
57871462 2946 }
2947 //emit_pusha();
2948 save_regs(reglist);
0c1fe38b 2949#ifndef PCSX
2950 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2951 if((signed int)addr>=(signed int)0xC0000000) {
2952 // Theoretically we can have a pagefault here, if the TLB has never
2953 // been enabled and the address is outside the range 80000000..BFFFFFFF
2954 // Write out the registers so the pagefault can be handled. This is
2955 // a very rare case and likely represents a bug.
2956 int ds=regmap!=regs[i].regmap;
2957 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2958 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2959 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2960 }
2961#endif
57871462 2962 //emit_shrimm(rs,16,1);
2963 int cc=get_reg(regmap,CCREG);
2964 if(cc<0) {
2965 emit_loadreg(CCREG,2);
2966 }
2967 //emit_movimm(ftable,0);
// r0 = handler for this address; r2 = CCREG value + CLOCK_DIVIDER*(adj+1).
2968 emit_movimm(((u_int *)ftable)[addr>>16],0);
2969 //emit_readword((int)&last_count,12);
2970 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2971#ifndef PCSX
57871462 2972 if((signed int)addr>=(signed int)0xC0000000) {
2973 // Pagefault address
2974 int ds=regmap!=regs[i].regmap;
2975 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2976 }
f51dc36c 2977#endif
57871462 2978 //emit_add(12,2,2);
2979 //emit_writeword(2,(int)&Count);
2980 //emit_call(((u_int *)ftable)[addr>>16]);
2981 emit_call((int)&indirect_jump);
// After the call: last_count = next_interupt, and the cycle count register
// (or CCREG's stored value) = Count - CLOCK_DIVIDER*(adj+1) - next_interupt.
2982 emit_readword((int)&Count,HOST_TEMPREG);
2983 emit_readword((int)&next_interupt,2);
2984 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2985 emit_writeword(2,(int)&last_count);
2986 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2987 if(cc<0) {
2988 emit_storereg(CCREG,HOST_TEMPREG);
2989 }
2990 //emit_popa();
2991 restore_regs(reglist);
2992}
2993
// Emit the slow path for an unaligned store (SWL/SWR only, see the assert).
// The emitted code reads the aligned word containing the address through the
// readmem handler table, merges the register value into it using a shift
// derived from the low address bits, and writes the merged word back through
// the writemem handler table before jumping to stubs[n][2].
// Fields of stubs[n] as used here: [1] location of the branch to retarget at
// this stub, [2] return address, [3] instruction index, [4] struct regstat
// pointer, [5] host register holding the address, [6] value used in the
// cycle adjustment, [7] register list to save.
2994do_unalignedwritestub(int n)
2995{
b7918751 2996 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2997 literal_pool(256);
57871462 2998 set_jump_target(stubs[n][1],(int)out);
b7918751 2999
3000 int i=stubs[n][3];
3001 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3002 int addr=stubs[n][5];
3003 u_int reglist=stubs[n][7];
3004 signed char *i_regmap=i_regs->regmap;
3005 int temp2=get_reg(i_regmap,FTEMP);
3006 int rt;
3007 int ds, real_rs;
3008 rt=get_reg(i_regmap,rs2[i]);
3009 assert(rt>=0);
3010 assert(addr>=0);
3011 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register must survive the calls; temp2 is scratch.
// NOTE(review): temp2 is used as a shift count and register number without
// a >=0 check -- presumably FTEMP is always mapped here, confirm.
3012 reglist|=(1<<addr);
3013 reglist&=~(1<<temp2);
3014
// address = addr with the low two bits cleared
3015 emit_andimm(addr,0xfffffffc,temp2);
3016 emit_writeword(temp2,(int)&address);
3017
3018 save_regs(reglist);
97a238a6 3019#ifndef PCSX
b7918751 3020 ds=i_regs!=&regs[i];
3021 real_rs=get_reg(i_regmap,rs1[i]);
3022 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3023 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3024 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3025 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3026#endif
// First call: read the aligned word (r0 = readmem table, r1 = addr>>16).
b7918751 3027 emit_shrimm(addr,16,1);
3028 int cc=get_reg(i_regmap,CCREG);
3029 if(cc<0) {
3030 emit_loadreg(CCREG,2);
3031 }
3032 emit_movimm((u_int)readmem,0);
3033 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3034#ifndef PCSX
3035 // pagefault address
3036 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3037#endif
b7918751 3038 emit_call((int)&indirect_jump_indexed);
3039 restore_regs(reglist);
3040
// temp2 = word read; temp (aliasing addr) = (addr*8)&24, a bit shift derived
// from the low two address bits, conditionally xor-ed with 24 below.
3041 emit_readword((int)&readmem_dword,temp2);
3042 int temp=addr; //hmh
3043 emit_shlimm(addr,3,temp);
3044 emit_andimm(temp,24,temp);
3045#ifdef BIG_ENDIAN_MIPS
3046 if (opcode[i]==0x2e) // SWR
3047#else
3048 if (opcode[i]==0x2a) // SWL
3049#endif
3050 emit_xorimm(temp,24,temp);
// Clear the bits being replaced (all-ones mask shifted by temp), then OR in
// the shifted register value.
3051 emit_movimm(-1,HOST_TEMPREG);
55439448 3052 if (opcode[i]==0x2a) { // SWL
b7918751 3053 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3054 emit_orrshr(rt,temp,temp2);
3055 }else{
3056 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3057 emit_orrshl(rt,temp,temp2);
3058 }
// addr was clobbered via temp; reload the aligned address for the write.
3059 emit_readword((int)&address,addr);
3060 emit_writeword(temp2,(int)&word);
3061 //save_regs(reglist); // don't need to, no state changes
3062 emit_shrimm(addr,16,1);
3063 emit_movimm((u_int)writemem,0);
3064 //emit_call((int)&indirect_jump_indexed);
// Second call, done by hand: r14 = r15, then r15 is loaded from the table
// entry indexed by r0 and r1.
3065 emit_mov(15,14);
3066 emit_readword_dualindexedx4(0,1,15);
3067 emit_readword((int)&Count,HOST_TEMPREG);
3068 emit_readword((int)&next_interupt,2);
3069 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3070 emit_writeword(2,(int)&last_count);
3071 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3072 if(cc<0) {
3073 emit_storereg(CCREG,HOST_TEMPREG);
3074 }
3075 restore_regs(reglist);
57871462 3076 emit_jmp(stubs[n][2]); // return address
3077}
3078
// Debug helper: prints seven of its arguments in hex (esp is not printed),
// followed by the int located just below the first parameter in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object; this looks
// like a leftover from an x86 build -- confirm it is still wanted.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int below_edi=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_edi);
}
3083
3084do_invstub(int n)
3085{
3086 literal_pool(20);
3087 u_int reglist=stubs[n][3];
3088 set_jump_target(stubs[n][1],(int)out);
3089 save_regs(reglist);
3090 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3091 emit_call((int)&invalidate_addr);
3092 restore_regs(reglist);
3093 emit_jmp(stubs[n][2]); // return address
3094}
3095
// Emit the dirty-check stub for instruction i: loads r1 = source address,
// r2 = copy, r3 = slen*4 and r0 = start+i*4, then calls verify_code (or
// verify_code_vm when start, as a signed int, is >= 0xC0000000).
// Afterwards the entry-point register loads are emitted, followed by a jump
// to instr_addr[i]. Returns the address where those register loads begin,
// or instr_addr[i] itself when load_regs_entry emitted nothing.
3096int do_dirty_stub(int i)
3097{
3098 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3099 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3100 #ifdef PCSX
3101 addr=(u_int)source;
3102 #endif
57871462 3103 // Careful about the code output here, verify_dirty needs to parse it.
3104 #ifdef ARMv5_ONLY
ac545b3a 3105 emit_loadlp(addr,1);
57871462 3106 emit_loadlp((int)copy,2);
3107 emit_loadlp(slen*4,3);
3108 #else
ac545b3a 3109 emit_movw(addr&0x0000FFFF,1);
57871462 3110 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3111 emit_movt(addr&0xFFFF0000,1);
57871462 3112 emit_movt(((u_int)copy)&0xFFFF0000,2);
3113 emit_movw(slen*4,3);
3114 #endif
3115 emit_movimm(start+i*4,0);
3116 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
// Remember where the entry code starts so the caller can jump to it.
3117 int entry=(int)out;
3118 load_regs_entry(i);
3119 if(entry==(int)out) entry=instr_addr[i];
3120 emit_jmp(instr_addr[i]);
3121 return entry;
3122}
3123
// Variant of the dirty-check stub that calls verify_code_ds: loads
// r1 = source address (or start when start, as a signed int, is
// >= 0xC0000000), r2 = copy, r3 = slen*4 and r0 = start+1.
3124void do_dirty_stub_ds()
3125{
3126 // Careful about the code output here, verify_dirty needs to parse it.
3127 #ifdef ARMv5_ONLY
3128 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3129 emit_loadlp((int)copy,2);
3130 emit_loadlp(slen*4,3);
3131 #else
3132 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3133 emit_movw(((u_int)copy)&0x0000FFFF,2);
3134 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3135 emit_movt(((u_int)copy)&0xFFFF0000,2);
3136 emit_movw(slen*4,3);
3137 #endif
3138 emit_movimm(start+1,0);
3139 emit_call((int)&verify_code_ds);
3140}
3141
3142do_cop1stub(int n)
3143{
3144 literal_pool(256);
3145 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3146 set_jump_target(stubs[n][1],(int)out);
3147 int i=stubs[n][3];
3d624f89 3148// int rs=stubs[n][4];
57871462 3149 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3150 int ds=stubs[n][6];
3151 if(!ds) {
3152 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3153 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3154 }
3155 //else {printf("fp exception in delay slot\n");}
3156 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3157 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3158 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3159 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3160 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3161}
3162
3163/* TLB */
3164
3165int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3166{
3167 if(c) {
3168 if((signed int)addr>=(signed int)0xC0000000) {
3169 // address_generation already loaded the const
3170 emit_readword_dualindexedx4(FP,map,map);
3171 }
3172 else
3173 return -1; // No mapping
3174 }
3175 else {
3176 assert(s!=map);
3177 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3178 emit_addsr12(map,s,map);
3179 // Schedule this while we wait on the load
3180 //if(x) emit_xorimm(s,x,ar);
3181 if(shift>=0) emit_shlimm(s,3,shift);
3182 if(~a) emit_andimm(s,a,ar);
3183 emit_readword_dualindexedx4(FP,map,map);
3184 }
3185 return map;
3186}
3187int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3188{
3189 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3190 emit_test(map,map);
3191 *jaddr=(int)out;
3192 emit_js(0);
3193 }
3194 return map;
3195}
3196
3197int gen_tlb_addr_r(int ar, int map) {
3198 if(map>=0) {
3199 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3200 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3201 }
3202}
3203
3204int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3205{
3206 if(c) {
3207 if(addr<0x80800000||addr>=0xC0000000) {
3208 // address_generation already loaded the const
3209 emit_readword_dualindexedx4(FP,map,map);
3210 }
3211 else
3212 return -1; // No mapping
3213 }
3214 else {
3215 assert(s!=map);
3216 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3217 emit_addsr12(map,s,map);
3218 // Schedule this while we wait on the load
3219 //if(x) emit_xorimm(s,x,ar);
3220 emit_readword_dualindexedx4(FP,map,map);
3221 }
3222 return map;
3223}
3224int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3225{
3226 if(!c||addr<0x80800000||addr>=0xC0000000) {
3227 emit_testimm(map,0x40000000);
3228 *jaddr=(int)out;
3229 emit_jne(0);
3230 }
3231}
3232
3233int gen_tlb_addr_w(int ar, int map) {
3234 if(map>=0) {
3235 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3236 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3237 }
3238}
3239
3240// Generate the address of the memory_map entry, relative to dynarec_local
3241generate_map_const(u_int addr,int reg) {
3242 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3243 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3244}
3245
3246/* Special assem */
3247
3248void shift_assemble_arm(int i,struct regstat *i_regs)
3249{
3250 if(rt1[i]) {
3251 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3252 {
3253 signed char s,t,shift;
3254 t=get_reg(i_regs->regmap,rt1[i]);
3255 s=get_reg(i_regs->regmap,rs1[i]);
3256 shift=get_reg(i_regs->regmap,rs2[i]);
3257 if(t>=0){
3258 if(rs1[i]==0)
3259 {
3260 emit_zeroreg(t);
3261 }
3262 else if(rs2[i]==0)
3263 {
3264 assert(s>=0);
3265 if(s!=t) emit_mov(s,t);
3266 }
3267 else
3268 {
3269 emit_andimm(shift,31,HOST_TEMPREG);
3270 if(opcode2[i]==4) // SLLV
3271 {
3272 emit_shl(s,HOST_TEMPREG,t);
3273 }
3274 if(opcode2[i]==6) // SRLV
3275 {
3276 emit_shr(s,HOST_TEMPREG,t);
3277 }
3278 if(opcode2[i]==7) // SRAV
3279 {
3280 emit_sar(s,HOST_TEMPREG,t);
3281 }
3282 }
3283 }
3284 } else { // DSLLV/DSRLV/DSRAV
3285 signed char sh,sl,th,tl,shift;
3286 th=get_reg(i_regs->regmap,rt1[i]|64);
3287 tl=get_reg(i_regs->regmap,rt1[i]);
3288 sh=get_reg(i_regs->regmap,rs1[i]|64);
3289 sl=get_reg(i_regs->regmap,rs1[i]);
3290 shift=get_reg(i_regs->regmap,rs2[i]);
3291 if(tl>=0){
3292 if(rs1[i]==0)
3293 {
3294 emit_zeroreg(tl);
3295 if(th>=0) emit_zeroreg(th);
3296 }
3297 else if(rs2[i]==0)
3298 {
3299 assert(sl>=0);
3300 if(sl!=tl) emit_mov(sl,tl);
3301 if(th>=0&&sh!=th) emit_mov(sh,th);
3302 }
3303 else
3304 {
3305 // FIXME: What if shift==tl ?
3306 assert(shift!=tl);
3307 int temp=get_reg(i_regs->regmap,-1);
3308 int real_th=th;
3309 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3310 assert(sl>=0);
3311 assert(sh>=0);
3312 emit_andimm(shift,31,HOST_TEMPREG);
3313 if(opcode2[i]==0x14) // DSLLV
3314 {
3315 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3316 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3317 emit_orrshr(sl,HOST_TEMPREG,th);
3318 emit_andimm(shift,31,HOST_TEMPREG);
3319 emit_testimm(shift,32);
3320 emit_shl(sl,HOST_TEMPREG,tl);
3321 if(th>=0) emit_cmovne_reg(tl,th);
3322 emit_cmovne_imm(0,tl);
3323 }
3324 if(opcode2[i]==0x16) // DSRLV
3325 {
3326 assert(th>=0);
3327 emit_shr(sl,HOST_TEMPREG,tl);
3328 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3329 emit_orrshl(sh,HOST_TEMPREG,tl);
3330 emit_andimm(shift,31,HOST_TEMPREG);
3331 emit_testimm(shift,32);
3332 emit_shr(sh,HOST_TEMPREG,th);
3333 emit_cmovne_reg(th,tl);
3334 if(real_th>=0) emit_cmovne_imm(0,th);
3335 }
3336 if(opcode2[i]==0x17) // DSRAV
3337 {
3338 assert(th>=0);
3339 emit_shr(sl,HOST_TEMPREG,tl);
3340 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3341 if(real_th>=0) {
3342 assert(temp>=0);
3343 emit_sarimm(th,31,temp);
3344 }
3345 emit_orrshl(sh,HOST_TEMPREG,tl);
3346 emit_andimm(shift,31,HOST_TEMPREG);
3347 emit_testimm(shift,32);
3348 emit_sar(sh,HOST_TEMPREG,th);
3349 emit_cmovne_reg(th,tl);
3350 if(real_th>=0) emit_cmovne_reg(temp,th);
3351 }
3352 }
3353 }
3354 }
3355 }
3356}
3357#define shift_assemble shift_assemble_arm
3358
3359void loadlr_assemble_arm(int i,struct regstat *i_regs)
3360{
3361 int s,th,tl,temp,temp2,addr,map=-1;
3362 int offset;
3363 int jaddr=0;
af4ee1fe 3364 int memtarget=0,c=0;
57871462 3365 u_int hr,reglist=0;
3366 th=get_reg(i_regs->regmap,rt1[i]|64);
3367 tl=get_reg(i_regs->regmap,rt1[i]);
3368 s=get_reg(i_regs->regmap,rs1[i]);
3369 temp=get_reg(i_regs->regmap,-1);
3370 temp2=get_reg(i_regs->regmap,FTEMP);
3371 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3372 assert(addr<0);
3373 offset=imm[i];
3374 for(hr=0;hr<HOST_REGS;hr++) {
3375 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3376 }
3377 reglist|=1<<temp;
3378 if(offset||s<0||c) addr=temp2;
3379 else addr=s;
3380 if(s>=0) {
3381 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3382 if(c) {
3383 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3384 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3385 }
57871462 3386 }
535d208a 3387 if(!using_tlb) {
3388 if(!c) {
3389 #ifdef RAM_OFFSET
3390 map=get_reg(i_regs->regmap,ROREG);
3391 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3392 #endif
3393 emit_shlimm(addr,3,temp);
3394 if (opcode[i]==0x22||opcode[i]==0x26) {
3395 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3396 }else{
535d208a 3397 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3398 }
535d208a 3399 emit_cmpimm(addr,RAM_SIZE);
3400 jaddr=(int)out;
3401 emit_jno(0);
3402 }
3403 else {
3404 if (opcode[i]==0x22||opcode[i]==0x26) {
3405 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3406 }else{
3407 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3408 }
57871462 3409 }
535d208a 3410 }else{ // using tlb
3411 int a;
3412 if(c) {
3413 a=-1;
3414 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3415 a=0xFFFFFFFC; // LWL/LWR
3416 }else{
3417 a=0xFFFFFFF8; // LDL/LDR
3418 }
3419 map=get_reg(i_regs->regmap,TLREG);
3420 assert(map>=0);
ea3d2e6e 3421 reglist&=~(1<<map);
535d208a 3422 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3423 if(c) {
3424 if (opcode[i]==0x22||opcode[i]==0x26) {
3425 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3426 }else{
3427 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3428 }
535d208a 3429 }
3430 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3431 }
3432 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3433 if(!c||memtarget) {
3434 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3435 emit_readword_indexed_tlb(0,temp2,map,temp2);
3436 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3437 }
3438 else
3439 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);