yet more random armv5 tweaks
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
0bbd1454 69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
57871462 100#include "fpu.h"
101
dd3a91a1 102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
57871462 104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
15776b68 173 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 174 literals[literalcount][0]=addr;
175 literals[literalcount][1]=val;
176 literalcount++;
177}
178
f76eeef9 179void *kill_pointer(void *stub)
57871462 180{
181 int *ptr=(int *)(stub+4);
182 assert((*ptr&0x0ff00000)==0x05900000);
183 u_int offset=*ptr&0xfff;
184 int **l_ptr=(void *)ptr+offset+8;
185 int *i_ptr=*l_ptr;
186 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 187 return i_ptr;
57871462 188}
189
f968d35d 190// find where external branch is liked to using addr of it's stub:
191// get address that insn one after stub loads (dyna_linker arg1),
192// treat it as a pointer to branch insn,
193// return addr where that branch jumps to
57871462 194int get_pointer(void *stub)
195{
196 //printf("get_pointer(%x)\n",(int)stub);
197 int *ptr=(int *)(stub+4);
f968d35d 198 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 199 u_int offset=*ptr&0xfff;
200 int **l_ptr=(void *)ptr+offset+8;
201 int *i_ptr=*l_ptr;
202 assert((*i_ptr&0x0f000000)==0x0a000000);
203 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
204}
205
206// Find the "clean" entry point from a "dirty" entry point
207// by skipping past the call to verify_code
208u_int get_clean_addr(int addr)
209{
210 int *ptr=(int *)addr;
211 #ifdef ARMv5_ONLY
212 ptr+=4;
213 #else
214 ptr+=6;
215 #endif
216 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
217 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
218 ptr++;
219 if((*ptr&0xFF000000)==0xea000000) {
220 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
221 }
222 return (u_int)ptr;
223}
224
225int verify_dirty(int addr)
226{
227 u_int *ptr=(u_int *)addr;
228 #ifdef ARMv5_ONLY
229 // get from literal pool
15776b68 230 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 231 u_int offset=*ptr&0xfff;
232 u_int *l_ptr=(void *)ptr+offset+8;
233 u_int source=l_ptr[0];
234 u_int copy=l_ptr[1];
235 u_int len=l_ptr[2];
236 ptr+=4;
237 #else
238 // ARMv7 movw/movt
239 assert((*ptr&0xFFF00000)==0xe3000000);
240 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
241 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
242 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
243 ptr+=6;
244 #endif
245 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
246 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 247 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 248 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
249 unsigned int page=source>>12;
250 unsigned int map_value=memory_map[page];
251 if(map_value>=0x80000000) return 0;
252 while(page<((source+len-1)>>12)) {
253 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
254 }
255 source = source+(map_value<<2);
256 }
257 //printf("verify_dirty: %x %x %x\n",source,copy,len);
258 return !memcmp((void *)source,(void *)copy,len);
259}
260
261// This doesn't necessarily find all clean entry points, just
262// guarantees that it's not dirty
263int isclean(int addr)
264{
265 #ifdef ARMv5_ONLY
266 int *ptr=((u_int *)addr)+4;
267 #else
268 int *ptr=((u_int *)addr)+6;
269 #endif
270 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
271 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
272 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
273 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
274 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
275 return 1;
276}
277
278void get_bounds(int addr,u_int *start,u_int *end)
279{
280 u_int *ptr=(u_int *)addr;
281 #ifdef ARMv5_ONLY
282 // get from literal pool
15776b68 283 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 284 u_int offset=*ptr&0xfff;
285 u_int *l_ptr=(void *)ptr+offset+8;
286 u_int source=l_ptr[0];
287 //u_int copy=l_ptr[1];
288 u_int len=l_ptr[2];
289 ptr+=4;
290 #else
291 // ARMv7 movw/movt
292 assert((*ptr&0xFFF00000)==0xe3000000);
293 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
294 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
295 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
296 ptr+=6;
297 #endif
298 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
299 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 300 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 301 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
302 if(memory_map[source>>12]>=0x80000000) source = 0;
303 else source = source+(memory_map[source>>12]<<2);
304 }
305 *start=source;
306 *end=source+len;
307}
308
309/* Register allocation */
310
311// Note: registers are allocated clean (unmodified state)
312// if you intend to modify the register, you must call dirty_reg().
313void alloc_reg(struct regstat *cur,int i,signed char reg)
314{
315 int r,hr;
316 int preferred_reg = (reg&7);
317 if(reg==CCREG) preferred_reg=HOST_CCREG;
318 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
319
320 // Don't allocate unused registers
321 if((cur->u>>reg)&1) return;
322
323 // see if it's already allocated
324 for(hr=0;hr<HOST_REGS;hr++)
325 {
326 if(cur->regmap[hr]==reg) return;
327 }
328
329 // Keep the same mapping if the register was already allocated in a loop
330 preferred_reg = loop_reg(i,reg,preferred_reg);
331
332 // Try to allocate the preferred register
333 if(cur->regmap[preferred_reg]==-1) {
334 cur->regmap[preferred_reg]=reg;
335 cur->dirty&=~(1<<preferred_reg);
336 cur->isconst&=~(1<<preferred_reg);
337 return;
338 }
339 r=cur->regmap[preferred_reg];
340 if(r<64&&((cur->u>>r)&1)) {
341 cur->regmap[preferred_reg]=reg;
342 cur->dirty&=~(1<<preferred_reg);
343 cur->isconst&=~(1<<preferred_reg);
344 return;
345 }
346 if(r>=64&&((cur->uu>>(r&63))&1)) {
347 cur->regmap[preferred_reg]=reg;
348 cur->dirty&=~(1<<preferred_reg);
349 cur->isconst&=~(1<<preferred_reg);
350 return;
351 }
352
353 // Clear any unneeded registers
354 // We try to keep the mapping consistent, if possible, because it
355 // makes branches easier (especially loops). So we try to allocate
356 // first (see above) before removing old mappings. If this is not
357 // possible then go ahead and clear out the registers that are no
358 // longer needed.
359 for(hr=0;hr<HOST_REGS;hr++)
360 {
361 r=cur->regmap[hr];
362 if(r>=0) {
363 if(r<64) {
364 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
365 }
366 else
367 {
368 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
369 }
370 }
371 }
372 // Try to allocate any available register, but prefer
373 // registers that have not been used recently.
374 if(i>0) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
377 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
378 cur->regmap[hr]=reg;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 return;
382 }
383 }
384 }
385 }
386 // Try to allocate any available register
387 for(hr=0;hr<HOST_REGS;hr++) {
388 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
389 cur->regmap[hr]=reg;
390 cur->dirty&=~(1<<hr);
391 cur->isconst&=~(1<<hr);
392 return;
393 }
394 }
395
396 // Ok, now we have to evict someone
397 // Pick a register we hopefully won't need soon
398 u_char hsn[MAXREG+1];
399 memset(hsn,10,sizeof(hsn));
400 int j;
401 lsn(hsn,i,&preferred_reg);
402 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
403 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
404 if(i>0) {
405 // Don't evict the cycle count at entry points, otherwise the entry
406 // stub will have to write it.
407 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
408 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
409 for(j=10;j>=3;j--)
410 {
411 // Alloc preferred register if available
412 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
413 for(hr=0;hr<HOST_REGS;hr++) {
414 // Evict both parts of a 64-bit register
415 if((cur->regmap[hr]&63)==r) {
416 cur->regmap[hr]=-1;
417 cur->dirty&=~(1<<hr);
418 cur->isconst&=~(1<<hr);
419 }
420 }
421 cur->regmap[preferred_reg]=reg;
422 return;
423 }
424 for(r=1;r<=MAXREG;r++)
425 {
426 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
427 for(hr=0;hr<HOST_REGS;hr++) {
428 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
429 if(cur->regmap[hr]==r+64) {
430 cur->regmap[hr]=reg;
431 cur->dirty&=~(1<<hr);
432 cur->isconst&=~(1<<hr);
433 return;
434 }
435 }
436 }
437 for(hr=0;hr<HOST_REGS;hr++) {
438 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
439 if(cur->regmap[hr]==r) {
440 cur->regmap[hr]=reg;
441 cur->dirty&=~(1<<hr);
442 cur->isconst&=~(1<<hr);
443 return;
444 }
445 }
446 }
447 }
448 }
449 }
450 }
451 for(j=10;j>=0;j--)
452 {
453 for(r=1;r<=MAXREG;r++)
454 {
455 if(hsn[r]==j) {
456 for(hr=0;hr<HOST_REGS;hr++) {
457 if(cur->regmap[hr]==r+64) {
458 cur->regmap[hr]=reg;
459 cur->dirty&=~(1<<hr);
460 cur->isconst&=~(1<<hr);
461 return;
462 }
463 }
464 for(hr=0;hr<HOST_REGS;hr++) {
465 if(cur->regmap[hr]==r) {
466 cur->regmap[hr]=reg;
467 cur->dirty&=~(1<<hr);
468 cur->isconst&=~(1<<hr);
469 return;
470 }
471 }
472 }
473 }
474 }
475 printf("This shouldn't happen (alloc_reg)");exit(1);
476}
477
478void alloc_reg64(struct regstat *cur,int i,signed char reg)
479{
480 int preferred_reg = 8+(reg&1);
481 int r,hr;
482
483 // allocate the lower 32 bits
484 alloc_reg(cur,i,reg);
485
486 // Don't allocate unused registers
487 if((cur->uu>>reg)&1) return;
488
489 // see if the upper half is already allocated
490 for(hr=0;hr<HOST_REGS;hr++)
491 {
492 if(cur->regmap[hr]==reg+64) return;
493 }
494
495 // Keep the same mapping if the register was already allocated in a loop
496 preferred_reg = loop_reg(i,reg,preferred_reg);
497
498 // Try to allocate the preferred register
499 if(cur->regmap[preferred_reg]==-1) {
500 cur->regmap[preferred_reg]=reg|64;
501 cur->dirty&=~(1<<preferred_reg);
502 cur->isconst&=~(1<<preferred_reg);
503 return;
504 }
505 r=cur->regmap[preferred_reg];
506 if(r<64&&((cur->u>>r)&1)) {
507 cur->regmap[preferred_reg]=reg|64;
508 cur->dirty&=~(1<<preferred_reg);
509 cur->isconst&=~(1<<preferred_reg);
510 return;
511 }
512 if(r>=64&&((cur->uu>>(r&63))&1)) {
513 cur->regmap[preferred_reg]=reg|64;
514 cur->dirty&=~(1<<preferred_reg);
515 cur->isconst&=~(1<<preferred_reg);
516 return;
517 }
518
519 // Clear any unneeded registers
520 // We try to keep the mapping consistent, if possible, because it
521 // makes branches easier (especially loops). So we try to allocate
522 // first (see above) before removing old mappings. If this is not
523 // possible then go ahead and clear out the registers that are no
524 // longer needed.
525 for(hr=HOST_REGS-1;hr>=0;hr--)
526 {
527 r=cur->regmap[hr];
528 if(r>=0) {
529 if(r<64) {
530 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
531 }
532 else
533 {
534 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
535 }
536 }
537 }
538 // Try to allocate any available register, but prefer
539 // registers that have not been used recently.
540 if(i>0) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
543 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
544 cur->regmap[hr]=reg|64;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 return;
548 }
549 }
550 }
551 }
552 // Try to allocate any available register
553 for(hr=0;hr<HOST_REGS;hr++) {
554 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
555 cur->regmap[hr]=reg|64;
556 cur->dirty&=~(1<<hr);
557 cur->isconst&=~(1<<hr);
558 return;
559 }
560 }
561
562 // Ok, now we have to evict someone
563 // Pick a register we hopefully won't need soon
564 u_char hsn[MAXREG+1];
565 memset(hsn,10,sizeof(hsn));
566 int j;
567 lsn(hsn,i,&preferred_reg);
568 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
569 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
570 if(i>0) {
571 // Don't evict the cycle count at entry points, otherwise the entry
572 // stub will have to write it.
573 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
574 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
575 for(j=10;j>=3;j--)
576 {
577 // Alloc preferred register if available
578 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
579 for(hr=0;hr<HOST_REGS;hr++) {
580 // Evict both parts of a 64-bit register
581 if((cur->regmap[hr]&63)==r) {
582 cur->regmap[hr]=-1;
583 cur->dirty&=~(1<<hr);
584 cur->isconst&=~(1<<hr);
585 }
586 }
587 cur->regmap[preferred_reg]=reg|64;
588 return;
589 }
590 for(r=1;r<=MAXREG;r++)
591 {
592 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
593 for(hr=0;hr<HOST_REGS;hr++) {
594 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
595 if(cur->regmap[hr]==r+64) {
596 cur->regmap[hr]=reg|64;
597 cur->dirty&=~(1<<hr);
598 cur->isconst&=~(1<<hr);
599 return;
600 }
601 }
602 }
603 for(hr=0;hr<HOST_REGS;hr++) {
604 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
605 if(cur->regmap[hr]==r) {
606 cur->regmap[hr]=reg|64;
607 cur->dirty&=~(1<<hr);
608 cur->isconst&=~(1<<hr);
609 return;
610 }
611 }
612 }
613 }
614 }
615 }
616 }
617 for(j=10;j>=0;j--)
618 {
619 for(r=1;r<=MAXREG;r++)
620 {
621 if(hsn[r]==j) {
622 for(hr=0;hr<HOST_REGS;hr++) {
623 if(cur->regmap[hr]==r+64) {
624 cur->regmap[hr]=reg|64;
625 cur->dirty&=~(1<<hr);
626 cur->isconst&=~(1<<hr);
627 return;
628 }
629 }
630 for(hr=0;hr<HOST_REGS;hr++) {
631 if(cur->regmap[hr]==r) {
632 cur->regmap[hr]=reg|64;
633 cur->dirty&=~(1<<hr);
634 cur->isconst&=~(1<<hr);
635 return;
636 }
637 }
638 }
639 }
640 }
641 printf("This shouldn't happen");exit(1);
642}
643
644// Allocate a temporary register. This is done without regard to
645// dirty status or whether the register we request is on the unneeded list
646// Note: This will only allocate one register, even if called multiple times
647void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
648{
649 int r,hr;
650 int preferred_reg = -1;
651
652 // see if it's already allocated
653 for(hr=0;hr<HOST_REGS;hr++)
654 {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
656 }
657
658 // Try to allocate any available register
659 for(hr=HOST_REGS-1;hr>=0;hr--) {
660 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
661 cur->regmap[hr]=reg;
662 cur->dirty&=~(1<<hr);
663 cur->isconst&=~(1<<hr);
664 return;
665 }
666 }
667
668 // Find an unneeded register
669 for(hr=HOST_REGS-1;hr>=0;hr--)
670 {
671 r=cur->regmap[hr];
672 if(r>=0) {
673 if(r<64) {
674 if((cur->u>>r)&1) {
675 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
682 }
683 else
684 {
685 if((cur->uu>>(r&63))&1) {
686 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
687 cur->regmap[hr]=reg;
688 cur->dirty&=~(1<<hr);
689 cur->isconst&=~(1<<hr);
690 return;
691 }
692 }
693 }
694 }
695 }
696
697 // Ok, now we have to evict someone
698 // Pick a register we hopefully won't need soon
699 // TODO: we might want to follow unconditional jumps here
700 // TODO: get rid of dupe code and make this into a function
701 u_char hsn[MAXREG+1];
702 memset(hsn,10,sizeof(hsn));
703 int j;
704 lsn(hsn,i,&preferred_reg);
705 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
706 if(i>0) {
707 // Don't evict the cycle count at entry points, otherwise the entry
708 // stub will have to write it.
709 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
710 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
711 for(j=10;j>=3;j--)
712 {
713 for(r=1;r<=MAXREG;r++)
714 {
715 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
716 for(hr=0;hr<HOST_REGS;hr++) {
717 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
718 if(cur->regmap[hr]==r+64) {
719 cur->regmap[hr]=reg;
720 cur->dirty&=~(1<<hr);
721 cur->isconst&=~(1<<hr);
722 return;
723 }
724 }
725 }
726 for(hr=0;hr<HOST_REGS;hr++) {
727 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
728 if(cur->regmap[hr]==r) {
729 cur->regmap[hr]=reg;
730 cur->dirty&=~(1<<hr);
731 cur->isconst&=~(1<<hr);
732 return;
733 }
734 }
735 }
736 }
737 }
738 }
739 }
740 for(j=10;j>=0;j--)
741 {
742 for(r=1;r<=MAXREG;r++)
743 {
744 if(hsn[r]==j) {
745 for(hr=0;hr<HOST_REGS;hr++) {
746 if(cur->regmap[hr]==r+64) {
747 cur->regmap[hr]=reg;
748 cur->dirty&=~(1<<hr);
749 cur->isconst&=~(1<<hr);
750 return;
751 }
752 }
753 for(hr=0;hr<HOST_REGS;hr++) {
754 if(cur->regmap[hr]==r) {
755 cur->regmap[hr]=reg;
756 cur->dirty&=~(1<<hr);
757 cur->isconst&=~(1<<hr);
758 return;
759 }
760 }
761 }
762 }
763 }
764 printf("This shouldn't happen");exit(1);
765}
766// Allocate a specific ARM register.
767void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
768{
769 int n;
f776eb14 770 int dirty=0;
57871462 771
772 // see if it's already allocated (and dealloc it)
773 for(n=0;n<HOST_REGS;n++)
774 {
f776eb14 775 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
776 dirty=(cur->dirty>>n)&1;
777 cur->regmap[n]=-1;
778 }
57871462 779 }
780
781 cur->regmap[hr]=reg;
782 cur->dirty&=~(1<<hr);
f776eb14 783 cur->dirty|=dirty<<hr;
57871462 784 cur->isconst&=~(1<<hr);
785}
786
787// Alloc cycle count into dedicated register
788alloc_cc(struct regstat *cur,int i)
789{
790 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
791}
792
793/* Special alloc */
794
795
796/* Assembler */
797
// Printable name of each host register number, used in debug traces.
// Registers 11, 13, 14 and 15 are shown as fp, sp, lr and pc.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"};
815
816void output_byte(u_char byte)
817{
818 *(out++)=byte;
819}
820void output_modrm(u_char mod,u_char rm,u_char ext)
821{
822 assert(mod<4);
823 assert(rm<8);
824 assert(ext<8);
825 u_char byte=(mod<<6)|(ext<<3)|rm;
826 *(out++)=byte;
827}
828void output_sib(u_char scale,u_char index,u_char base)
829{
830 assert(scale<4);
831 assert(index<8);
832 assert(base<8);
833 u_char byte=(scale<<6)|(index<<3)|base;
834 *(out++)=byte;
835}
836void output_w32(u_int word)
837{
838 *((u_int *)out)=word;
839 out+=4;
840}
// Build the register fields of an instruction word: rn in bits 19:16,
// rd in bits 15:12 and rm in bits 3:0. Each register number must be < 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn (bits 19:16) and rd (bits 15:12) fields together with an
// 8-bit immediate in bits 7:0 that is to be shifted left by `shift` bits.
// `shift` must be even; it is stored as ((32-shift)&30)<<7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
// Try to express `imm` as an 8-bit value plus an even rotation.
// On success returns 1 and stores the 12-bit operand field (rotation in
// bits 11:8, value in bits 7:0) in *encoded. On failure returns 0 and
// leaves *encoded as 0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // Rotate right two bits at a time until the value fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 871void genimm_checked(u_int imm,u_int *encoded)
872{
873 u_int ret=genimm(imm,encoded);
874 assert(ret);
875}
57871462 876u_int genjmp(u_int addr)
877{
878 int offset=addr-(int)out-8;
e80343e2 879 if(offset<-33554432||offset>=33554432) {
880 if (addr>2) {
881 printf("genjmp: out of range: %08x\n", offset);
882 exit(1);
883 }
884 return 0;
885 }
57871462 886 return ((u_int)offset>>2)&0xffffff;
887}
888
889void emit_mov(int rs,int rt)
890{
891 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
892 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
893}
894
895void emit_movs(int rs,int rt)
896{
897 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
898 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
899}
900
901void emit_add(int rs1,int rs2,int rt)
902{
903 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
904 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
905}
906
907void emit_adds(int rs1,int rs2,int rt)
908{
909 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
910 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
911}
912
913void emit_adcs(int rs1,int rs2,int rt)
914{
915 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
916 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
917}
918
919void emit_sbc(int rs1,int rs2,int rt)
920{
921 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
922 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
923}
924
925void emit_sbcs(int rs1,int rs2,int rt)
926{
927 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
928 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
929}
930
931void emit_neg(int rs, int rt)
932{
933 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
934 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
935}
936
937void emit_negs(int rs, int rt)
938{
939 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
940 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
941}
942
943void emit_sub(int rs1,int rs2,int rt)
944{
945 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
946 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
947}
948
949void emit_subs(int rs1,int rs2,int rt)
950{
951 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
952 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
953}
954
955void emit_zeroreg(int rt)
956{
957 assem_debug("mov %s,#0\n",regname[rt]);
958 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
959}
960
790ee18e 961void emit_loadlp(u_int imm,u_int rt)
962{
963 add_literal((int)out,imm);
964 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
965 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
966}
967void emit_movw(u_int imm,u_int rt)
968{
969 assert(imm<65536);
970 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
971 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
972}
973void emit_movt(u_int imm,u_int rt)
974{
975 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
976 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
977}
978void emit_movimm(u_int imm,u_int rt)
979{
980 u_int armval;
981 if(genimm(imm,&armval)) {
982 assem_debug("mov %s,#%d\n",regname[rt],imm);
983 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
984 }else if(genimm(~imm,&armval)) {
985 assem_debug("mvn %s,#%d\n",regname[rt],imm);
986 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
987 }else if(imm<65536) {
988 #ifdef ARMv5_ONLY
989 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
990 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
991 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
992 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
993 #else
994 emit_movw(imm,rt);
995 #endif
996 }else{
997 #ifdef ARMv5_ONLY
998 emit_loadlp(imm,rt);
999 #else
1000 emit_movw(imm&0x0000FFFF,rt);
1001 emit_movt(imm&0xFFFF0000,rt);
1002 #endif
1003 }
1004}
1005void emit_pcreladdr(u_int rt)
1006{
1007 assem_debug("add %s,pc,#?\n",regname[rt]);
1008 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1009}
1010
57871462 1011void emit_loadreg(int r, int hr)
1012{
3d624f89 1013#ifdef FORCE32
1014 if(r&64) {
1015 printf("64bit load in 32bit mode!\n");
7f2607ea 1016 assert(0);
1017 return;
3d624f89 1018 }
1019#endif
57871462 1020 if((r&63)==0)
1021 emit_zeroreg(hr);
1022 else {
3d624f89 1023 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1024 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1025 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1026 if(r==CCREG) addr=(int)&cycle_count;
1027 if(r==CSREG) addr=(int)&Status;
1028 if(r==FSREG) addr=(int)&FCR31;
1029 if(r==INVCP) addr=(int)&invc_ptr;
1030 u_int offset = addr-(u_int)&dynarec_local;
1031 assert(offset<4096);
1032 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1033 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1034 }
1035}
1036void emit_storereg(int r, int hr)
1037{
3d624f89 1038#ifdef FORCE32
1039 if(r&64) {
1040 printf("64bit store in 32bit mode!\n");
7f2607ea 1041 assert(0);
1042 return;
3d624f89 1043 }
1044#endif
1045 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1046 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1047 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1048 if(r==CCREG) addr=(int)&cycle_count;
1049 if(r==FSREG) addr=(int)&FCR31;
1050 u_int offset = addr-(u_int)&dynarec_local;
1051 assert(offset<4096);
1052 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1053 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1054}
1055
1056void emit_test(int rs, int rt)
1057{
1058 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1059 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1060}
1061
1062void emit_testimm(int rs,int imm)
1063{
1064 u_int armval;
5a05d80c 1065 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1066 genimm_checked(imm,&armval);
57871462 1067 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1068}
1069
b9b61529 1070void emit_testeqimm(int rs,int imm)
1071{
1072 u_int armval;
1073 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1074 genimm_checked(imm,&armval);
b9b61529 1075 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1076}
1077
57871462 1078void emit_not(int rs,int rt)
1079{
1080 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1081 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1082}
1083
b9b61529 1084void emit_mvnmi(int rs,int rt)
1085{
1086 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1087 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1088}
1089
57871462 1090void emit_and(u_int rs1,u_int rs2,u_int rt)
1091{
1092 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1094}
1095
1096void emit_or(u_int rs1,u_int rs2,u_int rt)
1097{
1098 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1100}
1101void emit_or_and_set_flags(int rs1,int rs2,int rt)
1102{
1103 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
f70d384d 1107void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1108{
1109 assert(rs<16);
1110 assert(rt<16);
1111 assert(imm<32);
1112 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1113 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1114}
1115
576bbd8f 1116void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1117{
1118 assert(rs<16);
1119 assert(rt<16);
1120 assert(imm<32);
1121 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1122 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1123}
1124
57871462 1125void emit_xor(u_int rs1,u_int rs2,u_int rt)
1126{
1127 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1128 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1129}
1130
57871462 1131void emit_addimm(u_int rs,int imm,u_int rt)
1132{
1133 assert(rs<16);
1134 assert(rt<16);
1135 if(imm!=0) {
1136 assert(imm>-65536&&imm<65536);
1137 u_int armval;
1138 if(genimm(imm,&armval)) {
1139 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1140 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1141 }else if(genimm(-imm,&armval)) {
1142 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1144 }else if(imm<0) {
1145 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1146 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1147 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1148 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1149 }else{
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1151 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1152 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1153 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1154 }
1155 }
1156 else if(rs!=rt) emit_mov(rs,rt);
1157}
1158
1159void emit_addimm_and_set_flags(int imm,int rt)
1160{
1161 assert(imm>-65536&&imm<65536);
1162 u_int armval;
1163 if(genimm(imm,&armval)) {
1164 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1165 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1166 }else if(genimm(-imm,&armval)) {
1167 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1168 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1169 }else if(imm<0) {
1170 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1171 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1172 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1173 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1174 }else{
1175 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1176 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1177 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1178 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1179 }
1180}
1181void emit_addimm_no_flags(u_int imm,u_int rt)
1182{
1183 emit_addimm(rt,imm,rt);
1184}
1185
1186void emit_addnop(u_int r)
1187{
1188 assert(r<16);
1189 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1190 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1191}
1192
1193void emit_adcimm(u_int rs,int imm,u_int rt)
1194{
1195 u_int armval;
cfbd3c6e 1196 genimm_checked(imm,&armval);
57871462 1197 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1198 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1199}
1200/*void emit_sbcimm(int imm,u_int rt)
1201{
1202 u_int armval;
cfbd3c6e 1203 genimm_checked(imm,&armval);
57871462 1204 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1205 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1206}*/
1207void emit_sbbimm(int imm,u_int rt)
1208{
1209 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1210 assert(rt<8);
1211 if(imm<128&&imm>=-128) {
1212 output_byte(0x83);
1213 output_modrm(3,rt,3);
1214 output_byte(imm);
1215 }
1216 else
1217 {
1218 output_byte(0x81);
1219 output_modrm(3,rt,3);
1220 output_w32(imm);
1221 }
1222}
1223void emit_rscimm(int rs,int imm,u_int rt)
1224{
1225 assert(0);
1226 u_int armval;
cfbd3c6e 1227 genimm_checked(imm,&armval);
57871462 1228 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1229 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1230}
1231
1232void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1233{
1234 // TODO: if(genimm(imm,&armval)) ...
1235 // else
1236 emit_movimm(imm,HOST_TEMPREG);
1237 emit_adds(HOST_TEMPREG,rsl,rtl);
1238 emit_adcimm(rsh,0,rth);
1239}
1240
1241void emit_sbb(int rs1,int rs2)
1242{
1243 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1244 output_byte(0x19);
1245 output_modrm(3,rs1,rs2);
1246}
1247
1248void emit_andimm(int rs,int imm,int rt)
1249{
1250 u_int armval;
790ee18e 1251 if(imm==0) {
1252 emit_zeroreg(rt);
1253 }else if(genimm(imm,&armval)) {
57871462 1254 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(genimm(~imm,&armval)) {
1257 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1258 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1259 }else if(imm==65535) {
1260 #ifdef ARMv5_ONLY
1261 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1262 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1263 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1264 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1265 #else
1266 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1267 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1268 #endif
1269 }else{
1270 assert(imm>0&&imm<65535);
1271 #ifdef ARMv5_ONLY
1272 assem_debug("mov r14,#%d\n",imm&0xFF00);
1273 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1274 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1275 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1276 #else
1277 emit_movw(imm,HOST_TEMPREG);
1278 #endif
1279 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1280 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1281 }
1282}
1283
1284void emit_orimm(int rs,int imm,int rt)
1285{
1286 u_int armval;
790ee18e 1287 if(imm==0) {
1288 if(rs!=rt) emit_mov(rs,rt);
1289 }else if(genimm(imm,&armval)) {
57871462 1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1291 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1292 }else{
1293 assert(imm>0&&imm<65536);
1294 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1295 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1296 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1297 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1298 }
1299}
1300
1301void emit_xorimm(int rs,int imm,int rt)
1302{
57871462 1303 u_int armval;
790ee18e 1304 if(imm==0) {
1305 if(rs!=rt) emit_mov(rs,rt);
1306 }else if(genimm(imm,&armval)) {
57871462 1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1308 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1309 }else{
514ed0d9 1310 assert(imm>0&&imm<65536);
57871462 1311 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1312 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1313 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1314 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1315 }
1316}
1317
1318void emit_shlimm(int rs,u_int imm,int rt)
1319{
1320 assert(imm>0);
1321 assert(imm<32);
1322 //if(imm==1) ...
1323 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1325}
1326
1327void emit_shrimm(int rs,u_int imm,int rt)
1328{
1329 assert(imm>0);
1330 assert(imm<32);
1331 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1332 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1333}
1334
1335void emit_sarimm(int rs,u_int imm,int rt)
1336{
1337 assert(imm>0);
1338 assert(imm<32);
1339 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1341}
1342
1343void emit_rorimm(int rs,u_int imm,int rt)
1344{
1345 assert(imm>0);
1346 assert(imm<32);
1347 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1349}
1350
1351void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1352{
1353 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1354 assert(imm>0);
1355 assert(imm<32);
1356 //if(imm==1) ...
1357 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1358 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1359 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1360 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1361}
1362
1363void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1364{
1365 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1366 assert(imm>0);
1367 assert(imm<32);
1368 //if(imm==1) ...
1369 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1370 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1371 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1372 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1373}
1374
b9b61529 1375void emit_signextend16(int rs,int rt)
1376{
1377 #ifdef ARMv5_ONLY
1378 emit_shlimm(rs,16,rt);
1379 emit_sarimm(rt,16,rt);
1380 #else
1381 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1382 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1383 #endif
1384}
1385
57871462 1386void emit_shl(u_int rs,u_int shift,u_int rt)
1387{
1388 assert(rs<16);
1389 assert(rt<16);
1390 assert(shift<16);
1391 //if(imm==1) ...
1392 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1393 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1394}
1395void emit_shr(u_int rs,u_int shift,u_int rt)
1396{
1397 assert(rs<16);
1398 assert(rt<16);
1399 assert(shift<16);
1400 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1401 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1402}
1403void emit_sar(u_int rs,u_int shift,u_int rt)
1404{
1405 assert(rs<16);
1406 assert(rt<16);
1407 assert(shift<16);
1408 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1409 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1410}
1411void emit_shlcl(int r)
1412{
1413 assem_debug("shl %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416void emit_shrcl(int r)
1417{
1418 assem_debug("shr %%%s,%%cl\n",regname[r]);
1419 assert(0);
1420}
1421void emit_sarcl(int r)
1422{
1423 assem_debug("sar %%%s,%%cl\n",regname[r]);
1424 assert(0);
1425}
1426
1427void emit_shldcl(int r1,int r2)
1428{
1429 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1430 assert(0);
1431}
1432void emit_shrdcl(int r1,int r2)
1433{
1434 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1435 assert(0);
1436}
1437void emit_orrshl(u_int rs,u_int shift,u_int rt)
1438{
1439 assert(rs<16);
1440 assert(rt<16);
1441 assert(shift<16);
1442 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1443 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1444}
1445void emit_orrshr(u_int rs,u_int shift,u_int rt)
1446{
1447 assert(rs<16);
1448 assert(rt<16);
1449 assert(shift<16);
1450 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1451 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1452}
1453
1454void emit_cmpimm(int rs,int imm)
1455{
1456 u_int armval;
1457 if(genimm(imm,&armval)) {
5a05d80c 1458 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1459 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1460 }else if(genimm(-imm,&armval)) {
5a05d80c 1461 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1462 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1463 }else if(imm>0) {
1464 assert(imm<65536);
1465 #ifdef ARMv5_ONLY
1466 emit_movimm(imm,HOST_TEMPREG);
1467 #else
1468 emit_movw(imm,HOST_TEMPREG);
1469 #endif
1470 assem_debug("cmp %s,r14\n",regname[rs]);
1471 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1472 }else{
1473 assert(imm>-65536);
1474 #ifdef ARMv5_ONLY
1475 emit_movimm(-imm,HOST_TEMPREG);
1476 #else
1477 emit_movw(-imm,HOST_TEMPREG);
1478 #endif
1479 assem_debug("cmn %s,r14\n",regname[rs]);
1480 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1481 }
1482}
1483
1484void emit_cmovne(u_int *addr,int rt)
1485{
1486 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovl(u_int *addr,int rt)
1490{
1491 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1492 assert(0);
1493}
1494void emit_cmovs(u_int *addr,int rt)
1495{
1496 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1497 assert(0);
1498}
1499void emit_cmovne_imm(int imm,int rt)
1500{
1501 assem_debug("movne %s,#%d\n",regname[rt],imm);
1502 u_int armval;
cfbd3c6e 1503 genimm_checked(imm,&armval);
57871462 1504 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1505}
1506void emit_cmovl_imm(int imm,int rt)
1507{
1508 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1509 u_int armval;
cfbd3c6e 1510 genimm_checked(imm,&armval);
57871462 1511 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1512}
1513void emit_cmovb_imm(int imm,int rt)
1514{
1515 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1516 u_int armval;
cfbd3c6e 1517 genimm_checked(imm,&armval);
57871462 1518 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1519}
1520void emit_cmovs_imm(int imm,int rt)
1521{
1522 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1523 u_int armval;
cfbd3c6e 1524 genimm_checked(imm,&armval);
57871462 1525 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1526}
1527void emit_cmove_reg(int rs,int rt)
1528{
1529 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovne_reg(int rs,int rt)
1533{
1534 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1536}
1537void emit_cmovl_reg(int rs,int rt)
1538{
1539 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1540 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1541}
1542void emit_cmovs_reg(int rs,int rt)
1543{
1544 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1545 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1546}
1547
// Set rt to 1 if rs < imm (via the "movlt" emitter), else 0.
// rt is cleared before the compare unless it is the same register as rs,
// in which case it is cleared after the compare has read rs.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Same sequence as emit_slti32() but selecting the 1 with the "movcc"
// emitter (emit_cmovb_imm) instead of "movlt".
// rt is cleared before the compare unless it is the same register as rs,
// in which case it is cleared after the compare has read rs.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// 64-bit set-if-less-than-immediate on the pair rsh:rsl, result in rt.
// The low words are compared with emit_slti32(); the result is then
// corrected from the high word, which is tested against 0 for imm >= 0 and
// compared against -1 for imm < 0. rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned counterpart of emit_slti64_32(): the low words are compared with
// emit_sltiu32(), then rt is overridden from the high word - forced to 0
// when rsh is non-zero (imm >= 0), or forced to 1 when rsh differs from -1
// (imm < 0). rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1594
1595void emit_cmp(int rs,int rt)
1596{
1597 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1598 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1599}
// Set rt to 1 if rs > 0, else 0: compare rs with 1, load 1 into rt, then
// replace it with 0 through the "movlt" emitter.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Leave rs in rt, replaced by 1 through the "movne" emitter.
// When rs and rt are the same register only a test is emitted; otherwise
// emit_movs() performs the copy.
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" on the pair rsh:rsl, result in rt.
// Starts from emit_set_gz32() on the low word, then tests the high word and
// overrides rt with 1 ("movne") and then 0 ("movmi").
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// 64-bit "non-zero" on the pair rsh:rsl: rt receives rsh|rsl via the
// flag-setting orr, replaced by 1 through the "movne" emitter.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (via the "movlt" emitter), else 0.
// rt is cleared before the compare unless it is one of the source
// registers, in which case it is cleared after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Same sequence as emit_set_if_less32() but selecting the 1 with the
// "movcc" emitter (emit_cmovb_imm).
// rt is cleared before the compare unless it is one of the source
// registers, in which case it is cleared after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1644void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1645{
1646 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1647 assert(u1!=rt);
1648 assert(u2!=rt);
1649 emit_cmp(l1,l2);
1650 emit_movimm(0,rt);
1651 emit_sbcs(u1,u2,HOST_TEMPREG);
1652 emit_cmovl_imm(1,rt);
1653}
1654void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1655{
1656 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1657 assert(u1!=rt);
1658 assert(u2!=rt);
1659 emit_cmp(l1,l2);
1660 emit_movimm(0,rt);
1661 emit_sbcs(u1,u2,HOST_TEMPREG);
1662 emit_cmovb_imm(1,rt);
1663}
1664
1665void emit_call(int a)
1666{
1667 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1668 u_int offset=genjmp(a);
1669 output_w32(0xeb000000|offset);
1670}
1671void emit_jmp(int a)
1672{
1673 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1674 u_int offset=genjmp(a);
1675 output_w32(0xea000000|offset);
1676}
1677void emit_jne(int a)
1678{
1679 assem_debug("bne %x\n",a);
1680 u_int offset=genjmp(a);
1681 output_w32(0x1a000000|offset);
1682}
1683void emit_jeq(int a)
1684{
1685 assem_debug("beq %x\n",a);
1686 u_int offset=genjmp(a);
1687 output_w32(0x0a000000|offset);
1688}
1689void emit_js(int a)
1690{
1691 assem_debug("bmi %x\n",a);
1692 u_int offset=genjmp(a);
1693 output_w32(0x4a000000|offset);
1694}
1695void emit_jns(int a)
1696{
1697 assem_debug("bpl %x\n",a);
1698 u_int offset=genjmp(a);
1699 output_w32(0x5a000000|offset);
1700}
1701void emit_jl(int a)
1702{
1703 assem_debug("blt %x\n",a);
1704 u_int offset=genjmp(a);
1705 output_w32(0xba000000|offset);
1706}
1707void emit_jge(int a)
1708{
1709 assem_debug("bge %x\n",a);
1710 u_int offset=genjmp(a);
1711 output_w32(0xaa000000|offset);
1712}
1713void emit_jno(int a)
1714{
1715 assem_debug("bvc %x\n",a);
1716 u_int offset=genjmp(a);
1717 output_w32(0x7a000000|offset);
1718}
1719void emit_jc(int a)
1720{
1721 assem_debug("bcs %x\n",a);
1722 u_int offset=genjmp(a);
1723 output_w32(0x2a000000|offset);
1724}
1725void emit_jcc(int a)
1726{
1727 assem_debug("bcc %x\n",a);
1728 u_int offset=genjmp(a);
1729 output_w32(0x3a000000|offset);
1730}
1731
// Unimplemented here: logs an x86-style "push $imm" line and then trips
// assert(0). No instruction is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Unimplemented here: logs "pusha" and then trips assert(0).
// No instruction is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented here: logs "popa" and then trips assert(0).
// No instruction is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1747void emit_pushreg(u_int r)
1748{
1749 assem_debug("push %%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_popreg(u_int r)
1753{
1754 assem_debug("pop %%%s\n",regname[r]);
1755 assert(0);
1756}
1757void emit_callreg(u_int r)
1758{
1759 assem_debug("call *%%%s\n",regname[r]);
1760 assert(0);
1761}
1762void emit_jmpreg(u_int r)
1763{
1764 assem_debug("mov pc,%s\n",regname[r]);
1765 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1766}
1767
1768void emit_readword_indexed(int offset, int rs, int rt)
1769{
1770 assert(offset>-4096&&offset<4096);
1771 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1772 if(offset>=0) {
1773 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1774 }else{
1775 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1776 }
1777}
1778void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1779{
1780 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1781 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1782}
// Word load that optionally goes through a map register.
// map < 0: plain offset load from rs+addr. Otherwise addr must be 0 and the
// load uses rs plus map shifted left by 2.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load two consecutive words into rh (skipped when rh < 0) and rl.
// map < 0: offset loads from rs+addr and rs+addr+4.
// Otherwise both loads go through the map register, which is incremented by
// 1 between them (so map is modified); rh must not be the same as rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1803void emit_movsbl_indexed(int offset, int rs, int rt)
1804{
1805 assert(offset>-256&&offset<256);
1806 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1807 if(offset>=0) {
1808 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1809 }else{
1810 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1811 }
1812}
1813void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1814{
1815 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1816 else {
1817 if(addr==0) {
1818 emit_shlimm(map,2,map);
1819 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1820 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1821 }else{
1822 assert(addr>-256&&addr<256);
1823 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1824 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1825 emit_movsbl_indexed(addr, rt, rt);
1826 }
1827 }
1828}
1829void emit_movswl_indexed(int offset, int rs, int rt)
1830{
1831 assert(offset>-256&&offset<256);
1832 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1833 if(offset>=0) {
1834 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1835 }else{
1836 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1837 }
1838}
1839void emit_movzbl_indexed(int offset, int rs, int rt)
1840{
1841 assert(offset>-4096&&offset<4096);
1842 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1843 if(offset>=0) {
1844 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1845 }else{
1846 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1847 }
1848}
1849void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1850{
1851 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1852 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1853}
// Zero-extending byte load that optionally goes through a map register.
//  - map < 0: plain offset load from rs+addr.
//  - addr == 0: load from rs plus map shifted left by 2.
//  - otherwise: rt = rs + addr first, then load from rt plus map shifted
//    left by 2.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1866void emit_movzwl_indexed(int offset, int rs, int rt)
1867{
1868 assert(offset>-256&&offset<256);
1869 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1870 if(offset>=0) {
1871 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1872 }else{
1873 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1874 }
1875}
1876void emit_readword(int addr, int rt)
1877{
1878 u_int offset = addr-(u_int)&dynarec_local;
1879 assert(offset<4096);
1880 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1881 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1882}
1883void emit_movsbl(int addr, int rt)
1884{
1885 u_int offset = addr-(u_int)&dynarec_local;
1886 assert(offset<256);
1887 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1888 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1889}
1890void emit_movswl(int addr, int rt)
1891{
1892 u_int offset = addr-(u_int)&dynarec_local;
1893 assert(offset<256);
1894 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1895 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1896}
1897void emit_movzbl(int addr, int rt)
1898{
1899 u_int offset = addr-(u_int)&dynarec_local;
1900 assert(offset<4096);
1901 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1902 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1903}
1904void emit_movzwl(int addr, int rt)
1905{
1906 u_int offset = addr-(u_int)&dynarec_local;
1907 assert(offset<256);
1908 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1909 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1910}
1911void emit_movzwl_reg(int rs, int rt)
1912{
1913 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1914 assert(0);
1915}
1916
1917void emit_xchg(int rs, int rt)
1918{
1919 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1920 assert(0);
1921}
1922void emit_writeword_indexed(int rt, int offset, int rs)
1923{
1924 assert(offset>-4096&&offset<4096);
1925 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1926 if(offset>=0) {
1927 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1928 }else{
1929 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1930 }
1931}
1932void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1933{
1934 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1935 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1936}
// Word store that optionally goes through a map register.
// map < 0: plain offset store to rs+addr. Otherwise addr must be 0 and the
// store uses rs plus map shifted left by 2. temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store the pair rh,rl as two consecutive words.
//  - map < 0: offset stores to rs+addr (skipped when rh < 0) and rs+addr+4.
//  - otherwise rh must be >= 0 and both stores go through a map register:
//    when temp differs from rs, temp = map+1 is prepared and used for the
//    second word; when temp is rs itself, rs is advanced by 4 in place
//    between the two stores instead.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1961void emit_writehword_indexed(int rt, int offset, int rs)
1962{
1963 assert(offset>-256&&offset<256);
1964 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1967 }else{
1968 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1969 }
1970}
1971void emit_writebyte_indexed(int rt, int offset, int rs)
1972{
1973 assert(offset>-4096&&offset<4096);
1974 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1975 if(offset>=0) {
1976 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1977 }else{
1978 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1979 }
1980}
1981void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1982{
1983 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1984 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1985}
// Byte store that optionally goes through a map register.
//  - map < 0: plain offset store to rs+addr.
//  - addr == 0: store to rs plus map shifted left by 2.
//  - otherwise: temp = rs + addr first, then store to temp plus map shifted
//    left by 2.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1998void emit_writeword(int rt, int addr)
1999{
2000 u_int offset = addr-(u_int)&dynarec_local;
2001 assert(offset<4096);
2002 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2003 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2004}
2005void emit_writehword(int rt, int addr)
2006{
2007 u_int offset = addr-(u_int)&dynarec_local;
2008 assert(offset<256);
2009 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2010 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2011}
2012void emit_writebyte(int rt, int addr)
2013{
2014 u_int offset = addr-(u_int)&dynarec_local;
2015 assert(offset<4096);
74426039 2016 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2017 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2018}
// Unimplemented here: logs an x86-style "movl $imm,addr" line and then
// trips assert(0). No instruction is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented here: logs an x86-style "movb $imm,addr" line and then
// trips assert(0). No instruction is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2029
2030void emit_mul(int rs)
2031{
2032 assem_debug("mul %%%s\n",regname[rs]);
2033 assert(0);
2034}
2035void emit_imul(int rs)
2036{
2037 assem_debug("imul %%%s\n",regname[rs]);
2038 assert(0);
2039}
2040void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2041{
2042 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2043 assert(rs1<16);
2044 assert(rs2<16);
2045 assert(hi<16);
2046 assert(lo<16);
2047 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2048}
2049void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2050{
2051 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2052 assert(rs1<16);
2053 assert(rs2<16);
2054 assert(hi<16);
2055 assert(lo<16);
2056 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2057}
2058
2059void emit_div(int rs)
2060{
2061 assem_debug("div %%%s\n",regname[rs]);
2062 assert(0);
2063}
2064void emit_idiv(int rs)
2065{
2066 assem_debug("idiv %%%s\n",regname[rs]);
2067 assert(0);
2068}
// Unimplemented here: logs "cdq" and then trips assert(0).
// No instruction is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2074
2075void emit_clz(int rs,int rt)
2076{
2077 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2078 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2079}
2080
2081void emit_subcs(int rs1,int rs2,int rt)
2082{
2083 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2084 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2085}
2086
2087void emit_shrcc_imm(int rs,u_int imm,int rt)
2088{
2089 assert(imm>0);
2090 assert(imm<32);
2091 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2092 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2093}
2094
2095void emit_negmi(int rs, int rt)
2096{
2097 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2098 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2099}
2100
2101void emit_negsmi(int rs, int rt)
2102{
2103 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2104 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2105}
2106
2107void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2108{
2109 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2110 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2111}
2112
2113void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2114{
2115 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2116 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2117}
2118
2119void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2120{
2121 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2122 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2123}
2124
2125void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2126{
2127 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2128 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2129}
2130
2131void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2132{
2133 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2134 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2135}
2136
2137void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2138{
2139 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2140 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2141}
2142
2143void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2144{
2145 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2146 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2147}
2148
2149void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2150{
2151 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2152 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2153}
2154
2155void emit_teq(int rs, int rt)
2156{
2157 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2158 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2159}
2160
2161void emit_rsbimm(int rs, int imm, int rt)
2162{
2163 u_int armval;
cfbd3c6e 2164 genimm_checked(imm,&armval);
57871462 2165 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2166 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2167}
2168
2169// Load 2 immediates optimizing for small code size
2170void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2171{
2172 emit_movimm(imm1,rt1);
2173 u_int armval;
2174 if(genimm(imm2-imm1,&armval)) {
2175 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2176 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2177 }else if(genimm(imm1-imm2,&armval)) {
2178 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2179 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2180 }
2181 else emit_movimm(imm2,rt2);
2182}
2183
2184// Conditionally select one of two immediates, optimizing for small code size
2185// This will only be called if HAVE_CMOV_IMM is defined
2186void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2187{
2188 u_int armval;
2189 if(genimm(imm2-imm1,&armval)) {
2190 emit_movimm(imm1,rt);
2191 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2192 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2193 }else if(genimm(imm1-imm2,&armval)) {
2194 emit_movimm(imm1,rt);
2195 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2196 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2197 }
2198 else {
2199 #ifdef ARMv5_ONLY
2200 emit_movimm(imm1,rt);
2201 add_literal((int)out,imm2);
2202 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2203 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2204 #else
2205 emit_movw(imm1&0x0000FFFF,rt);
2206 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2207 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2208 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2209 }
2210 emit_movt(imm1&0xFFFF0000,rt);
2211 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2212 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2213 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2214 }
2215 #endif
2216 }
2217}
2218
// special case for checking invalid_code
// Not implemented in this backend: always trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2224
2225// special case for checking invalid_code
2226void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2227{
2228 assert(imm<128&&imm>=0);
2229 assert(r>=0&&r<16);
2230 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2231 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2232 emit_cmpimm(HOST_TEMPREG,imm);
2233}
2234
2235// special case for tlb mapping
2236void emit_addsr12(int rs1,int rs2,int rt)
2237{
2238 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2239 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2240}
2241
0bbd1454 2242void emit_callne(int a)
2243{
2244 assem_debug("blne %x\n",a);
2245 u_int offset=genjmp(a);
2246 output_w32(0x1b000000|offset);
2247}
2248
// Used to preload hash table entries
// Writes two opcode bytes (0x0F, 0x18), a modrm byte and the 32-bit address.
// NOTE(review): this byte sequence looks like an x86-style encoding rather
// than an ARM instruction word - confirm whether this function is still used.
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2258void emit_prefetchreg(int r)
2259{
2260 assem_debug("pld %s\n",regname[r]);
2261 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2262}
2263
2264// Special case for mini_ht
2265void emit_ldreq_indexed(int rs, u_int offset, int rt)
2266{
2267 assert(offset<4096);
2268 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2269 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2270}
2271
2272void emit_flds(int r,int sr)
2273{
2274 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2275 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2276}
2277
2278void emit_vldr(int r,int vr)
2279{
2280 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2281 output_w32(0xed900b00|(vr<<12)|(r<<16));
2282}
2283
2284void emit_fsts(int sr,int r)
2285{
2286 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2287 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2288}
2289
2290void emit_vstr(int vr,int r)
2291{
2292 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2293 output_w32(0xed800b00|(vr<<12)|(r<<16));
2294}
2295
// Emit "ftosizs s<d>,s<s>" (source s, destination d; both single-precision
// registers, low four bits of each number encoded).
void emit_ftosizs(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst_field|src_field);
}
2301
// Emit "ftosizd s<d>,d<s>": source is double-precision register s (low
// three bits encoded), destination is single-precision register d.
void emit_ftosizd(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=s&7;
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst_field|src_field);
}
2307
// Emit "fsitos s<d>,s<s>" (source s, destination d; both single-precision
// registers, low four bits of each number encoded).
void emit_fsitos(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst_field|src_field);
}
2313
// Emit "fsitod d<d>,s<s>": source is single-precision register s,
// destination is double-precision register d (low three bits encoded).
void emit_fsitod(int s,int d)
{
  int dst_field=(d&7)<<12;
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst_field|src_field);
}
2319
// Emit "fcvtds d<d>,s<s>": source is single-precision register s,
// destination is double-precision register d (low three bits encoded).
void emit_fcvtds(int s,int d)
{
  int dst_field=(d&7)<<12;
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst_field|src_field);
}
2325
// Emit "fcvtsd s<d>,d<s>": source is double-precision register s (low
// three bits encoded), destination is single-precision register d.
void emit_fcvtsd(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=s&7;
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst_field|src_field);
}
2331
// Emit "fsqrts s<d>,s<s>" (source s, destination d). Both operands use the
// single-precision register field layout, so the debug text names both as
// s-registers.
void emit_fsqrts(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|dst_field|src_field);
}
2337
// Emit "fsqrtd d<d>,d<s>" (source s, destination d). Both operands use the
// double-precision register field layout (low three bits encoded), so the
// debug text names both as d-registers.
void emit_fsqrtd(int s,int d)
{
  int dst_field=(d&7)<<12;
  int src_field=s&7;
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|dst_field|src_field);
}
2343
// Emit "fabss s<d>,s<s>" (source s, destination d). Both operands use the
// single-precision register field layout, so the debug text names both as
// s-registers.
void emit_fabss(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|dst_field|src_field);
}
2349
// Emit "fabsd d<d>,d<s>" (source s, destination d). Both operands use the
// double-precision register field layout (low three bits encoded), so the
// debug text names both as d-registers.
void emit_fabsd(int s,int d)
{
  int dst_field=(d&7)<<12;
  int src_field=s&7;
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|dst_field|src_field);
}
2355
// Emit "fnegs s<d>,s<s>" (source s, destination d). Both operands use the
// single-precision register field layout, so the debug text names both as
// s-registers.
void emit_fnegs(int s,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|dst_field|src_field);
}
2361
// Emit "fnegd d<d>,d<s>" (source s, destination d). Both operands use the
// double-precision register field layout (low three bits encoded), so the
// debug text names both as d-registers.
void emit_fnegd(int s,int d)
{
  int dst_field=(d&7)<<12;
  int src_field=s&7;
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|dst_field|src_field);
}
2367
// Emit "fadds s<d>,s<s1>,s<s2>" on single-precision registers (low four
// bits of each register number encoded).
void emit_fadds(int s1,int s2,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst_field|lhs_field|rhs_field);
}
2373
// Emit "faddd d<d>,d<s1>,d<s2>" on double-precision registers (low three
// bits of each register number encoded).
void emit_faddd(int s1,int s2,int d)
{
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst_field|lhs_field|rhs_field);
}
2379
// Emit "fsubs s<d>,s<s1>,s<s2>" on single-precision registers (low four
// bits of each register number encoded).
void emit_fsubs(int s1,int s2,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst_field|lhs_field|rhs_field);
}
2385
// Emit "fsubd d<d>,d<s1>,d<s2>" on double-precision registers (low three
// bits of each register number encoded).
void emit_fsubd(int s1,int s2,int d)
{
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst_field|lhs_field|rhs_field);
}
2391
// Emit "fmuls s<d>,s<s1>,s<s2>" on single-precision registers (low four
// bits of each register number encoded).
void emit_fmuls(int s1,int s2,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst_field|lhs_field|rhs_field);
}
2397
// Emit "fmuld d<d>,d<s1>,d<s2>" on double-precision registers (low three
// bits of each register number encoded).
void emit_fmuld(int s1,int s2,int d)
{
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst_field|lhs_field|rhs_field);
}
2403
// Emit "fdivs s<d>,s<s1>,s<s2>" on single-precision registers (low four
// bits of each register number encoded).
void emit_fdivs(int s1,int s2,int d)
{
  int dst_field=((d&14)<<11)|((d&1)<<22);
  int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst_field|lhs_field|rhs_field);
}
2409
// Emit "fdivd d<d>,d<s1>,d<s2>" on double-precision registers (low three
// bits of each register number encoded).
void emit_fdivd(int s1,int s2,int d)
{
  int dst_field=(d&7)<<12;
  int lhs_field=(s1&7)<<16;
  int rhs_field=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst_field|lhs_field|rhs_field);
}
2415
2416void emit_fcmps(int x,int y)
2417{
2418 assem_debug("fcmps s14, s15\n");
2419 output_w32(0xeeb47a67);
2420}
2421
2422void emit_fcmpd(int x,int y)
2423{
2424 assem_debug("fcmpd d6, d7\n");
2425 output_w32(0xeeb46b47);
2426}
2427
2428void emit_fmstat()
2429{
2430 assem_debug("fmstat\n");
2431 output_w32(0xeef1fa10);
2432}
2433
2434void emit_bicne_imm(int rs,int imm,int rt)
2435{
2436 u_int armval;
cfbd3c6e 2437 genimm_checked(imm,&armval);
57871462 2438 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2439 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2440}
2441
2442void emit_biccs_imm(int rs,int imm,int rt)
2443{
2444 u_int armval;
cfbd3c6e 2445 genimm_checked(imm,&armval);
57871462 2446 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2447 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2448}
2449
2450void emit_bicvc_imm(int rs,int imm,int rt)
2451{
2452 u_int armval;
cfbd3c6e 2453 genimm_checked(imm,&armval);
57871462 2454 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2455 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2456}
2457
2458void emit_bichi_imm(int rs,int imm,int rt)
2459{
2460 u_int armval;
cfbd3c6e 2461 genimm_checked(imm,&armval);
57871462 2462 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2463 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2464}
2465
2466void emit_orrvs_imm(int rs,int imm,int rt)
2467{
2468 u_int armval;
cfbd3c6e 2469 genimm_checked(imm,&armval);
57871462 2470 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2471 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2472}
2473
b9b61529 2474void emit_orrne_imm(int rs,int imm,int rt)
2475{
2476 u_int armval;
cfbd3c6e 2477 genimm_checked(imm,&armval);
b9b61529 2478 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2479 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2480}
2481
2482void emit_andne_imm(int rs,int imm,int rt)
2483{
2484 u_int armval;
cfbd3c6e 2485 genimm_checked(imm,&armval);
b9b61529 2486 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2487 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2488}
2489
57871462 2490void emit_jno_unlikely(int a)
2491{
2492 //emit_jno(a);
2493 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2494 output_w32(0x72800000|rd_rn_rm(15,15,0));
2495}
2496
2497// Save registers before function call
2498void save_regs(u_int reglist)
2499{
2500 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2501 if(!reglist) return;
2502 assem_debug("stmia fp,{");
2503 if(reglist&1) assem_debug("r0, ");
2504 if(reglist&2) assem_debug("r1, ");
2505 if(reglist&4) assem_debug("r2, ");
2506 if(reglist&8) assem_debug("r3, ");
2507 if(reglist&0x1000) assem_debug("r12");
2508 assem_debug("}\n");
2509 output_w32(0xe88b0000|reglist);
2510}
2511// Restore registers after function call
2512void restore_regs(u_int reglist)
2513{
2514 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2515 if(!reglist) return;
2516 assem_debug("ldmia fp,{");
2517 if(reglist&1) assem_debug("r0, ");
2518 if(reglist&2) assem_debug("r1, ");
2519 if(reglist&4) assem_debug("r2, ");
2520 if(reglist&8) assem_debug("r3, ");
2521 if(reglist&0x1000) assem_debug("r12");
2522 assem_debug("}\n");
2523 output_w32(0xe89b0000|reglist);
2524}
2525
2526// Write back consts using r14 so we don't disturb the other registers
2527void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2528{
2529 int hr;
2530 for(hr=0;hr<HOST_REGS;hr++) {
2531 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2532 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2533 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2534 int value=constmap[i][hr];
2535 if(value==0) {
2536 emit_zeroreg(HOST_TEMPREG);
2537 }
2538 else {
2539 emit_movimm(value,HOST_TEMPREG);
2540 }
2541 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2542#ifndef FORCE32
57871462 2543 if((i_is32>>i_regmap[hr])&1) {
2544 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2545 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2546 }
24385cae 2547#endif
57871462 2548 }
2549 }
2550 }
2551 }
2552}
2553
2554/* Stubs/epilogue */
2555
2556void literal_pool(int n)
2557{
2558 if(!literalcount) return;
2559 if(n) {
2560 if((int)out-literals[0][0]<4096-n) return;
2561 }
2562 u_int *ptr;
2563 int i;
2564 for(i=0;i<literalcount;i++)
2565 {
2566 ptr=(u_int *)literals[i][0];
2567 u_int offset=(u_int)out-(u_int)ptr-8;
2568 assert(offset<4096);
2569 assert(!(offset&3));
2570 *ptr|=offset;
2571 output_w32(literals[i][1]);
2572 }
2573 literalcount=0;
2574}
2575
2576void literal_pool_jumpover(int n)
2577{
2578 if(!literalcount) return;
2579 if(n) {
2580 if((int)out-literals[0][0]<4096-n) return;
2581 }
2582 int jaddr=(int)out;
2583 emit_jmp(0);
2584 literal_pool(0);
2585 set_jump_target(jaddr,(int)out);
2586}
2587
2588emit_extjump2(int addr, int target, int linker)
2589{
2590 u_char *ptr=(u_char *)addr;
2591 assert((ptr[3]&0x0e)==0xa);
2592 emit_loadlp(target,0);
2593 emit_loadlp(addr,1);
24385cae 2594 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2595 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2596//DEBUG >
2597#ifdef DEBUG_CYCLE_COUNT
2598 emit_readword((int)&last_count,ECX);
2599 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2600 emit_readword((int)&next_interupt,ECX);
2601 emit_writeword(HOST_CCREG,(int)&Count);
2602 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2603 emit_writeword(ECX,(int)&last_count);
2604#endif
2605//DEBUG <
2606 emit_jmp(linker);
2607}
2608
2609emit_extjump(int addr, int target)
2610{
2611 emit_extjump2(addr, target, (int)dyna_linker);
2612}
2613emit_extjump_ds(int addr, int target)
2614{
2615 emit_extjump2(addr, target, (int)dyna_linker_ds);
2616}
2617
cbbab9cd 2618#ifdef PCSX
2619#include "pcsxmem_inline.c"
2620#endif
2621
57871462 2622do_readstub(int n)
2623{
2624 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2625 literal_pool(256);
2626 set_jump_target(stubs[n][1],(int)out);
2627 int type=stubs[n][0];
2628 int i=stubs[n][3];
2629 int rs=stubs[n][4];
2630 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2631 u_int reglist=stubs[n][7];
2632 signed char *i_regmap=i_regs->regmap;
2633 int addr=get_reg(i_regmap,AGEN1+(i&1));
2634 int rth,rt;
2635 int ds;
b9b61529 2636 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2637 rth=get_reg(i_regmap,FTEMP|64);
2638 rt=get_reg(i_regmap,FTEMP);
2639 }else{
2640 rth=get_reg(i_regmap,rt1[i]|64);
2641 rt=get_reg(i_regmap,rt1[i]);
2642 }
2643 assert(rs>=0);
57871462 2644 if(addr<0) addr=rt;
535d208a 2645 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2646 assert(addr>=0);
2647 int ftable=0;
2648 if(type==LOADB_STUB||type==LOADBU_STUB)
2649 ftable=(int)readmemb;
2650 if(type==LOADH_STUB||type==LOADHU_STUB)
2651 ftable=(int)readmemh;
2652 if(type==LOADW_STUB)
2653 ftable=(int)readmem;
24385cae 2654#ifndef FORCE32
57871462 2655 if(type==LOADD_STUB)
2656 ftable=(int)readmemd;
24385cae 2657#endif
2658 assert(ftable!=0);
57871462 2659 emit_writeword(rs,(int)&address);
2660 //emit_pusha();
2661 save_regs(reglist);
97a238a6 2662#ifndef PCSX
57871462 2663 ds=i_regs!=&regs[i];
2664 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2665 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2666 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2667 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2668 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2669#endif
57871462 2670 emit_shrimm(rs,16,1);
2671 int cc=get_reg(i_regmap,CCREG);
2672 if(cc<0) {
2673 emit_loadreg(CCREG,2);
2674 }
2675 emit_movimm(ftable,0);
2676 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2677#ifndef PCSX
57871462 2678 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2679#endif
57871462 2680 //emit_readword((int)&last_count,temp);
2681 //emit_add(cc,temp,cc);
2682 //emit_writeword(cc,(int)&Count);
2683 //emit_mov(15,14);
2684 emit_call((int)&indirect_jump_indexed);
2685 //emit_callreg(rs);
2686 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2687#ifndef PCSX
57871462 2688 // We really shouldn't need to update the count here,
2689 // but not doing so causes random crashes...
2690 emit_readword((int)&Count,HOST_TEMPREG);
2691 emit_readword((int)&next_interupt,2);
2692 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2693 emit_writeword(2,(int)&last_count);
2694 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2695 if(cc<0) {
2696 emit_storereg(CCREG,HOST_TEMPREG);
2697 }
f51dc36c 2698#endif
57871462 2699 //emit_popa();
2700 restore_regs(reglist);
2701 //if((cc=get_reg(regmap,CCREG))>=0) {
2702 // emit_loadreg(CCREG,cc);
2703 //}
f18c0f46 2704 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2705 assert(rt>=0);
2706 if(type==LOADB_STUB)
2707 emit_movsbl((int)&readmem_dword,rt);
2708 if(type==LOADBU_STUB)
2709 emit_movzbl((int)&readmem_dword,rt);
2710 if(type==LOADH_STUB)
2711 emit_movswl((int)&readmem_dword,rt);
2712 if(type==LOADHU_STUB)
2713 emit_movzwl((int)&readmem_dword,rt);
2714 if(type==LOADW_STUB)
2715 emit_readword((int)&readmem_dword,rt);
2716 if(type==LOADD_STUB) {
2717 emit_readword((int)&readmem_dword,rt);
2718 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2719 }
57871462 2720 }
2721 emit_jmp(stubs[n][2]); // return address
2722}
2723
2724inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2725{
2726 int rs=get_reg(regmap,target);
2727 int rth=get_reg(regmap,target|64);
2728 int rt=get_reg(regmap,target);
535d208a 2729 if(rs<0) rs=get_reg(regmap,-1);
57871462 2730 assert(rs>=0);
57871462 2731 int ftable=0;
2732 if(type==LOADB_STUB||type==LOADBU_STUB)
2733 ftable=(int)readmemb;
2734 if(type==LOADH_STUB||type==LOADHU_STUB)
2735 ftable=(int)readmemh;
2736 if(type==LOADW_STUB)
2737 ftable=(int)readmem;
24385cae 2738#ifndef FORCE32
57871462 2739 if(type==LOADD_STUB)
2740 ftable=(int)readmemd;
24385cae 2741#endif
2742 assert(ftable!=0);
cbbab9cd 2743#ifdef PCSX
2744 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2745 return;
2746#endif
fd99c415 2747 if(target==0)
2748 emit_movimm(addr,rs);
57871462 2749 emit_writeword(rs,(int)&address);
2750 //emit_pusha();
2751 save_regs(reglist);
0c1fe38b 2752#ifndef PCSX
2753 if((signed int)addr>=(signed int)0xC0000000) {
2754 // Theoretically we can have a pagefault here, if the TLB has never
2755 // been enabled and the address is outside the range 80000000..BFFFFFFF
2756 // Write out the registers so the pagefault can be handled. This is
2757 // a very rare case and likely represents a bug.
2758 int ds=regmap!=regs[i].regmap;
2759 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2760 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2761 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2762 }
2763#endif
57871462 2764 //emit_shrimm(rs,16,1);
2765 int cc=get_reg(regmap,CCREG);
2766 if(cc<0) {
2767 emit_loadreg(CCREG,2);
2768 }
2769 //emit_movimm(ftable,0);
2770 emit_movimm(((u_int *)ftable)[addr>>16],0);
2771 //emit_readword((int)&last_count,12);
2772 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2773#ifndef PCSX
57871462 2774 if((signed int)addr>=(signed int)0xC0000000) {
2775 // Pagefault address
2776 int ds=regmap!=regs[i].regmap;
2777 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2778 }
f51dc36c 2779#endif
57871462 2780 //emit_add(12,2,2);
2781 //emit_writeword(2,(int)&Count);
2782 //emit_call(((u_int *)ftable)[addr>>16]);
2783 emit_call((int)&indirect_jump);
f51dc36c 2784#ifndef PCSX
57871462 2785 // We really shouldn't need to update the count here,
2786 // but not doing so causes random crashes...
2787 emit_readword((int)&Count,HOST_TEMPREG);
2788 emit_readword((int)&next_interupt,2);
2789 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2790 emit_writeword(2,(int)&last_count);
2791 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2792 if(cc<0) {
2793 emit_storereg(CCREG,HOST_TEMPREG);
2794 }
f51dc36c 2795#endif
57871462 2796 //emit_popa();
2797 restore_regs(reglist);
fd99c415 2798 if(rt>=0) {
2799 if(type==LOADB_STUB)
2800 emit_movsbl((int)&readmem_dword,rt);
2801 if(type==LOADBU_STUB)
2802 emit_movzbl((int)&readmem_dword,rt);
2803 if(type==LOADH_STUB)
2804 emit_movswl((int)&readmem_dword,rt);
2805 if(type==LOADHU_STUB)
2806 emit_movzwl((int)&readmem_dword,rt);
2807 if(type==LOADW_STUB)
2808 emit_readword((int)&readmem_dword,rt);
2809 if(type==LOADD_STUB) {
2810 emit_readword((int)&readmem_dword,rt);
2811 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2812 }
57871462 2813 }
2814}
2815
2816do_writestub(int n)
2817{
2818 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2819 literal_pool(256);
2820 set_jump_target(stubs[n][1],(int)out);
2821 int type=stubs[n][0];
2822 int i=stubs[n][3];
2823 int rs=stubs[n][4];
2824 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2825 u_int reglist=stubs[n][7];
2826 signed char *i_regmap=i_regs->regmap;
2827 int addr=get_reg(i_regmap,AGEN1+(i&1));
2828 int rth,rt,r;
2829 int ds;
b9b61529 2830 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2831 rth=get_reg(i_regmap,FTEMP|64);
2832 rt=get_reg(i_regmap,r=FTEMP);
2833 }else{
2834 rth=get_reg(i_regmap,rs2[i]|64);
2835 rt=get_reg(i_regmap,r=rs2[i]);
2836 }
2837 assert(rs>=0);
2838 assert(rt>=0);
2839 if(addr<0) addr=get_reg(i_regmap,-1);
2840 assert(addr>=0);
2841 int ftable=0;
2842 if(type==STOREB_STUB)
2843 ftable=(int)writememb;
2844 if(type==STOREH_STUB)
2845 ftable=(int)writememh;
2846 if(type==STOREW_STUB)
2847 ftable=(int)writemem;
24385cae 2848#ifndef FORCE32
57871462 2849 if(type==STORED_STUB)
2850 ftable=(int)writememd;
24385cae 2851#endif
2852 assert(ftable!=0);
57871462 2853 emit_writeword(rs,(int)&address);
2854 //emit_shrimm(rs,16,rs);
2855 //emit_movmem_indexedx4(ftable,rs,rs);
2856 if(type==STOREB_STUB)
2857 emit_writebyte(rt,(int)&byte);
2858 if(type==STOREH_STUB)
2859 emit_writehword(rt,(int)&hword);
2860 if(type==STOREW_STUB)
2861 emit_writeword(rt,(int)&word);
2862 if(type==STORED_STUB) {
3d624f89 2863#ifndef FORCE32
57871462 2864 emit_writeword(rt,(int)&dword);
2865 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2866#else
2867 printf("STORED_STUB\n");
2868#endif
57871462 2869 }
2870 //emit_pusha();
2871 save_regs(reglist);
97a238a6 2872#ifndef PCSX
57871462 2873 ds=i_regs!=&regs[i];
2874 int real_rs=get_reg(i_regmap,rs1[i]);
2875 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2876 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2877 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2878 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2879#endif
57871462 2880 emit_shrimm(rs,16,1);
2881 int cc=get_reg(i_regmap,CCREG);
2882 if(cc<0) {
2883 emit_loadreg(CCREG,2);
2884 }
2885 emit_movimm(ftable,0);
2886 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2887#ifndef PCSX
57871462 2888 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2889#endif
57871462 2890 //emit_readword((int)&last_count,temp);
2891 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2892 //emit_add(cc,temp,cc);
2893 //emit_writeword(cc,(int)&Count);
2894 emit_call((int)&indirect_jump_indexed);
2895 //emit_callreg(rs);
2896 emit_readword((int)&Count,HOST_TEMPREG);
2897 emit_readword((int)&next_interupt,2);
2898 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2899 emit_writeword(2,(int)&last_count);
2900 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2901 if(cc<0) {
2902 emit_storereg(CCREG,HOST_TEMPREG);
2903 }
2904 //emit_popa();
2905 restore_regs(reglist);
2906 //if((cc=get_reg(regmap,CCREG))>=0) {
2907 // emit_loadreg(CCREG,cc);
2908 //}
2909 emit_jmp(stubs[n][2]); // return address
2910}
2911
2912inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2913{
2914 int rs=get_reg(regmap,-1);
2915 int rth=get_reg(regmap,target|64);
2916 int rt=get_reg(regmap,target);
2917 assert(rs>=0);
2918 assert(rt>=0);
cbbab9cd 2919#ifdef PCSX
2920 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2921 return;
2922#endif
57871462 2923 int ftable=0;
2924 if(type==STOREB_STUB)
2925 ftable=(int)writememb;
2926 if(type==STOREH_STUB)
2927 ftable=(int)writememh;
2928 if(type==STOREW_STUB)
2929 ftable=(int)writemem;
24385cae 2930#ifndef FORCE32
57871462 2931 if(type==STORED_STUB)
2932 ftable=(int)writememd;
24385cae 2933#endif
2934 assert(ftable!=0);
57871462 2935 emit_writeword(rs,(int)&address);
2936 //emit_shrimm(rs,16,rs);
2937 //emit_movmem_indexedx4(ftable,rs,rs);
2938 if(type==STOREB_STUB)
2939 emit_writebyte(rt,(int)&byte);
2940 if(type==STOREH_STUB)
2941 emit_writehword(rt,(int)&hword);
2942 if(type==STOREW_STUB)
2943 emit_writeword(rt,(int)&word);
2944 if(type==STORED_STUB) {
3d624f89 2945#ifndef FORCE32
57871462 2946 emit_writeword(rt,(int)&dword);
2947 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2948#else
2949 printf("STORED_STUB\n");
2950#endif
57871462 2951 }
2952 //emit_pusha();
2953 save_regs(reglist);
0c1fe38b 2954#ifndef PCSX
2955 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2956 if((signed int)addr>=(signed int)0xC0000000) {
2957 // Theoretically we can have a pagefault here, if the TLB has never
2958 // been enabled and the address is outside the range 80000000..BFFFFFFF
2959 // Write out the registers so the pagefault can be handled. This is
2960 // a very rare case and likely represents a bug.
2961 int ds=regmap!=regs[i].regmap;
2962 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2963 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2964 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2965 }
2966#endif
57871462 2967 //emit_shrimm(rs,16,1);
2968 int cc=get_reg(regmap,CCREG);
2969 if(cc<0) {
2970 emit_loadreg(CCREG,2);
2971 }
2972 //emit_movimm(ftable,0);
2973 emit_movimm(((u_int *)ftable)[addr>>16],0);
2974 //emit_readword((int)&last_count,12);
2975 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
f51dc36c 2976#ifndef PCSX
57871462 2977 if((signed int)addr>=(signed int)0xC0000000) {
2978 // Pagefault address
2979 int ds=regmap!=regs[i].regmap;
2980 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2981 }
f51dc36c 2982#endif
57871462 2983 //emit_add(12,2,2);
2984 //emit_writeword(2,(int)&Count);
2985 //emit_call(((u_int *)ftable)[addr>>16]);
2986 emit_call((int)&indirect_jump);
2987 emit_readword((int)&Count,HOST_TEMPREG);
2988 emit_readword((int)&next_interupt,2);
2989 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2990 emit_writeword(2,(int)&last_count);
2991 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2992 if(cc<0) {
2993 emit_storereg(CCREG,HOST_TEMPREG);
2994 }
2995 //emit_popa();
2996 restore_regs(reglist);
2997}
2998
2999do_unalignedwritestub(int n)
3000{
b7918751 3001 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3002 literal_pool(256);
57871462 3003 set_jump_target(stubs[n][1],(int)out);
b7918751 3004
3005 int i=stubs[n][3];
3006 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3007 int addr=stubs[n][5];
3008 u_int reglist=stubs[n][7];
3009 signed char *i_regmap=i_regs->regmap;
3010 int temp2=get_reg(i_regmap,FTEMP);
3011 int rt;
3012 int ds, real_rs;
3013 rt=get_reg(i_regmap,rs2[i]);
3014 assert(rt>=0);
3015 assert(addr>=0);
3016 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3017 reglist|=(1<<addr);
3018 reglist&=~(1<<temp2);
3019
3020 emit_andimm(addr,0xfffffffc,temp2);
3021 emit_writeword(temp2,(int)&address);
3022
3023 save_regs(reglist);
97a238a6 3024#ifndef PCSX
b7918751 3025 ds=i_regs!=&regs[i];
3026 real_rs=get_reg(i_regmap,rs1[i]);
3027 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3028 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3029 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3030 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3031#endif
b7918751 3032 emit_shrimm(addr,16,1);
3033 int cc=get_reg(i_regmap,CCREG);
3034 if(cc<0) {
3035 emit_loadreg(CCREG,2);
3036 }
3037 emit_movimm((u_int)readmem,0);
3038 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3039#ifndef PCSX
3040 // pagefault address
3041 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3042#endif
b7918751 3043 emit_call((int)&indirect_jump_indexed);
3044 restore_regs(reglist);
3045
3046 emit_readword((int)&readmem_dword,temp2);
3047 int temp=addr; //hmh
3048 emit_shlimm(addr,3,temp);
3049 emit_andimm(temp,24,temp);
3050#ifdef BIG_ENDIAN_MIPS
3051 if (opcode[i]==0x2e) // SWR
3052#else
3053 if (opcode[i]==0x2a) // SWL
3054#endif
3055 emit_xorimm(temp,24,temp);
3056 emit_movimm(-1,HOST_TEMPREG);
55439448 3057 if (opcode[i]==0x2a) { // SWL
b7918751 3058 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3059 emit_orrshr(rt,temp,temp2);
3060 }else{
3061 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3062 emit_orrshl(rt,temp,temp2);
3063 }
3064 emit_readword((int)&address,addr);
3065 emit_writeword(temp2,(int)&word);
3066 //save_regs(reglist); // don't need to, no state changes
3067 emit_shrimm(addr,16,1);
3068 emit_movimm((u_int)writemem,0);
3069 //emit_call((int)&indirect_jump_indexed);
3070 emit_mov(15,14);
3071 emit_readword_dualindexedx4(0,1,15);
3072 emit_readword((int)&Count,HOST_TEMPREG);
3073 emit_readword((int)&next_interupt,2);
3074 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3075 emit_writeword(2,(int)&last_count);
3076 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3077 if(cc<0) {
3078 emit_storereg(CCREG,HOST_TEMPREG);
3079 }
3080 restore_regs(reglist);
57871462 3081 emit_jmp(stubs[n][2]); // return address
3082}
3083
// Debug helper: print seven of the eight arguments (esp is not printed)
// followed by the int located just before the edi parameter in memory.
// NOTE(review): the (&edi)[-1] read reaches outside the parameter itself;
// what it yields depends on the calling convention - confirm it is wanted.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3088
3089do_invstub(int n)
3090{
3091 literal_pool(20);
3092 u_int reglist=stubs[n][3];
3093 set_jump_target(stubs[n][1],(int)out);
3094 save_regs(reglist);
3095 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3096 emit_call((int)&invalidate_addr);
3097 restore_regs(reglist);
3098 emit_jmp(stubs[n][2]); // return address
3099}
3100
3101int do_dirty_stub(int i)
3102{
3103 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3104 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3105 #ifdef PCSX
3106 addr=(u_int)source;
3107 #endif
57871462 3108 // Careful about the code output here, verify_dirty needs to parse it.
3109 #ifdef ARMv5_ONLY
ac545b3a 3110 emit_loadlp(addr,1);
57871462 3111 emit_loadlp((int)copy,2);
3112 emit_loadlp(slen*4,3);
3113 #else
ac545b3a 3114 emit_movw(addr&0x0000FFFF,1);
57871462 3115 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3116 emit_movt(addr&0xFFFF0000,1);
57871462 3117 emit_movt(((u_int)copy)&0xFFFF0000,2);
3118 emit_movw(slen*4,3);
3119 #endif
3120 emit_movimm(start+i*4,0);
3121 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3122 int entry=(int)out;
3123 load_regs_entry(i);
3124 if(entry==(int)out) entry=instr_addr[i];
3125 emit_jmp(instr_addr[i]);
3126 return entry;
3127}
3128
3129void do_dirty_stub_ds()
3130{
3131 // Careful about the code output here, verify_dirty needs to parse it.
3132 #ifdef ARMv5_ONLY
3133 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3134 emit_loadlp((int)copy,2);
3135 emit_loadlp(slen*4,3);
3136 #else
3137 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3138 emit_movw(((u_int)copy)&0x0000FFFF,2);
3139 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3140 emit_movt(((u_int)copy)&0xFFFF0000,2);
3141 emit_movw(slen*4,3);
3142 #endif
3143 emit_movimm(start+1,0);
3144 emit_call((int)&verify_code_ds);
3145}
3146
3147do_cop1stub(int n)
3148{
3149 literal_pool(256);
3150 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3151 set_jump_target(stubs[n][1],(int)out);
3152 int i=stubs[n][3];
3d624f89 3153// int rs=stubs[n][4];
57871462 3154 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3155 int ds=stubs[n][6];
3156 if(!ds) {
3157 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3158 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3159 }
3160 //else {printf("fp exception in delay slot\n");}
3161 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3162 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3163 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3164 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3165 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3166}
3167
3168/* TLB */
3169
3170int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3171{
3172 if(c) {
3173 if((signed int)addr>=(signed int)0xC0000000) {
3174 // address_generation already loaded the const
3175 emit_readword_dualindexedx4(FP,map,map);
3176 }
3177 else
3178 return -1; // No mapping
3179 }
3180 else {
3181 assert(s!=map);
3182 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3183 emit_addsr12(map,s,map);
3184 // Schedule this while we wait on the load
3185 //if(x) emit_xorimm(s,x,ar);
3186 if(shift>=0) emit_shlimm(s,3,shift);
3187 if(~a) emit_andimm(s,a,ar);
3188 emit_readword_dualindexedx4(FP,map,map);
3189 }
3190 return map;
3191}
3192int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3193{
3194 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3195 emit_test(map,map);
3196 *jaddr=(int)out;
3197 emit_js(0);
3198 }
3199 return map;
3200}
3201
3202int gen_tlb_addr_r(int ar, int map) {
3203 if(map>=0) {
3204 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3205 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3206 }
3207}
3208
3209int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3210{
3211 if(c) {
3212 if(addr<0x80800000||addr>=0xC0000000) {
3213 // address_generation already loaded the const
3214 emit_readword_dualindexedx4(FP,map,map);
3215 }
3216 else
3217 return -1; // No mapping
3218 }
3219 else {
3220 assert(s!=map);
3221 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3222 emit_addsr12(map,s,map);
3223 // Schedule this while we wait on the load
3224 //if(x) emit_xorimm(s,x,ar);
3225 emit_readword_dualindexedx4(FP,map,map);
3226 }
3227 return map;
3228}
3229int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3230{
3231 if(!c||addr<0x80800000||addr>=0xC0000000) {
3232 emit_testimm(map,0x40000000);
3233 *jaddr=(int)out;
3234 emit_jne(0);
3235 }
3236}
3237
3238int gen_tlb_addr_w(int ar, int map) {
3239 if(map>=0) {
3240 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3241 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3242 }
3243}
3244
3245// Generate the address of the memory_map entry, relative to dynarec_local
3246generate_map_const(u_int addr,int reg) {
3247 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3248 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3249}
3250
3251/* Special assem */
3252
3253void shift_assemble_arm(int i,struct regstat *i_regs)
3254{
3255 if(rt1[i]) {
3256 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3257 {
3258 signed char s,t,shift;
3259 t=get_reg(i_regs->regmap,rt1[i]);
3260 s=get_reg(i_regs->regmap,rs1[i]);
3261 shift=get_reg(i_regs->regmap,rs2[i]);
3262 if(t>=0){
3263 if(rs1[i]==0)
3264 {
3265 emit_zeroreg(t);
3266 }
3267 else if(rs2[i]==0)
3268 {
3269 assert(s>=0);
3270 if(s!=t) emit_mov(s,t);
3271 }
3272 else
3273 {
3274 emit_andimm(shift,31,HOST_TEMPREG);
3275 if(opcode2[i]==4) // SLLV
3276 {
3277 emit_shl(s,HOST_TEMPREG,t);
3278 }
3279 if(opcode2[i]==6) // SRLV
3280 {
3281 emit_shr(s,HOST_TEMPREG,t);
3282 }
3283 if(opcode2[i]==7) // SRAV
3284 {
3285 emit_sar(s,HOST_TEMPREG,t);
3286 }
3287 }
3288 }
3289 } else { // DSLLV/DSRLV/DSRAV
3290 signed char sh,sl,th,tl,shift;
3291 th=get_reg(i_regs->regmap,rt1[i]|64);
3292 tl=get_reg(i_regs->regmap,rt1[i]);
3293 sh=get_reg(i_regs->regmap,rs1[i]|64);
3294 sl=get_reg(i_regs->regmap,rs1[i]);
3295 shift=get_reg(i_regs->regmap,rs2[i]);
3296 if(tl>=0){
3297 if(rs1[i]==0)
3298 {
3299 emit_zeroreg(tl);
3300 if(th>=0) emit_zeroreg(th);
3301 }
3302 else if(rs2[i]==0)
3303 {
3304 assert(sl>=0);
3305 if(sl!=tl) emit_mov(sl,tl);
3306 if(th>=0&&sh!=th) emit_mov(sh,th);
3307 }
3308 else
3309 {
3310 // FIXME: What if shift==tl ?
3311 assert(shift!=tl);
3312 int temp=get_reg(i_regs->regmap,-1);
3313 int real_th=th;
3314 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3315 assert(sl>=0);
3316 assert(sh>=0);
3317 emit_andimm(shift,31,HOST_TEMPREG);
3318 if(opcode2[i]==0x14) // DSLLV
3319 {
3320 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3321 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3322 emit_orrshr(sl,HOST_TEMPREG,th);
3323 emit_andimm(shift,31,HOST_TEMPREG);
3324 emit_testimm(shift,32);
3325 emit_shl(sl,HOST_TEMPREG,tl);
3326 if(th>=0) emit_cmovne_reg(tl,th);
3327 emit_cmovne_imm(0,tl);
3328 }
3329 if(opcode2[i]==0x16) // DSRLV
3330 {
3331 assert(th>=0);
3332 emit_shr(sl,HOST_TEMPREG,tl);
3333 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3334 emit_orrshl(sh,HOST_TEMPREG,tl);
3335 emit_andimm(shift,31,HOST_TEMPREG);
3336 emit_testimm(shift,32);
3337 emit_shr(sh,HOST_TEMPREG,th);
3338 emit_cmovne_reg(th,tl);
3339 if(real_th>=0) emit_cmovne_imm(0,th);
3340 }
3341 if(opcode2[i]==0x17) // DSRAV
3342 {
3343 assert(th>=0);
3344 emit_shr(sl,HOST_TEMPREG,tl);
3345 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3346 if(real_th>=0) {
3347 assert(temp>=0);
3348 emit_sarimm(th,31,temp);
3349 }
3350 emit_orrshl(sh,HOST_TEMPREG,tl);
3351 emit_andimm(shift,31,HOST_TEMPREG);
3352 emit_testimm(shift,32);
3353 emit_sar(sh,HOST_TEMPREG,th);
3354 emit_cmovne_reg(th,tl);
3355 if(real_th>=0) emit_cmovne_reg(temp,th);
3356 }
3357 }
3358 }
3359 }
3360 }
3361}
3362#define shift_assemble shift_assemble_arm
3363
3364void loadlr_assemble_arm(int i,struct regstat *i_regs)
3365{
3366 int s,th,tl,temp,temp2,addr,map=-1;
3367 int offset;
3368 int jaddr=0;
af4ee1fe 3369 int memtarget=0,c=0;
57871462 3370 u_int hr,reglist=0;
3371 th=get_reg(i_regs->regmap,rt1[i]|64);
3372 tl=get_reg(i_regs->regmap,rt1[i]);
3373 s=get_reg(i_regs->regmap,rs1[i]);
3374 temp=get_reg(i_regs->regmap,-1);
3375 temp2=get_reg(i_regs->regmap,FTEMP);
3376 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3377 assert(addr<0);
3378 offset=imm[i];
3379 for(hr=0;hr<HOST_REGS;hr++) {
3380 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3381 }
3382 reglist|=1<<temp;
3383 if(offset||s<0||c) addr=temp2;
3384 else addr=s;
3385 if(s>=0) {
3386 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3387 if(c) {
3388 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3389 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3390 }
57871462 3391 }
535d208a 3392 if(!using_tlb) {
3393 if(!c) {
3394 #ifdef RAM_OFFSET
3395 map=get_reg(i_regs->regmap,ROREG);
3396 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3397 #endif
3398 emit_shlimm(addr,3,temp);
3399 if (opcode[i]==0x22||opcode[i]==0x26) {
3400 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3401 }else{
535d208a 3402 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3403 }
535d208a 3404 emit_cmpimm(addr,RAM_SIZE);
3405 jaddr=(int)out;
3406 emit_jno(0);
3407 }
3408 else {
3409 if (opcode[i]==0x22||opcode[i]==0x26) {
3410 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3411 }else{
3412 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3413 }
57871462 3414 }
535d208a 3415 }else{ // using tlb
3416 int a;
3417 if(c) {
3418 a=-1;
3419 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3420 a=0xFFFFFFFC; // LWL/LWR
3421 }else{
3422 a=0xFFFFFFF8; // LDL/LDR
3423 }
3424 map=get_reg(i_regs->regmap,TLREG);
3425 assert(map>=0);
ea3d2e6e 3426 reglist&=~(1<<map);
535d208a 3427 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3428 if(c) {
3429 if (opcode[i]==0x22||opcode[i]==0x26) {
3430 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3431 }else{
3432 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462