add unmodified Ari64 drc to track its changes
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// Globals defined outside this file. Several of them are addressed by the
// generated code as fixed offsets from &dynarec_local (see emit_loadreg /
// emit_storereg, which assert the offset is below 4096).
extern int cycle_count;
extern int last_count;
extern int pcaddr;
extern int pending_exception;
extern int branch_target;
extern uint64_t readmem_dword;
extern precomp_instr fake_pc;
extern void *dynarec_local;
// Indexed by (address>>12) in verify_dirty/get_bounds; entries >=0x80000000
// are treated as "not mapped", otherwise (entry<<2) is added to the address.
extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
extern u_int rounding_modes[4];
32
// Entry points implemented outside this file (presumably hand-written
// assembly stubs -- confirm). The jump_vaddr_rN family has one stub per
// usable host register; there is none for r11 or r13-r15, matching the zero
// slots in jump_vaddr_reg[].
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
48
49const u_int jump_vaddr_reg[16] = {
50 (int)jump_vaddr_r0,
51 (int)jump_vaddr_r1,
52 (int)jump_vaddr_r2,
53 (int)jump_vaddr_r3,
54 (int)jump_vaddr_r4,
55 (int)jump_vaddr_r5,
56 (int)jump_vaddr_r6,
57 (int)jump_vaddr_r7,
58 (int)jump_vaddr_r8,
59 (int)jump_vaddr_r9,
60 (int)jump_vaddr_r10,
61 0,
62 (int)jump_vaddr_r12,
63 0,
64 0,
65 0};
66
67#include "fpu.h"
68
69/* Linker */
70
71void set_jump_target(int addr,u_int target)
72{
73 u_char *ptr=(u_char *)addr;
74 u_int *ptr2=(u_int *)ptr;
75 if(ptr[3]==0xe2) {
76 assert((target-(u_int)ptr2-8)<1024);
77 assert((addr&3)==0);
78 assert((target&3)==0);
79 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
80 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
81 }
82 else if(ptr[3]==0x72) {
83 // generated by emit_jno_unlikely
84 if((target-(u_int)ptr2-8)<1024) {
85 assert((addr&3)==0);
86 assert((target&3)==0);
87 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
88 }
89 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
90 assert((addr&3)==0);
91 assert((target&3)==0);
92 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
93 }
94 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
95 }
96 else {
97 assert((ptr[3]&0x0e)==0xa);
98 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
99 }
100}
101
102// This optionally copies the instruction from the target of the branch into
103// the space before the branch. Works, but the difference in speed is
104// usually insignificant.
105void set_jump_target_fillslot(int addr,u_int target,int copy)
106{
107 u_char *ptr=(u_char *)addr;
108 u_int *ptr2=(u_int *)ptr;
109 assert(!copy||ptr2[-1]==0xe28dd000);
110 if(ptr[3]==0xe2) {
111 assert(!copy);
112 assert((target-(u_int)ptr2-8)<4096);
113 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
114 }
115 else {
116 assert((ptr[3]&0x0e)==0xa);
117 u_int target_insn=*(u_int *)target;
118 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
119 copy=0;
120 }
121 if((target_insn&0x0c100000)==0x04100000) { // Load
122 copy=0;
123 }
124 if(target_insn&0x08000000) {
125 copy=0;
126 }
127 if(copy) {
128 ptr2[-1]=target_insn;
129 target+=4;
130 }
131 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
132 }
133}
134
135/* Literal pool */
136add_literal(int addr,int val)
137{
138 literals[literalcount][0]=addr;
139 literals[literalcount][1]=val;
140 literalcount++;
141}
142
143void kill_pointer(void *stub)
144{
145 int *ptr=(int *)(stub+4);
146 assert((*ptr&0x0ff00000)==0x05900000);
147 u_int offset=*ptr&0xfff;
148 int **l_ptr=(void *)ptr+offset+8;
149 int *i_ptr=*l_ptr;
150 set_jump_target((int)i_ptr,(int)stub);
151}
152
153int get_pointer(void *stub)
154{
155 //printf("get_pointer(%x)\n",(int)stub);
156 int *ptr=(int *)(stub+4);
157 assert((*ptr&0x0ff00000)==0x05900000);
158 u_int offset=*ptr&0xfff;
159 int **l_ptr=(void *)ptr+offset+8;
160 int *i_ptr=*l_ptr;
161 assert((*i_ptr&0x0f000000)==0x0a000000);
162 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
163}
164
165// Find the "clean" entry point from a "dirty" entry point
166// by skipping past the call to verify_code
167u_int get_clean_addr(int addr)
168{
169 int *ptr=(int *)addr;
170 #ifdef ARMv5_ONLY
171 ptr+=4;
172 #else
173 ptr+=6;
174 #endif
175 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
176 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
177 ptr++;
178 if((*ptr&0xFF000000)==0xea000000) {
179 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
180 }
181 return (u_int)ptr;
182}
183
184int verify_dirty(int addr)
185{
186 u_int *ptr=(u_int *)addr;
187 #ifdef ARMv5_ONLY
188 // get from literal pool
189 assert((*ptr&0xFFF00000)==0xe5900000);
190 u_int offset=*ptr&0xfff;
191 u_int *l_ptr=(void *)ptr+offset+8;
192 u_int source=l_ptr[0];
193 u_int copy=l_ptr[1];
194 u_int len=l_ptr[2];
195 ptr+=4;
196 #else
197 // ARMv7 movw/movt
198 assert((*ptr&0xFFF00000)==0xe3000000);
199 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
200 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
201 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
202 ptr+=6;
203 #endif
204 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
205 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
206 u_int verifier=(int)ptr+((*ptr<<8)>>6)+8; // get target of bl
207 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
208 unsigned int page=source>>12;
209 unsigned int map_value=memory_map[page];
210 if(map_value>=0x80000000) return 0;
211 while(page<((source+len-1)>>12)) {
212 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
213 }
214 source = source+(map_value<<2);
215 }
216 //printf("verify_dirty: %x %x %x\n",source,copy,len);
217 return !memcmp((void *)source,(void *)copy,len);
218}
219
220// This doesn't necessarily find all clean entry points, just
221// guarantees that it's not dirty
222int isclean(int addr)
223{
224 #ifdef ARMv5_ONLY
225 int *ptr=((u_int *)addr)+4;
226 #else
227 int *ptr=((u_int *)addr)+6;
228 #endif
229 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
230 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
231 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
232 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
234 return 1;
235}
236
237void get_bounds(int addr,u_int *start,u_int *end)
238{
239 u_int *ptr=(u_int *)addr;
240 #ifdef ARMv5_ONLY
241 // get from literal pool
242 assert((*ptr&0xFFF00000)==0xe5900000);
243 u_int offset=*ptr&0xfff;
244 u_int *l_ptr=(void *)ptr+offset+8;
245 u_int source=l_ptr[0];
246 //u_int copy=l_ptr[1];
247 u_int len=l_ptr[2];
248 ptr+=4;
249 #else
250 // ARMv7 movw/movt
251 assert((*ptr&0xFFF00000)==0xe3000000);
252 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
253 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
254 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
255 ptr+=6;
256 #endif
257 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
258 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
259 u_int verifier=(int)ptr+((*ptr<<8)>>6)+8; // get target of bl
260 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
261 if(memory_map[source>>12]>=0x80000000) source = 0;
262 else source = source+(memory_map[source>>12]<<2);
263 }
264 *start=source;
265 *end=source+len;
266}
267
268/* Register allocation */
269
270// Note: registers are allocated clean (unmodified state)
271// if you intend to modify the register, you must call dirty_reg().
272void alloc_reg(struct regstat *cur,int i,signed char reg)
273{
274 int r,hr;
275 int preferred_reg = (reg&7);
276 if(reg==CCREG) preferred_reg=HOST_CCREG;
277 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
278
279 // Don't allocate unused registers
280 if((cur->u>>reg)&1) return;
281
282 // see if it's already allocated
283 for(hr=0;hr<HOST_REGS;hr++)
284 {
285 if(cur->regmap[hr]==reg) return;
286 }
287
288 // Keep the same mapping if the register was already allocated in a loop
289 preferred_reg = loop_reg(i,reg,preferred_reg);
290
291 // Try to allocate the preferred register
292 if(cur->regmap[preferred_reg]==-1) {
293 cur->regmap[preferred_reg]=reg;
294 cur->dirty&=~(1<<preferred_reg);
295 cur->isconst&=~(1<<preferred_reg);
296 return;
297 }
298 r=cur->regmap[preferred_reg];
299 if(r<64&&((cur->u>>r)&1)) {
300 cur->regmap[preferred_reg]=reg;
301 cur->dirty&=~(1<<preferred_reg);
302 cur->isconst&=~(1<<preferred_reg);
303 return;
304 }
305 if(r>=64&&((cur->uu>>(r&63))&1)) {
306 cur->regmap[preferred_reg]=reg;
307 cur->dirty&=~(1<<preferred_reg);
308 cur->isconst&=~(1<<preferred_reg);
309 return;
310 }
311
312 // Clear any unneeded registers
313 // We try to keep the mapping consistent, if possible, because it
314 // makes branches easier (especially loops). So we try to allocate
315 // first (see above) before removing old mappings. If this is not
316 // possible then go ahead and clear out the registers that are no
317 // longer needed.
318 for(hr=0;hr<HOST_REGS;hr++)
319 {
320 r=cur->regmap[hr];
321 if(r>=0) {
322 if(r<64) {
323 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
324 }
325 else
326 {
327 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
328 }
329 }
330 }
331 // Try to allocate any available register, but prefer
332 // registers that have not been used recently.
333 if(i>0) {
334 for(hr=0;hr<HOST_REGS;hr++) {
335 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
336 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
337 cur->regmap[hr]=reg;
338 cur->dirty&=~(1<<hr);
339 cur->isconst&=~(1<<hr);
340 return;
341 }
342 }
343 }
344 }
345 // Try to allocate any available register
346 for(hr=0;hr<HOST_REGS;hr++) {
347 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
348 cur->regmap[hr]=reg;
349 cur->dirty&=~(1<<hr);
350 cur->isconst&=~(1<<hr);
351 return;
352 }
353 }
354
355 // Ok, now we have to evict someone
356 // Pick a register we hopefully won't need soon
357 u_char hsn[MAXREG+1];
358 memset(hsn,10,sizeof(hsn));
359 int j;
360 lsn(hsn,i,&preferred_reg);
361 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
362 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
363 if(i>0) {
364 // Don't evict the cycle count at entry points, otherwise the entry
365 // stub will have to write it.
366 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
367 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
368 for(j=10;j>=3;j--)
369 {
370 // Alloc preferred register if available
371 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
372 for(hr=0;hr<HOST_REGS;hr++) {
373 // Evict both parts of a 64-bit register
374 if((cur->regmap[hr]&63)==r) {
375 cur->regmap[hr]=-1;
376 cur->dirty&=~(1<<hr);
377 cur->isconst&=~(1<<hr);
378 }
379 }
380 cur->regmap[preferred_reg]=reg;
381 return;
382 }
383 for(r=1;r<=MAXREG;r++)
384 {
385 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
386 for(hr=0;hr<HOST_REGS;hr++) {
387 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
388 if(cur->regmap[hr]==r+64) {
389 cur->regmap[hr]=reg;
390 cur->dirty&=~(1<<hr);
391 cur->isconst&=~(1<<hr);
392 return;
393 }
394 }
395 }
396 for(hr=0;hr<HOST_REGS;hr++) {
397 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
398 if(cur->regmap[hr]==r) {
399 cur->regmap[hr]=reg;
400 cur->dirty&=~(1<<hr);
401 cur->isconst&=~(1<<hr);
402 return;
403 }
404 }
405 }
406 }
407 }
408 }
409 }
410 for(j=10;j>=0;j--)
411 {
412 for(r=1;r<=MAXREG;r++)
413 {
414 if(hsn[r]==j) {
415 for(hr=0;hr<HOST_REGS;hr++) {
416 if(cur->regmap[hr]==r+64) {
417 cur->regmap[hr]=reg;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 return;
421 }
422 }
423 for(hr=0;hr<HOST_REGS;hr++) {
424 if(cur->regmap[hr]==r) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 }
433 }
434 printf("This shouldn't happen (alloc_reg)");exit(1);
435}
436
437void alloc_reg64(struct regstat *cur,int i,signed char reg)
438{
439 int preferred_reg = 8+(reg&1);
440 int r,hr;
441
442 // allocate the lower 32 bits
443 alloc_reg(cur,i,reg);
444
445 // Don't allocate unused registers
446 if((cur->uu>>reg)&1) return;
447
448 // see if the upper half is already allocated
449 for(hr=0;hr<HOST_REGS;hr++)
450 {
451 if(cur->regmap[hr]==reg+64) return;
452 }
453
454 // Keep the same mapping if the register was already allocated in a loop
455 preferred_reg = loop_reg(i,reg,preferred_reg);
456
457 // Try to allocate the preferred register
458 if(cur->regmap[preferred_reg]==-1) {
459 cur->regmap[preferred_reg]=reg|64;
460 cur->dirty&=~(1<<preferred_reg);
461 cur->isconst&=~(1<<preferred_reg);
462 return;
463 }
464 r=cur->regmap[preferred_reg];
465 if(r<64&&((cur->u>>r)&1)) {
466 cur->regmap[preferred_reg]=reg|64;
467 cur->dirty&=~(1<<preferred_reg);
468 cur->isconst&=~(1<<preferred_reg);
469 return;
470 }
471 if(r>=64&&((cur->uu>>(r&63))&1)) {
472 cur->regmap[preferred_reg]=reg|64;
473 cur->dirty&=~(1<<preferred_reg);
474 cur->isconst&=~(1<<preferred_reg);
475 return;
476 }
477
478 // Clear any unneeded registers
479 // We try to keep the mapping consistent, if possible, because it
480 // makes branches easier (especially loops). So we try to allocate
481 // first (see above) before removing old mappings. If this is not
482 // possible then go ahead and clear out the registers that are no
483 // longer needed.
484 for(hr=HOST_REGS-1;hr>=0;hr--)
485 {
486 r=cur->regmap[hr];
487 if(r>=0) {
488 if(r<64) {
489 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
490 }
491 else
492 {
493 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
494 }
495 }
496 }
497 // Try to allocate any available register, but prefer
498 // registers that have not been used recently.
499 if(i>0) {
500 for(hr=0;hr<HOST_REGS;hr++) {
501 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
502 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
503 cur->regmap[hr]=reg|64;
504 cur->dirty&=~(1<<hr);
505 cur->isconst&=~(1<<hr);
506 return;
507 }
508 }
509 }
510 }
511 // Try to allocate any available register
512 for(hr=0;hr<HOST_REGS;hr++) {
513 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
514 cur->regmap[hr]=reg|64;
515 cur->dirty&=~(1<<hr);
516 cur->isconst&=~(1<<hr);
517 return;
518 }
519 }
520
521 // Ok, now we have to evict someone
522 // Pick a register we hopefully won't need soon
523 u_char hsn[MAXREG+1];
524 memset(hsn,10,sizeof(hsn));
525 int j;
526 lsn(hsn,i,&preferred_reg);
527 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
528 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
529 if(i>0) {
530 // Don't evict the cycle count at entry points, otherwise the entry
531 // stub will have to write it.
532 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
533 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
534 for(j=10;j>=3;j--)
535 {
536 // Alloc preferred register if available
537 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
538 for(hr=0;hr<HOST_REGS;hr++) {
539 // Evict both parts of a 64-bit register
540 if((cur->regmap[hr]&63)==r) {
541 cur->regmap[hr]=-1;
542 cur->dirty&=~(1<<hr);
543 cur->isconst&=~(1<<hr);
544 }
545 }
546 cur->regmap[preferred_reg]=reg|64;
547 return;
548 }
549 for(r=1;r<=MAXREG;r++)
550 {
551 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
552 for(hr=0;hr<HOST_REGS;hr++) {
553 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
554 if(cur->regmap[hr]==r+64) {
555 cur->regmap[hr]=reg|64;
556 cur->dirty&=~(1<<hr);
557 cur->isconst&=~(1<<hr);
558 return;
559 }
560 }
561 }
562 for(hr=0;hr<HOST_REGS;hr++) {
563 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
564 if(cur->regmap[hr]==r) {
565 cur->regmap[hr]=reg|64;
566 cur->dirty&=~(1<<hr);
567 cur->isconst&=~(1<<hr);
568 return;
569 }
570 }
571 }
572 }
573 }
574 }
575 }
576 for(j=10;j>=0;j--)
577 {
578 for(r=1;r<=MAXREG;r++)
579 {
580 if(hsn[r]==j) {
581 for(hr=0;hr<HOST_REGS;hr++) {
582 if(cur->regmap[hr]==r+64) {
583 cur->regmap[hr]=reg|64;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 return;
587 }
588 }
589 for(hr=0;hr<HOST_REGS;hr++) {
590 if(cur->regmap[hr]==r) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 }
599 }
600 printf("This shouldn't happen");exit(1);
601}
602
603// Allocate a temporary register. This is done without regard to
604// dirty status or whether the register we request is on the unneeded list
605// Note: This will only allocate one register, even if called multiple times
606void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
607{
608 int r,hr;
609 int preferred_reg = -1;
610
611 // see if it's already allocated
612 for(hr=0;hr<HOST_REGS;hr++)
613 {
614 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
615 }
616
617 // Try to allocate any available register
618 for(hr=HOST_REGS-1;hr>=0;hr--) {
619 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
620 cur->regmap[hr]=reg;
621 cur->dirty&=~(1<<hr);
622 cur->isconst&=~(1<<hr);
623 return;
624 }
625 }
626
627 // Find an unneeded register
628 for(hr=HOST_REGS-1;hr>=0;hr--)
629 {
630 r=cur->regmap[hr];
631 if(r>=0) {
632 if(r<64) {
633 if((cur->u>>r)&1) {
634 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
635 cur->regmap[hr]=reg;
636 cur->dirty&=~(1<<hr);
637 cur->isconst&=~(1<<hr);
638 return;
639 }
640 }
641 }
642 else
643 {
644 if((cur->uu>>(r&63))&1) {
645 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
646 cur->regmap[hr]=reg;
647 cur->dirty&=~(1<<hr);
648 cur->isconst&=~(1<<hr);
649 return;
650 }
651 }
652 }
653 }
654 }
655
656 // Ok, now we have to evict someone
657 // Pick a register we hopefully won't need soon
658 // TODO: we might want to follow unconditional jumps here
659 // TODO: get rid of dupe code and make this into a function
660 u_char hsn[MAXREG+1];
661 memset(hsn,10,sizeof(hsn));
662 int j;
663 lsn(hsn,i,&preferred_reg);
664 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
665 if(i>0) {
666 // Don't evict the cycle count at entry points, otherwise the entry
667 // stub will have to write it.
668 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
669 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
670 for(j=10;j>=3;j--)
671 {
672 for(r=1;r<=MAXREG;r++)
673 {
674 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
675 for(hr=0;hr<HOST_REGS;hr++) {
676 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
677 if(cur->regmap[hr]==r+64) {
678 cur->regmap[hr]=reg;
679 cur->dirty&=~(1<<hr);
680 cur->isconst&=~(1<<hr);
681 return;
682 }
683 }
684 }
685 for(hr=0;hr<HOST_REGS;hr++) {
686 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
687 if(cur->regmap[hr]==r) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
697 }
698 }
699 for(j=10;j>=0;j--)
700 {
701 for(r=1;r<=MAXREG;r++)
702 {
703 if(hsn[r]==j) {
704 for(hr=0;hr<HOST_REGS;hr++) {
705 if(cur->regmap[hr]==r+64) {
706 cur->regmap[hr]=reg;
707 cur->dirty&=~(1<<hr);
708 cur->isconst&=~(1<<hr);
709 return;
710 }
711 }
712 for(hr=0;hr<HOST_REGS;hr++) {
713 if(cur->regmap[hr]==r) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 }
722 }
723 printf("This shouldn't happen");exit(1);
724}
725// Allocate a specific ARM register.
726void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
727{
728 int n;
729
730 // see if it's already allocated (and dealloc it)
731 for(n=0;n<HOST_REGS;n++)
732 {
733 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
734 }
735
736 cur->regmap[hr]=reg;
737 cur->dirty&=~(1<<hr);
738 cur->isconst&=~(1<<hr);
739}
740
741// Alloc cycle count into dedicated register
742alloc_cc(struct regstat *cur,int i)
743{
744 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
745}
746
747/* Special alloc */
748
749
750/* Assembler */
751
// Printable name of each ARM register number, used in assem_debug output.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
769
770void output_byte(u_char byte)
771{
772 *(out++)=byte;
773}
774void output_modrm(u_char mod,u_char rm,u_char ext)
775{
776 assert(mod<4);
777 assert(rm<8);
778 assert(ext<8);
779 u_char byte=(mod<<6)|(ext<<3)|rm;
780 *(out++)=byte;
781}
782void output_sib(u_char scale,u_char index,u_char base)
783{
784 assert(scale<4);
785 assert(index<8);
786 assert(base<8);
787 u_char byte=(scale<<6)|(index<<3)|base;
788 *(out++)=byte;
789}
790void output_w32(u_int word)
791{
792 *((u_int *)out)=word;
793 out+=4;
794}
// Build the register fields of an ARM instruction:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build rd/rn fields plus an 8-bit immediate that is to be shifted left by
// an even amount. The left shift is stored as ((32-shift)&30)<<7, i.e. as
// the equivalent right-rotation in the rotate field above the immediate.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success the 12-bit rotate:imm8 field is stored in *encoded and 1 is
// returned; otherwise 0 is returned and *encoded is left untouched.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate imm right two bits at a time until it fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
824u_int genjmp(u_int addr)
825{
826 int offset=addr-(int)out-8;
827 if(offset<-33554432||offset>=33554432) return 0;
828 return ((u_int)offset>>2)&0xffffff;
829}
830
831void emit_mov(int rs,int rt)
832{
833 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
834 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
835}
836
837void emit_movs(int rs,int rt)
838{
839 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
840 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
841}
842
843void emit_add(int rs1,int rs2,int rt)
844{
845 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
846 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
847}
848
849void emit_adds(int rs1,int rs2,int rt)
850{
851 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
852 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
853}
854
855void emit_adcs(int rs1,int rs2,int rt)
856{
857 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
858 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
859}
860
861void emit_sbc(int rs1,int rs2,int rt)
862{
863 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
864 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
865}
866
867void emit_sbcs(int rs1,int rs2,int rt)
868{
869 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
870 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
871}
872
873void emit_neg(int rs, int rt)
874{
875 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
876 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
877}
878
879void emit_negs(int rs, int rt)
880{
881 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
882 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
883}
884
885void emit_sub(int rs1,int rs2,int rt)
886{
887 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
888 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
889}
890
891void emit_subs(int rs1,int rs2,int rt)
892{
893 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
895}
896
897void emit_zeroreg(int rt)
898{
899 assem_debug("mov %s,#0\n",regname[rt]);
900 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
901}
902
903void emit_loadreg(int r, int hr)
904{
905 if((r&63)==0)
906 emit_zeroreg(hr);
907 else {
908 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
909 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
910 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
911 if(r==CCREG) addr=(int)&cycle_count;
912 if(r==CSREG) addr=(int)&Status;
913 if(r==FSREG) addr=(int)&FCR31;
914 if(r==INVCP) addr=(int)&invc_ptr;
915 u_int offset = addr-(u_int)&dynarec_local;
916 assert(offset<4096);
917 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
918 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
919 }
920}
921void emit_storereg(int r, int hr)
922{
923 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
924 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
925 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
926 if(r==CCREG) addr=(int)&cycle_count;
927 if(r==FSREG) addr=(int)&FCR31;
928 u_int offset = addr-(u_int)&dynarec_local;
929 assert(offset<4096);
930 assem_debug("str %s,fp+%d\n",regname[hr],offset);
931 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
932}
933
934void emit_test(int rs, int rt)
935{
936 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
937 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
938}
939
940void emit_testimm(int rs,int imm)
941{
942 u_int armval;
943 assem_debug("tst %s,$%d\n",regname[rs],imm);
944 assert(genimm(imm,&armval));
945 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
946}
947
948void emit_not(int rs,int rt)
949{
950 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
951 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
952}
953
954void emit_and(u_int rs1,u_int rs2,u_int rt)
955{
956 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
957 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
958}
959
960void emit_or(u_int rs1,u_int rs2,u_int rt)
961{
962 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
963 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
964}
965void emit_or_and_set_flags(int rs1,int rs2,int rt)
966{
967 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
968 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
969}
970
971void emit_xor(u_int rs1,u_int rs2,u_int rt)
972{
973 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
974 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
975}
976
977void emit_loadlp(u_int imm,u_int rt)
978{
979 add_literal((int)out,imm);
980 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
981 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
982}
983void emit_movw(u_int imm,u_int rt)
984{
985 assert(imm<65536);
986 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
987 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
988}
989void emit_movt(u_int imm,u_int rt)
990{
991 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
992 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
993}
994void emit_movimm(u_int imm,u_int rt)
995{
996 u_int armval;
997 if(genimm(imm,&armval)) {
998 assem_debug("mov %s,#%d\n",regname[rt],imm);
999 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1000 }else if(genimm(~imm,&armval)) {
1001 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1002 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1003 }else if(imm<65536) {
1004 #ifdef ARMv5_ONLY
1005 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1006 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1007 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1008 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1009 #else
1010 emit_movw(imm,rt);
1011 #endif
1012 }else{
1013 #ifdef ARMv5_ONLY
1014 emit_loadlp(imm,rt);
1015 #else
1016 emit_movw(imm&0x0000FFFF,rt);
1017 emit_movt(imm&0xFFFF0000,rt);
1018 #endif
1019 }
1020}
1021void emit_pcreladdr(u_int rt)
1022{
1023 assem_debug("add %s,pc,#?\n",regname[rt]);
1024 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1025}
1026
1027void emit_addimm(u_int rs,int imm,u_int rt)
1028{
1029 assert(rs<16);
1030 assert(rt<16);
1031 if(imm!=0) {
1032 assert(imm>-65536&&imm<65536);
1033 u_int armval;
1034 if(genimm(imm,&armval)) {
1035 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1036 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1037 }else if(genimm(-imm,&armval)) {
1038 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1039 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1040 }else if(imm<0) {
1041 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1042 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1043 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1044 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1045 }else{
1046 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1047 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1048 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1049 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1050 }
1051 }
1052 else if(rs!=rt) emit_mov(rs,rt);
1053}
1054
1055void emit_addimm_and_set_flags(int imm,int rt)
1056{
1057 assert(imm>-65536&&imm<65536);
1058 u_int armval;
1059 if(genimm(imm,&armval)) {
1060 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1061 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1062 }else if(genimm(-imm,&armval)) {
1063 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1064 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1065 }else if(imm<0) {
1066 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1067 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1068 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1069 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1070 }else{
1071 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1072 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1073 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1074 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1075 }
1076}
1077void emit_addimm_no_flags(u_int imm,u_int rt)
1078{
1079 emit_addimm(rt,imm,rt);
1080}
1081
1082void emit_addnop(u_int r)
1083{
1084 assert(r<16);
1085 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1086 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1087}
1088
1089void emit_adcimm(u_int rs,int imm,u_int rt)
1090{
1091 u_int armval;
1092 assert(genimm(imm,&armval));
1093 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1095}
1096/*void emit_sbcimm(int imm,u_int rt)
1097{
1098 u_int armval;
1099 assert(genimm(imm,&armval));
1100 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1101 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1102}*/
1103void emit_sbbimm(int imm,u_int rt)
1104{
1105 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1106 assert(rt<8);
1107 if(imm<128&&imm>=-128) {
1108 output_byte(0x83);
1109 output_modrm(3,rt,3);
1110 output_byte(imm);
1111 }
1112 else
1113 {
1114 output_byte(0x81);
1115 output_modrm(3,rt,3);
1116 output_w32(imm);
1117 }
1118}
1119void emit_rscimm(int rs,int imm,u_int rt)
1120{
1121 assert(0);
1122 u_int armval;
1123 assert(genimm(imm,&armval));
1124 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1125 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1126}
1127
1128void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1129{
1130 // TODO: if(genimm(imm,&armval)) ...
1131 // else
1132 emit_movimm(imm,HOST_TEMPREG);
1133 emit_adds(HOST_TEMPREG,rsl,rtl);
1134 emit_adcimm(rsh,0,rth);
1135}
1136
1137void emit_sbb(int rs1,int rs2)
1138{
1139 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1140 output_byte(0x19);
1141 output_modrm(3,rs1,rs2);
1142}
1143
1144void emit_andimm(int rs,int imm,int rt)
1145{
1146 u_int armval;
1147 if(genimm(imm,&armval)) {
1148 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1149 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1150 }else if(genimm(~imm,&armval)) {
1151 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1152 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1153 }else if(imm==65535) {
1154 #ifdef ARMv5_ONLY
1155 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1156 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1157 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1158 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1159 #else
1160 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1161 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1162 #endif
1163 }else{
1164 assert(imm>0&&imm<65535);
1165 #ifdef ARMv5_ONLY
1166 assem_debug("mov r14,#%d\n",imm&0xFF00);
1167 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1168 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1169 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1170 #else
1171 emit_movw(imm,HOST_TEMPREG);
1172 #endif
1173 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1174 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1175 }
1176}
1177
1178void emit_orimm(int rs,int imm,int rt)
1179{
1180 u_int armval;
1181 if(genimm(imm,&armval)) {
1182 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1184 }else{
1185 assert(imm>0&&imm<65536);
1186 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1187 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1188 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1189 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1190 }
1191}
1192
1193void emit_xorimm(int rs,int imm,int rt)
1194{
1195 assert(imm>0&&imm<65536);
1196 u_int armval;
1197 if(genimm(imm,&armval)) {
1198 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1199 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1200 }else{
1201 assert(imm>0);
1202 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1203 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1204 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1205 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1206 }
1207}
1208
1209void emit_shlimm(int rs,u_int imm,int rt)
1210{
1211 assert(imm>0);
1212 assert(imm<32);
1213 //if(imm==1) ...
1214 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1215 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1216}
1217
1218void emit_shrimm(int rs,u_int imm,int rt)
1219{
1220 assert(imm>0);
1221 assert(imm<32);
1222 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1223 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1224}
1225
1226void emit_sarimm(int rs,u_int imm,int rt)
1227{
1228 assert(imm>0);
1229 assert(imm<32);
1230 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1232}
1233
1234void emit_rorimm(int rs,u_int imm,int rt)
1235{
1236 assert(imm>0);
1237 assert(imm<32);
1238 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1239 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1240}
1241
1242void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1243{
1244 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1245 assert(imm>0);
1246 assert(imm<32);
1247 //if(imm==1) ...
1248 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1250 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1251 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1252}
1253
1254void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1255{
1256 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1257 assert(imm>0);
1258 assert(imm<32);
1259 //if(imm==1) ...
1260 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1261 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1262 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1263 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1264}
1265
1266void emit_shl(u_int rs,u_int shift,u_int rt)
1267{
1268 assert(rs<16);
1269 assert(rt<16);
1270 assert(shift<16);
1271 //if(imm==1) ...
1272 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1273 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1274}
1275void emit_shr(u_int rs,u_int shift,u_int rt)
1276{
1277 assert(rs<16);
1278 assert(rt<16);
1279 assert(shift<16);
1280 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1281 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1282}
1283void emit_sar(u_int rs,u_int shift,u_int rt)
1284{
1285 assert(rs<16);
1286 assert(rt<16);
1287 assert(shift<16);
1288 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1289 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1290}
1291void emit_shlcl(int r)
1292{
1293 assem_debug("shl %%%s,%%cl\n",regname[r]);
1294 assert(0);
1295}
1296void emit_shrcl(int r)
1297{
1298 assem_debug("shr %%%s,%%cl\n",regname[r]);
1299 assert(0);
1300}
1301void emit_sarcl(int r)
1302{
1303 assem_debug("sar %%%s,%%cl\n",regname[r]);
1304 assert(0);
1305}
1306
1307void emit_shldcl(int r1,int r2)
1308{
1309 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1310 assert(0);
1311}
1312void emit_shrdcl(int r1,int r2)
1313{
1314 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1315 assert(0);
1316}
1317void emit_orrshl(u_int rs,u_int shift,u_int rt)
1318{
1319 assert(rs<16);
1320 assert(rt<16);
1321 assert(shift<16);
1322 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1323 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1324}
1325void emit_orrshr(u_int rs,u_int shift,u_int rt)
1326{
1327 assert(rs<16);
1328 assert(rt<16);
1329 assert(shift<16);
1330 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1331 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1332}
1333
1334void emit_cmpimm(int rs,int imm)
1335{
1336 u_int armval;
1337 if(genimm(imm,&armval)) {
1338 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1339 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1340 }else if(genimm(-imm,&armval)) {
1341 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1342 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1343 }else if(imm>0) {
1344 assert(imm<65536);
1345 #ifdef ARMv5_ONLY
1346 emit_movimm(imm,HOST_TEMPREG);
1347 #else
1348 emit_movw(imm,HOST_TEMPREG);
1349 #endif
1350 assem_debug("cmp %s,r14\n",regname[rs]);
1351 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1352 }else{
1353 assert(imm>-65536);
1354 #ifdef ARMv5_ONLY
1355 emit_movimm(-imm,HOST_TEMPREG);
1356 #else
1357 emit_movw(-imm,HOST_TEMPREG);
1358 #endif
1359 assem_debug("cmn %s,r14\n",regname[rs]);
1360 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1361 }
1362}
1363
1364void emit_cmovne(u_int *addr,int rt)
1365{
1366 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1367 assert(0);
1368}
1369void emit_cmovl(u_int *addr,int rt)
1370{
1371 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1372 assert(0);
1373}
1374void emit_cmovs(u_int *addr,int rt)
1375{
1376 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1377 assert(0);
1378}
1379void emit_cmovne_imm(int imm,int rt)
1380{
1381 assem_debug("movne %s,#%d\n",regname[rt],imm);
1382 u_int armval;
1383 assert(genimm(imm,&armval));
1384 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1385}
1386void emit_cmovl_imm(int imm,int rt)
1387{
1388 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1389 u_int armval;
1390 assert(genimm(imm,&armval));
1391 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1392}
1393void emit_cmovb_imm(int imm,int rt)
1394{
1395 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1396 u_int armval;
1397 assert(genimm(imm,&armval));
1398 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1399}
1400void emit_cmovs_imm(int imm,int rt)
1401{
1402 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1403 u_int armval;
1404 assert(genimm(imm,&armval));
1405 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1406}
1407void emit_cmove_reg(int rs,int rt)
1408{
1409 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1410 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1411}
1412void emit_cmovne_reg(int rs,int rt)
1413{
1414 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1415 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1416}
1417void emit_cmovl_reg(int rs,int rt)
1418{
1419 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1420 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1421}
1422void emit_cmovs_reg(int rs,int rt)
1423{
1424 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1425 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1426}
1427
// Set rt to 1 if rs < imm (using the "lt" conditional move), else 0.
// rt is cleared before the compare unless it is the same register as rs,
// in which case it is cleared after the compare.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);
  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if the compare of rs with imm leaves the "cc" condition
// true (via emit_cmovb_imm), else 0.
// rt is cleared before the compare unless it is the same register as rs,
// in which case it is cleared after the compare.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);
  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// 64-bit "set if less than immediate" with a 32-bit result in rt.
// The low-word result from emit_slti32() is then overridden according to
// the high word rsh: tested against itself for imm>=0, or compared with -1
// for imm<0.  rsh must not be the same register as rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// 64-bit unsigned "set if less than immediate" with a 32-bit result in rt.
// The low-word result from emit_sltiu32() is overridden when the high word
// rsh is non-zero (imm>=0, result forced to 0) or differs from -1
// (imm<0, result forced to 1).  rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1474
1475void emit_cmp(int rs,int rt)
1476{
1477 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1478 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1479}
// Set rt to 1 if rs > 0, else 0: compare rs with 1, load 1 into rt, then
// conditionally replace it with 0 on "lt".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Set rt to 1 if rs is non-zero.  When rt differs from rs the value is
// first copied with emit_movs(); otherwise rs is tested against itself.
// A conditional "movne rt,#1" follows in both cases.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// 64-bit "greater than zero" test with a 32-bit result in rt.
// Starts from emit_set_gz32() on the low word, then tests the high word:
// "ne" forces 1 and "mi" forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// 64-bit "non-zero" test: ORs rsh and rsl into rt with flags set, then
// emits a conditional "movne rt,#1".
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (using the "lt" conditional move), else 0.
// rt is cleared before the compare unless it is the same register as one
// of the operands, in which case it is cleared afterwards.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int overlaps = (rs1 == rt || rs2 == rt);
  if (!overlaps)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (overlaps)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if comparing rs1 with rs2 leaves the "cc" condition true
// (via emit_cmovb_imm), else 0.
// rt is cleared before the compare unless it is the same register as one
// of the operands, in which case it is cleared afterwards.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int overlaps = (rs1 == rt || rs2 == rt);
  if (!overlaps)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (overlaps)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1524void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1525{
1526 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1527 assert(u1!=rt);
1528 assert(u2!=rt);
1529 emit_cmp(l1,l2);
1530 emit_movimm(0,rt);
1531 emit_sbcs(u1,u2,HOST_TEMPREG);
1532 emit_cmovl_imm(1,rt);
1533}
1534void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1535{
1536 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1537 assert(u1!=rt);
1538 assert(u2!=rt);
1539 emit_cmp(l1,l2);
1540 emit_movimm(0,rt);
1541 emit_sbcs(u1,u2,HOST_TEMPREG);
1542 emit_cmovb_imm(1,rt);
1543}
1544
1545void emit_call(int a)
1546{
1547 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1548 u_int offset=genjmp(a);
1549 output_w32(0xeb000000|offset);
1550}
1551void emit_jmp(int a)
1552{
1553 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1554 u_int offset=genjmp(a);
1555 output_w32(0xea000000|offset);
1556}
1557void emit_jne(int a)
1558{
1559 assem_debug("bne %x\n",a);
1560 u_int offset=genjmp(a);
1561 output_w32(0x1a000000|offset);
1562}
1563void emit_jeq(int a)
1564{
1565 assem_debug("beq %x\n",a);
1566 u_int offset=genjmp(a);
1567 output_w32(0x0a000000|offset);
1568}
1569void emit_js(int a)
1570{
1571 assem_debug("bmi %x\n",a);
1572 u_int offset=genjmp(a);
1573 output_w32(0x4a000000|offset);
1574}
1575void emit_jns(int a)
1576{
1577 assem_debug("bpl %x\n",a);
1578 u_int offset=genjmp(a);
1579 output_w32(0x5a000000|offset);
1580}
1581void emit_jl(int a)
1582{
1583 assem_debug("blt %x\n",a);
1584 u_int offset=genjmp(a);
1585 output_w32(0xba000000|offset);
1586}
1587void emit_jge(int a)
1588{
1589 assem_debug("bge %x\n",a);
1590 u_int offset=genjmp(a);
1591 output_w32(0xaa000000|offset);
1592}
1593void emit_jno(int a)
1594{
1595 assem_debug("bvc %x\n",a);
1596 u_int offset=genjmp(a);
1597 output_w32(0x7a000000|offset);
1598}
1599void emit_jc(int a)
1600{
1601 assem_debug("bcs %x\n",a);
1602 u_int offset=genjmp(a);
1603 output_w32(0x2a000000|offset);
1604}
1605void emit_jcc(int a)
1606{
1607 assem_debug("bcc %x\n",a);
1608 u_int offset=genjmp(a);
1609 output_w32(0x3a000000|offset);
1610}
1611
// Placeholder: traces "push $imm" and then trips assert(0); no code is
// emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Placeholder: traces "pusha" and then trips assert(0); no code is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Placeholder: traces "popa" and then trips assert(0); no code is emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1627void emit_pushreg(u_int r)
1628{
1629 assem_debug("push %%%s\n",regname[r]);
1630 assert(0);
1631}
1632void emit_popreg(u_int r)
1633{
1634 assem_debug("pop %%%s\n",regname[r]);
1635 assert(0);
1636}
1637void emit_callreg(u_int r)
1638{
1639 assem_debug("call *%%%s\n",regname[r]);
1640 assert(0);
1641}
1642void emit_jmpreg(u_int r)
1643{
1644 assem_debug("mov pc,%s\n",regname[r]);
1645 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1646}
1647
1648void emit_readword_indexed(int offset, int rs, int rt)
1649{
1650 assert(offset>-4096&&offset<4096);
1651 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1652 if(offset>=0) {
1653 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1654 }else{
1655 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1656 }
1657}
1658void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1659{
1660 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1661 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1662}
// Word load with an optional map register.  A negative map means "no map"
// and the plain base+offset load is used; otherwise addr must be 0 and the
// load is indexed by map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Doubleword load into rh:rl with an optional map register.
// The high word is loaded first and skipped when rh is negative.  Without
// a map the low word comes from addr+4; with a map, map is incremented by
// one between the two loads, and rh must not be the same register as rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if (map >= 0) {
    assert(rh != rs);
    if (rh >= 0)
      emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map, 1, map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if (rh >= 0)
    emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr + 4, rs, rl);
}
1683void emit_movsbl_indexed(int offset, int rs, int rt)
1684{
1685 assert(offset>-256&&offset<256);
1686 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1687 if(offset>=0) {
1688 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1689 }else{
1690 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1691 }
1692}
1693void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1694{
1695 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1696 else {
1697 if(addr==0) {
1698 emit_shlimm(map,2,map);
1699 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1700 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1701 }else{
1702 assert(addr>-256&&addr<256);
1703 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1704 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1705 emit_movsbl_indexed(addr, rt, rt);
1706 }
1707 }
1708}
1709void emit_movswl_indexed(int offset, int rs, int rt)
1710{
1711 assert(offset>-256&&offset<256);
1712 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1713 if(offset>=0) {
1714 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1715 }else{
1716 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1717 }
1718}
1719void emit_movzbl_indexed(int offset, int rs, int rt)
1720{
1721 assert(offset>-4096&&offset<4096);
1722 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1723 if(offset>=0) {
1724 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1725 }else{
1726 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1727 }
1728}
1729void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1730{
1731 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1732 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1733}
// Zero-extending byte load with an optional map register.
// Without a map (map < 0) a plain base+offset load is used.  With a map,
// a non-zero addr is first added to rs into rt, and the load is then
// indexed by map scaled by 4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if (addr == 0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs, addr, rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1746void emit_movzwl_indexed(int offset, int rs, int rt)
1747{
1748 assert(offset>-256&&offset<256);
1749 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1750 if(offset>=0) {
1751 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1752 }else{
1753 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1754 }
1755}
1756void emit_readword(int addr, int rt)
1757{
1758 u_int offset = addr-(u_int)&dynarec_local;
1759 assert(offset<4096);
1760 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1761 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1762}
1763void emit_movsbl(int addr, int rt)
1764{
1765 u_int offset = addr-(u_int)&dynarec_local;
1766 assert(offset<256);
1767 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1768 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1769}
1770void emit_movswl(int addr, int rt)
1771{
1772 u_int offset = addr-(u_int)&dynarec_local;
1773 assert(offset<256);
1774 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1775 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1776}
1777void emit_movzbl(int addr, int rt)
1778{
1779 u_int offset = addr-(u_int)&dynarec_local;
1780 assert(offset<4096);
1781 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1782 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1783}
1784void emit_movzwl(int addr, int rt)
1785{
1786 u_int offset = addr-(u_int)&dynarec_local;
1787 assert(offset<256);
1788 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1789 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1790}
1791void emit_movzwl_reg(int rs, int rt)
1792{
1793 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1794 assert(0);
1795}
1796
1797void emit_xchg(int rs, int rt)
1798{
1799 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1800 assert(0);
1801}
1802void emit_writeword_indexed(int rt, int offset, int rs)
1803{
1804 assert(offset>-4096&&offset<4096);
1805 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1806 if(offset>=0) {
1807 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1808 }else{
1809 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1810 }
1811}
1812void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1813{
1814 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1815 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1816}
// Word store with an optional map register.  A negative map means "no map"
// and the plain base+offset store is used; otherwise addr must be 0 and
// the store is indexed by map scaled by 4.  temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Doubleword store of rh:rl with an optional map register.
// Without a map: the high word (skipped when rh is negative) goes to addr
// and the low word to addr+4.
// With a map: rh must be valid.  If temp is a different register from rs,
// temp = map+1 is computed first and used as the map for the low word;
// otherwise rs itself is advanced by 4 after the high word is stored.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0)
      emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh >= 0);
  const int have_temp = (temp != rs);
  if (have_temp)
    emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1841void emit_writehword_indexed(int rt, int offset, int rs)
1842{
1843 assert(offset>-256&&offset<256);
1844 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1845 if(offset>=0) {
1846 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1847 }else{
1848 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1849 }
1850}
1851void emit_writebyte_indexed(int rt, int offset, int rs)
1852{
1853 assert(offset>-4096&&offset<4096);
1854 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1855 if(offset>=0) {
1856 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1857 }else{
1858 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1859 }
1860}
1861void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1862{
1863 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1864 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1865}
// Byte store with an optional map register.
// Without a map (map < 0) a plain base+offset store is used.  With a map,
// a non-zero addr is first added to rs into temp, and the store is then
// indexed by map scaled by 4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if (addr == 0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs, addr, temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1878void emit_writeword(int rt, int addr)
1879{
1880 u_int offset = addr-(u_int)&dynarec_local;
1881 assert(offset<4096);
1882 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1883 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1884}
1885void emit_writehword(int rt, int addr)
1886{
1887 u_int offset = addr-(u_int)&dynarec_local;
1888 assert(offset<256);
1889 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1890 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1891}
1892void emit_writebyte(int rt, int addr)
1893{
1894 u_int offset = addr-(u_int)&dynarec_local;
1895 assert(offset<4096);
1896 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1897 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1898}
// Placeholder: traces "movl $imm,addr" and then trips assert(0); no code
// is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Placeholder: traces "movb $imm,addr" and then trips assert(0); no code
// is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1909
1910void emit_mul(int rs)
1911{
1912 assem_debug("mul %%%s\n",regname[rs]);
1913 assert(0);
1914}
1915void emit_imul(int rs)
1916{
1917 assem_debug("imul %%%s\n",regname[rs]);
1918 assert(0);
1919}
1920void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1921{
1922 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1923 assert(rs1<16);
1924 assert(rs2<16);
1925 assert(hi<16);
1926 assert(lo<16);
1927 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1928}
1929void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1930{
1931 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1932 assert(rs1<16);
1933 assert(rs2<16);
1934 assert(hi<16);
1935 assert(lo<16);
1936 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1937}
1938
1939void emit_div(int rs)
1940{
1941 assem_debug("div %%%s\n",regname[rs]);
1942 assert(0);
1943}
1944void emit_idiv(int rs)
1945{
1946 assem_debug("idiv %%%s\n",regname[rs]);
1947 assert(0);
1948}
// Placeholder: traces "cdq" and then trips assert(0); no code is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1954
1955void emit_clz(int rs,int rt)
1956{
1957 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1958 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1959}
1960
1961void emit_subcs(int rs1,int rs2,int rt)
1962{
1963 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1964 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1965}
1966
1967void emit_shrcc_imm(int rs,u_int imm,int rt)
1968{
1969 assert(imm>0);
1970 assert(imm<32);
1971 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1972 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1973}
1974
1975void emit_negmi(int rs, int rt)
1976{
1977 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1978 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1979}
1980
1981void emit_negsmi(int rs, int rt)
1982{
1983 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1984 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1985}
1986
1987void emit_orreq(u_int rs1,u_int rs2,u_int rt)
1988{
1989 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1990 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
1991}
1992
1993void emit_orrne(u_int rs1,u_int rs2,u_int rt)
1994{
1995 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1996 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
1997}
1998
1999void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2000{
2001 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2002 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2003}
2004
2005void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2006{
2007 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2008 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2009}
2010
2011void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2012{
2013 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2014 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2015}
2016
2017void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2018{
2019 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2020 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2021}
2022
2023void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2024{
2025 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2026 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2027}
2028
2029void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2030{
2031 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2032 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2033}
2034
2035void emit_teq(int rs, int rt)
2036{
2037 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2038 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2039}
2040
2041void emit_rsbimm(int rs, int imm, int rt)
2042{
2043 u_int armval;
2044 assert(genimm(imm,&armval));
2045 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2046 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2047}
2048
2049// Load 2 immediates optimizing for small code size
2050void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2051{
2052 emit_movimm(imm1,rt1);
2053 u_int armval;
2054 if(genimm(imm2-imm1,&armval)) {
2055 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2056 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2057 }else if(genimm(imm1-imm2,&armval)) {
2058 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2059 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2060 }
2061 else emit_movimm(imm2,rt2);
2062}
2063
2064// Conditionally select one of two immediates, optimizing for small code size
2065// This will only be called if HAVE_CMOV_IMM is defined
2066void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2067{
2068 u_int armval;
2069 if(genimm(imm2-imm1,&armval)) {
2070 emit_movimm(imm1,rt);
2071 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2072 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2073 }else if(genimm(imm1-imm2,&armval)) {
2074 emit_movimm(imm1,rt);
2075 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2076 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2077 }
2078 else {
2079 #ifdef ARMv5_ONLY
2080 emit_movimm(imm1,rt);
2081 add_literal((int)out,imm2);
2082 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2083 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2084 #else
2085 emit_movw(imm1&0x0000FFFF,rt);
2086 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2087 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2088 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2089 }
2090 emit_movt(imm1&0xFFFF0000,rt);
2091 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2092 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2093 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2094 }
2095 #endif
2096 }
2097}
2098
// special case for checking invalid_code
// This variant is a placeholder: it only trips assert(0) and emits nothing.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2104
2105// special case for checking invalid_code
2106void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2107{
2108 assert(imm<128&&imm>=0);
2109 assert(r>=0&&r<16);
2110 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2111 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2112 emit_cmpimm(HOST_TEMPREG,imm);
2113}
2114
2115// special case for tlb mapping
2116void emit_addsr12(int rs1,int rs2,int rt)
2117{
2118 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2119 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2120}
2121
// Used to preload hash table entries
// Traces "prefetch addr" and writes bytes 0x0F, 0x18, a modrm byte and the
// 32-bit address.
// NOTE(review): byte-oriented encoding, unlike the 32-bit ARM words emitted
// elsewhere in this file; looks like a leftover - confirm it is unused.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n", (int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32((int)addr);
}
2131void emit_prefetchreg(int r)
2132{
2133 assem_debug("pld %s\n",regname[r]);
2134 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2135}
2136
2137// Special case for mini_ht
2138void emit_ldreq_indexed(int rs, u_int offset, int rt)
2139{
2140 assert(offset<4096);
2141 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2142 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2143}
2144
2145void emit_flds(int r,int sr)
2146{
2147 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2148 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2149}
2150
2151void emit_vldr(int r,int vr)
2152{
2153 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2154 output_w32(0xed900b00|(vr<<12)|(r<<16));
2155}
2156
2157void emit_fsts(int sr,int r)
2158{
2159 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2160 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2161}
2162
2163void emit_vstr(int vr,int r)
2164{
2165 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2166 output_w32(0xed800b00|(vr<<12)|(r<<16));
2167}
2168
// Emits "ftosizs s<d>,s<s>" (single-precision source, single register
// destination). Only bits 0..3 of each register number are encoded.
void emit_ftosizs(int s,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22); // d[3:1] -> bits 14..12, d[0] -> bit 22
  unsigned int src_bits=((s&14)>>1)|((s&1)<<5);   // s[3:1] -> bits 2..0,  s[0] -> bit 5
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst_bits|src_bits);
}
2174
// Emits "ftosizd s<d>,d<s>" (double-precision source, single register
// destination). The source is limited to d0..d7 by the (s&7) mask.
void emit_ftosizd(int s,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22); // d[3:1] -> bits 14..12, d[0] -> bit 22
  unsigned int src_bits=s&7;
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst_bits|src_bits);
}
2180
// Emits "fsitos s<d>,s<s>" (single register source, single-precision
// destination). Only bits 0..3 of each register number are encoded.
void emit_fsitos(int s,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22); // d[3:1] -> bits 14..12, d[0] -> bit 22
  unsigned int src_bits=((s&14)>>1)|((s&1)<<5);   // s[3:1] -> bits 2..0,  s[0] -> bit 5
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst_bits|src_bits);
}
2186
// Emits "fsitod d<d>,s<s>" (single register source, double-precision
// destination). The destination is limited to d0..d7 by the (d&7) mask.
void emit_fsitod(int s,int d)
{
  unsigned int dst_bits=(d&7)<<12;
  unsigned int src_bits=((s&14)>>1)|((s&1)<<5); // s[3:1] -> bits 2..0, s[0] -> bit 5
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst_bits|src_bits);
}
2192
// Emits "fcvtds d<d>,s<s>" (single -> double conversion).
// The destination is limited to d0..d7 by the (d&7) mask.
void emit_fcvtds(int s,int d)
{
  unsigned int dst_bits=(d&7)<<12;
  unsigned int src_bits=((s&14)>>1)|((s&1)<<5); // s[3:1] -> bits 2..0, s[0] -> bit 5
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst_bits|src_bits);
}
2198
// Emits "fcvtsd s<d>,d<s>" (double -> single conversion).
// The source is limited to d0..d7 by the (s&7) mask.
void emit_fcvtsd(int s,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22); // d[3:1] -> bits 14..12, d[0] -> bit 22
  unsigned int src_bits=s&7;
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst_bits|src_bits);
}
2204
// Emits "fsqrts s<d>,s<s>": single-precision square root.
// Both operands are single-precision registers (the encoding below uses the
// split single-register fields), so the debug text prints "s" for both.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2210
// Emits "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands are double-precision registers (d0..d7 via the &7 masks),
// so the debug text prints "d" for both.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2216
// Emits "fabss s<d>,s<s>": single-precision absolute value.
// Both operands use the single-register encoding, so the debug text prints
// "s" for both.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2222
// Emits "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands are double registers (d0..d7 via the &7 masks), so the debug
// text prints "d" for both.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2228
// Emits "fnegs s<d>,s<s>": single-precision negate.
// Both operands use the single-register encoding, so the debug text prints
// "s" for both.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2234
// Emits "fnegd d<d>,d<s>": double-precision negate.
// Both operands are double registers (d0..d7 via the &7 masks), so the debug
// text prints "d" for both.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2240
// Emits "fadds s<d>,s<s1>,s<s2>": single-precision add.
// Only bits 0..3 of each register number are encoded.
void emit_fadds(int s1,int s2,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);   // d[3:1]  -> bits 14..12, d[0]  -> bit 22
  unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);  // s1[3:1] -> bits 18..16, s1[0] -> bit 7
  unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);   // s2[3:1] -> bits 2..0,   s2[0] -> bit 5
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst_bits|lhs_bits|rhs_bits);
}
2246
// Emits "faddd d<d>,d<s1>,d<s2>": double-precision add.
// All three registers are limited to d0..d7 by the &7 masks.
void emit_faddd(int s1,int s2,int d)
{
  unsigned int dst_bits=(d&7)<<12;
  unsigned int lhs_bits=(s1&7)<<16;
  unsigned int rhs_bits=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst_bits|lhs_bits|rhs_bits);
}
2252
// Emits "fsubs s<d>,s<s1>,s<s2>": single-precision subtract.
// Only bits 0..3 of each register number are encoded.
void emit_fsubs(int s1,int s2,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);   // d[3:1]  -> bits 14..12, d[0]  -> bit 22
  unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);  // s1[3:1] -> bits 18..16, s1[0] -> bit 7
  unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);   // s2[3:1] -> bits 2..0,   s2[0] -> bit 5
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst_bits|lhs_bits|rhs_bits);
}
2258
// Emits "fsubd d<d>,d<s1>,d<s2>": double-precision subtract.
// All three registers are limited to d0..d7 by the &7 masks.
void emit_fsubd(int s1,int s2,int d)
{
  unsigned int dst_bits=(d&7)<<12;
  unsigned int lhs_bits=(s1&7)<<16;
  unsigned int rhs_bits=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst_bits|lhs_bits|rhs_bits);
}
2264
// Emits "fmuls s<d>,s<s1>,s<s2>": single-precision multiply.
// Only bits 0..3 of each register number are encoded.
void emit_fmuls(int s1,int s2,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);   // d[3:1]  -> bits 14..12, d[0]  -> bit 22
  unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);  // s1[3:1] -> bits 18..16, s1[0] -> bit 7
  unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);   // s2[3:1] -> bits 2..0,   s2[0] -> bit 5
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst_bits|lhs_bits|rhs_bits);
}
2270
// Emits "fmuld d<d>,d<s1>,d<s2>": double-precision multiply.
// All three registers are limited to d0..d7 by the &7 masks.
void emit_fmuld(int s1,int s2,int d)
{
  unsigned int dst_bits=(d&7)<<12;
  unsigned int lhs_bits=(s1&7)<<16;
  unsigned int rhs_bits=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst_bits|lhs_bits|rhs_bits);
}
2276
// Emits "fdivs s<d>,s<s1>,s<s2>": single-precision divide.
// Only bits 0..3 of each register number are encoded.
void emit_fdivs(int s1,int s2,int d)
{
  unsigned int dst_bits=((d&14)<<11)|((d&1)<<22);   // d[3:1]  -> bits 14..12, d[0]  -> bit 22
  unsigned int lhs_bits=((s1&14)<<15)|((s1&1)<<7);  // s1[3:1] -> bits 18..16, s1[0] -> bit 7
  unsigned int rhs_bits=((s2&14)>>1)|((s2&1)<<5);   // s2[3:1] -> bits 2..0,   s2[0] -> bit 5
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst_bits|lhs_bits|rhs_bits);
}
2282
// Emits "fdivd d<d>,d<s1>,d<s2>": double-precision divide.
// All three registers are limited to d0..d7 by the &7 masks.
void emit_fdivd(int s1,int s2,int d)
{
  unsigned int dst_bits=(d&7)<<12;
  unsigned int lhs_bits=(s1&7)<<16;
  unsigned int rhs_bits=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst_bits|lhs_bits|rhs_bits);
}
2288
// Emits a fixed "fcmps s14, s15".
// The x and y parameters are ignored: the instruction word is a constant.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2294
// Emits a fixed "fcmpd d6, d7".
// The x and y parameters are ignored: the instruction word is a constant.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2300
// Emits "fmstat" (fixed instruction word 0xeef1fa10).
void emit_fmstat()
{
  const unsigned int fmstat_word=0xeef1fa10;
  assem_debug("fmstat\n");
  output_w32(fmstat_word);
}
2306
2307void emit_bicne_imm(int rs,int imm,int rt)
2308{
2309 u_int armval;
2310 assert(genimm(imm,&armval));
2311 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2312 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2313}
2314
2315void emit_biccs_imm(int rs,int imm,int rt)
2316{
2317 u_int armval;
2318 assert(genimm(imm,&armval));
2319 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2320 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2321}
2322
2323void emit_bicvc_imm(int rs,int imm,int rt)
2324{
2325 u_int armval;
2326 assert(genimm(imm,&armval));
2327 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2328 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2329}
2330
2331void emit_bichi_imm(int rs,int imm,int rt)
2332{
2333 u_int armval;
2334 assert(genimm(imm,&armval));
2335 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2336 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2337}
2338
2339void emit_orrvs_imm(int rs,int imm,int rt)
2340{
2341 u_int armval;
2342 assert(genimm(imm,&armval));
2343 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2344 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2345}
2346
// Emits "addvc pc,pc,#?" with a zero immediate field.
// The target `a` is only printed in the debug text; it is not encoded here.
// NOTE(review): presumably the immediate is patched in later by the jump
// fix-up code - confirm against set_jump_target.
void emit_jno_unlikely(int a)
{
  //emit_jno(a);
  unsigned int addvc_word=0x72800000|rd_rn_rm(15,15,0);
  assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
  output_w32(addvc_word);
}
2353
2354// Save registers before function call
2355void save_regs(u_int reglist)
2356{
2357 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2358 if(!reglist) return;
2359 assem_debug("stmia fp,{");
2360 if(reglist&1) assem_debug("r0, ");
2361 if(reglist&2) assem_debug("r1, ");
2362 if(reglist&4) assem_debug("r2, ");
2363 if(reglist&8) assem_debug("r3, ");
2364 if(reglist&0x1000) assem_debug("r12");
2365 assem_debug("}\n");
2366 output_w32(0xe88b0000|reglist);
2367}
2368// Restore registers after function call
2369void restore_regs(u_int reglist)
2370{
2371 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2372 if(!reglist) return;
2373 assem_debug("ldmia fp,{");
2374 if(reglist&1) assem_debug("r0, ");
2375 if(reglist&2) assem_debug("r1, ");
2376 if(reglist&4) assem_debug("r2, ");
2377 if(reglist&8) assem_debug("r3, ");
2378 if(reglist&0x1000) assem_debug("r12");
2379 assem_debug("}\n");
2380 output_w32(0xe89b0000|reglist);
2381}
2382
2383// Write back consts using r14 so we don't disturb the other registers
2384void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2385{
2386 int hr;
2387 for(hr=0;hr<HOST_REGS;hr++) {
2388 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2389 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2390 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2391 int value=constmap[i][hr];
2392 if(value==0) {
2393 emit_zeroreg(HOST_TEMPREG);
2394 }
2395 else {
2396 emit_movimm(value,HOST_TEMPREG);
2397 }
2398 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2399 if((i_is32>>i_regmap[hr])&1) {
2400 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2401 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2402 }
2403 }
2404 }
2405 }
2406 }
2407}
2408
2409/* Stubs/epilogue */
2410
2411void literal_pool(int n)
2412{
2413 if(!literalcount) return;
2414 if(n) {
2415 if((int)out-literals[0][0]<4096-n) return;
2416 }
2417 u_int *ptr;
2418 int i;
2419 for(i=0;i<literalcount;i++)
2420 {
2421 ptr=(u_int *)literals[i][0];
2422 u_int offset=(u_int)out-(u_int)ptr-8;
2423 assert(offset<4096);
2424 assert(!(offset&3));
2425 *ptr|=offset;
2426 output_w32(literals[i][1]);
2427 }
2428 literalcount=0;
2429}
2430
2431void literal_pool_jumpover(int n)
2432{
2433 if(!literalcount) return;
2434 if(n) {
2435 if((int)out-literals[0][0]<4096-n) return;
2436 }
2437 int jaddr=(int)out;
2438 emit_jmp(0);
2439 literal_pool(0);
2440 set_jump_target(jaddr,(int)out);
2441}
2442
2443emit_extjump2(int addr, int target, int linker)
2444{
2445 u_char *ptr=(u_char *)addr;
2446 assert((ptr[3]&0x0e)==0xa);
2447 emit_loadlp(target,0);
2448 emit_loadlp(addr,1);
2449 assert(addr>=0x7000000&&addr<0x7FFFFFF);
2450 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2451//DEBUG >
2452#ifdef DEBUG_CYCLE_COUNT
2453 emit_readword((int)&last_count,ECX);
2454 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2455 emit_readword((int)&next_interupt,ECX);
2456 emit_writeword(HOST_CCREG,(int)&Count);
2457 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2458 emit_writeword(ECX,(int)&last_count);
2459#endif
2460//DEBUG <
2461 emit_jmp(linker);
2462}
2463
2464emit_extjump(int addr, int target)
2465{
2466 emit_extjump2(addr, target, (int)dyna_linker);
2467}
2468emit_extjump_ds(int addr, int target)
2469{
2470 emit_extjump2(addr, target, (int)dyna_linker_ds);
2471}
2472
// Emit the out-of-line slow path for load stub n.
// Fields of stubs[n] as used here:
//   [0] stub type (LOADB/LOADBU/LOADH/LOADHU/LOADW/LOADD _STUB)
//   [1] address of the branch that jumps to this stub (patched to `out`)
//   [2] return address to jump back to
//   [3] instruction index i
//   [4] host register holding the access address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] cycle adjustment (used as 2*[6]+2)
//   [7] list of live host registers to save/restore
// The generated code stores the address to `address`, calls the handler via
// indirect_jump_indexed, re-derives the cycle count, and finally loads the
// result from readmem_dword into the target register(s).
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // C1LS and LOADLR instructions receive their result in FTEMP,
  // everything else in the instruction's own target register rt1[i].
  if(itype[i]==C1LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  // No address-generation register allocated: fall back to the target register
  if(addr<0) addr=rt;
  assert(addr>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
  // ds is non-zero when the stub was recorded with a regstat other than
  // regs[i] (presumably a delay-slot instruction - confirm with callers)
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  // Write back dirty registers, excluding addr and real_rs. When !ds, constants
  // in r0-r3/r12 (mask 0x100f) are loaded first and the remaining constants
  // are written back through wb_consts afterwards.
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
  // Arguments for indirect_jump_indexed:
  //   r0 = handler table, r1 = address >> 16, r2 = cycle count, r3 = PC value
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  // r3 = instruction address, with bit 1 taken from was32 of rs1[i] and ds added in
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  // cycle count = Count - (2*[6]+2) - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // Fetch the result from readmem_dword with the width/extension of the load
  if(type==LOADB_STUB)
    emit_movsbl((int)&readmem_dword,rt);
  if(type==LOADBU_STUB)
    emit_movzbl((int)&readmem_dword,rt);
  if(type==LOADH_STUB)
    emit_movswl((int)&readmem_dword,rt);
  if(type==LOADHU_STUB)
    emit_movzwl((int)&readmem_dword,rt);
  if(type==LOADW_STUB)
    emit_readword((int)&readmem_dword,rt);
  if(type==LOADD_STUB) {
    emit_readword((int)&readmem_dword,rt);
    if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
  }
  emit_jmp(stubs[n][2]); // return address
}
2562
// Emit an inline (not out-of-line) call to a read handler for a load whose
// address is known at compile time.
//   type    - LOADB/LOADBU/LOADH/LOADHU/LOADW/LOADD _STUB
//   i       - instruction index
//   addr    - constant address; addr>>16 indexes the handler table now
//   regmap  - host register map in effect
//   target  - guest register receiving the result
//   adj     - cycle adjustment (used as CLOCK_DIVIDER*(adj+1))
//   reglist - live host registers to save/restore around the call
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  // Note: rs and rt are both looked up from `target`, so they are the same register
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  assert(rs>=0);
  assert(rt>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler pointer is read from the table at code-generation time and
  // emitted as an immediate in r0
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  // cycle count = Count - CLOCK_DIVIDER*(adj+1) - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  // Fetch the result from readmem_dword with the width/extension of the load
  if(type==LOADB_STUB)
    emit_movsbl((int)&readmem_dword,rt);
  if(type==LOADBU_STUB)
    emit_movzbl((int)&readmem_dword,rt);
  if(type==LOADH_STUB)
    emit_movswl((int)&readmem_dword,rt);
  if(type==LOADHU_STUB)
    emit_movzwl((int)&readmem_dword,rt);
  if(type==LOADW_STUB)
    emit_readword((int)&readmem_dword,rt);
  if(type==LOADD_STUB) {
    emit_readword((int)&readmem_dword,rt);
    if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
  }
}
2627
// Emit the out-of-line slow path for store stub n.
// Fields of stubs[n] as used here:
//   [0] stub type (STOREB/STOREH/STOREW/STORED _STUB)
//   [1] address of the branch that jumps to this stub (patched to `out`)
//   [2] return address to jump back to
//   [3] instruction index i
//   [4] host register holding the access address (rs)
//   [5] pointer to the struct regstat in effect
//   [6] cycle adjustment (used as 2*[6]+2)
//   [7] list of live host registers to save/restore
// The generated code stores the address to `address` and the value to
// byte/hword/word/dword, calls the handler via indirect_jump_indexed and
// re-derives the cycle count before returning.
do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt,r;
  int ds;
  // C1LS stores take their value from FTEMP, everything else from rs2[i];
  // r remembers which guest register that is
  if(itype[i]==C1LS) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rth=get_reg(i_regmap,rs2[i]|64);
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  // No address-generation register allocated: use the temporary (-1) register
  if(addr<0) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
  if(type==STORED_STUB)
    ftable=(int)writememd;
  emit_writeword(rs,(int)&address);
  //emit_shrimm(rs,16,rs);
  //emit_movmem_indexedx4(ftable,rs,rs);
  // Hand the value to the handler through the width-specific global
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&word);
  if(type==STORED_STUB) {
    emit_writeword(rt,(int)&dword);
    // When the source is guest register 0 (r==0) the low register is reused
    // for the upper word
    emit_writeword(r?rth:rt,(int)&dword+4);
  }
  //emit_pusha();
  save_regs(reglist);
  // ds is non-zero when the stub was recorded with a regstat other than
  // regs[i] (presumably a delay-slot instruction - confirm with callers)
  ds=i_regs!=&regs[i];
  int real_rs=get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  // Write back dirty registers, excluding addr and real_rs. When !ds, constants
  // in r0-r3/r12 (mask 0x100f) are loaded first and the remaining constants
  // are written back through wb_consts afterwards.
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
  // Arguments for indirect_jump_indexed:
  //   r0 = handler table, r1 = address >> 16, r2 = cycle count, r3 = PC value
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  // r3 = instruction address, with bit 1 taken from was32 of rs1[i] and ds added in
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  //emit_readword((int)&last_count,temp);
  //emit_addimm(cc,2*stubs[n][5]+2,cc);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  // cycle count = Count - (2*[6]+2) - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  emit_jmp(stubs[n][2]); // return address
}
2712
// Emit an inline (not out-of-line) call to a write handler for a store whose
// address is known at compile time.
//   type    - STOREB/STOREH/STOREW/STORED _STUB
//   i       - instruction index
//   addr    - constant address; addr>>16 indexes the handler table now
//   regmap  - host register map in effect
//   target  - guest register holding the value to store
//   adj     - cycle adjustment (used as CLOCK_DIVIDER*(adj+1))
//   reglist - live host registers to save/restore around the call
inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  // The address is taken from the temporary (-1) register
  int rs=get_reg(regmap,-1);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  assert(rs>=0);
  assert(rt>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
  if(type==STORED_STUB)
    ftable=(int)writememd;
  emit_writeword(rs,(int)&address);
  //emit_shrimm(rs,16,rs);
  //emit_movmem_indexedx4(ftable,rs,rs);
  // Hand the value to the handler through the width-specific global
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&word);
  if(type==STORED_STUB) {
    emit_writeword(rt,(int)&dword);
    // For guest register 0 the low register is reused for the upper word
    emit_writeword(target?rth:rt,(int)&dword+4);
  }
  //emit_pusha();
  save_regs(reglist);
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler pointer is read from the table at code-generation time and
  // emitted as an immediate in r0
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
  // cycle count = Count - CLOCK_DIVIDER*(adj+1) - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
}
2773
2774do_unalignedwritestub(int n)
2775{
2776 set_jump_target(stubs[n][1],(int)out);
2777 output_w32(0xef000000);
2778 emit_jmp(stubs[n][2]); // return address
2779}
2780
// Debug helper: prints its register-named arguments in the order
// a b c d ebp esi edi, followed by the int located just below the first
// parameter in memory ((&edi)[-1]). esp is accepted but not printed.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  (void)esp;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
2785
2786do_invstub(int n)
2787{
2788 literal_pool(20);
2789 u_int reglist=stubs[n][3];
2790 set_jump_target(stubs[n][1],(int)out);
2791 save_regs(reglist);
2792 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2793 emit_call((int)&invalidate_addr);
2794 restore_regs(reglist);
2795 emit_jmp(stubs[n][2]); // return address
2796}
2797
2798int do_dirty_stub(int i)
2799{
2800 assem_debug("do_dirty_stub %x\n",start+i*4);
2801 // Careful about the code output here, verify_dirty needs to parse it.
2802 #ifdef ARMv5_ONLY
2803 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2804 emit_loadlp((int)copy,2);
2805 emit_loadlp(slen*4,3);
2806 #else
2807 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2808 emit_movw(((u_int)copy)&0x0000FFFF,2);
2809 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2810 emit_movt(((u_int)copy)&0xFFFF0000,2);
2811 emit_movw(slen*4,3);
2812 #endif
2813 emit_movimm(start+i*4,0);
2814 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2815 int entry=(int)out;
2816 load_regs_entry(i);
2817 if(entry==(int)out) entry=instr_addr[i];
2818 emit_jmp(instr_addr[i]);
2819 return entry;
2820}
2821
2822void do_dirty_stub_ds()
2823{
2824 // Careful about the code output here, verify_dirty needs to parse it.
2825 #ifdef ARMv5_ONLY
2826 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2827 emit_loadlp((int)copy,2);
2828 emit_loadlp(slen*4,3);
2829 #else
2830 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2831 emit_movw(((u_int)copy)&0x0000FFFF,2);
2832 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2833 emit_movt(((u_int)copy)&0xFFFF0000,2);
2834 emit_movw(slen*4,3);
2835 #endif
2836 emit_movimm(start+1,0);
2837 emit_call((int)&verify_code_ds);
2838}
2839
2840do_cop1stub(int n)
2841{
2842 literal_pool(256);
2843 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2844 set_jump_target(stubs[n][1],(int)out);
2845 int i=stubs[n][3];
2846 int rs=stubs[n][4];
2847 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2848 int ds=stubs[n][6];
2849 if(!ds) {
2850 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2851 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2852 }
2853 //else {printf("fp exception in delay slot\n");}
2854 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2855 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2856 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2857 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2858 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2859}
2860
2861/* TLB */
2862
2863int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2864{
2865 if(c) {
2866 if((signed int)addr>=(signed int)0xC0000000) {
2867 // address_generation already loaded the const
2868 emit_readword_dualindexedx4(FP,map,map);
2869 }
2870 else
2871 return -1; // No mapping
2872 }
2873 else {
2874 assert(s!=map);
2875 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2876 emit_addsr12(map,s,map);
2877 // Schedule this while we wait on the load
2878 //if(x) emit_xorimm(s,x,ar);
2879 if(shift>=0) emit_shlimm(s,3,shift);
2880 if(~a) emit_andimm(s,a,ar);
2881 emit_readword_dualindexedx4(FP,map,map);
2882 }
2883 return map;
2884}
2885int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2886{
2887 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2888 emit_test(map,map);
2889 *jaddr=(int)out;
2890 emit_js(0);
2891 }
2892 return map;
2893}
2894
2895int gen_tlb_addr_r(int ar, int map) {
2896 if(map>=0) {
2897 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2898 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2899 }
2900}
2901
2902int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2903{
2904 if(c) {
2905 if(addr<0x80800000||addr>=0xC0000000) {
2906 // address_generation already loaded the const
2907 emit_readword_dualindexedx4(FP,map,map);
2908 }
2909 else
2910 return -1; // No mapping
2911 }
2912 else {
2913 assert(s!=map);
2914 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2915 emit_addsr12(map,s,map);
2916 // Schedule this while we wait on the load
2917 //if(x) emit_xorimm(s,x,ar);
2918 emit_readword_dualindexedx4(FP,map,map);
2919 }
2920 return map;
2921}
2922int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
2923{
2924 if(!c||addr<0x80800000||addr>=0xC0000000) {
2925 emit_testimm(map,0x40000000);
2926 *jaddr=(int)out;
2927 emit_jne(0);
2928 }
2929}
2930
2931int gen_tlb_addr_w(int ar, int map) {
2932 if(map>=0) {
2933 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2934 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2935 }
2936}
2937
2938// Generate the address of the memory_map entry, relative to dynarec_local
2939generate_map_const(u_int addr,int reg) {
2940 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
2941 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
2942}
2943
2944/* Special assem */
2945
// Assemble a variable shift (shift amount taken from a register).
// opcode2 <= 0x07 selects the 32-bit forms SLLV/SRLV/SRAV; otherwise the
// 64-bit forms DSLLV (0x14) / DSRLV (0x16) / DSRAV (0x17) are generated on a
// pair of 32-bit host registers.
//   i      - instruction index (rt1 = destination, rs1 = value, rs2 = amount)
//   i_regs - register state used to look up host registers
// Nothing is emitted when the destination is guest register 0 or has no host
// register allocated.
void shift_assemble_arm(int i,struct regstat *i_regs)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      signed char s,t,shift;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(t>=0){
        if(rs1[i]==0)
        {
          // Shifting guest register 0 always yields 0
          emit_zeroreg(t);
        }
        else if(rs2[i]==0)
        {
          // Shift amount comes from guest register 0: plain copy
          assert(s>=0);
          if(s!=t) emit_mov(s,t);
        }
        else
        {
          // Only the low 5 bits of the shift register are used
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==4) // SLLV
          {
            emit_shl(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==6) // SRLV
          {
            emit_shr(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==7) // SRAV
          {
            emit_sar(s,HOST_TEMPREG,t);
          }
        }
      }
    } else { // DSLLV/DSRLV/DSRAV
      signed char sh,sl,th,tl,shift;
      // th/tl = destination high/low halves, sh/sl = source high/low halves
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(tl>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else if(rs2[i]==0)
        {
          assert(sl>=0);
          if(sl!=tl) emit_mov(sl,tl);
          if(th>=0&&sh!=th) emit_mov(sh,th);
        }
        else
        {
          // FIXME: What if shift==tl ?
          assert(shift!=tl);
          int temp=get_reg(i_regs->regmap,-1);
          // real_th remembers whether a high destination register was really
          // allocated; th may be redirected to the temporary below
          int real_th=th;
          if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
          assert(sl>=0);
          assert(sh>=0);
          // HOST_TEMPREG = shift amount modulo 32; bit 5 of the amount (tested
          // with emit_testimm(shift,32)) selects the "shift by 32 or more" case
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==0x14) // DSLLV
          {
            if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            // NOTE(review): unlike the neighbouring lines this one is not
            // guarded by th>=0, although th is left negative for DSLLV when no
            // high register is allocated - confirm th is always valid here
            emit_orrshr(sl,HOST_TEMPREG,th);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_shl(sl,HOST_TEMPREG,tl);
            // Amount >= 32: low result moves to the high half, low half becomes 0
            if(th>=0) emit_cmovne_reg(tl,th);
            emit_cmovne_imm(0,tl);
          }
          if(opcode2[i]==0x16) // DSRLV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_shr(sh,HOST_TEMPREG,th);
            // Amount >= 32: high result moves to the low half, high half becomes 0
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_imm(0,th);
          }
          if(opcode2[i]==0x17) // DSRAV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            if(real_th>=0) {
              assert(temp>=0);
              // temp = th arithmetically shifted right by 31, used below as the
              // high half when the amount is >= 32
              emit_sarimm(th,31,temp);
            }
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_sar(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_reg(temp,th);
          }
        }
      }
    }
  }
}
#define shift_assemble shift_assemble_arm
3056
// Assemble an unaligned load: LWL (0x22) / LWR (0x26) / LDL (0x1A) / LDR (0x1B).
// The aligned word (or doubleword) containing the address is loaded into
// FTEMP, shifted according to the low address bits, and merged into the
// destination register(s) after masking off the bytes being replaced.
//   i      - instruction index (rt1 = destination, rs1 = base, imm = offset)
//   i_regs - register state used to look up host registers
// Nothing is emitted when the destination has no host register (tl<0).
void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget,c=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  // No address-generation register is expected to be allocated here
  assert(addr<0);
  offset=imm[i];
  // reglist = all host registers currently mapped, plus the temporary
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  // NOTE(review): temp is not checked for <0 before being used as a shift count
  reglist|=1<<temp;
  // NOTE(review): c is still 0 at this point (it is only assigned below), so
  // the "||c" term has no effect here
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  // NOTE(review): memtarget is only assigned when s>=0; it is read below only
  // when c is set, and c can only be set in that same branch
  if(tl>=0) {
    //assert(tl>=0);
    //assert(rt1[i]);
    if(!using_tlb) {
      if(!c) {
        // temp = address*8 (bit offset); temp2 = address aligned to the access size
        emit_shlimm(addr,3,temp);
        if (opcode[i]==0x22||opcode[i]==0x26) {
          emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
        }else{
          emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
        }
        // Range check against 0x800000; the branch to the stub is patched later
        emit_cmpimm(addr,0x800000);
        jaddr=(int)out;
        emit_jno(0);
      }
      else {
        // Constant address: the bit offset is known at compile time
        if (opcode[i]==0x22||opcode[i]==0x26) {
          emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
        }else{
          emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
        }
      }
    }else{ // using tlb
      int a;
      // a = alignment mask handed to do_tlb_r (-1 means no masking)
      if(c) {
        a=-1;
      }else if (opcode[i]==0x22||opcode[i]==0x26) {
        a=0xFFFFFFFC; // LWL/LWR
      }else{
        a=0xFFFFFFF8; // LDL/LDR
      }
      map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
      if(c) {
        if (opcode[i]==0x22||opcode[i]==0x26) {
          emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
        }else{
          emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
        }
      }
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
    }
    if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
      if(!c||memtarget) {
        //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
        emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
        if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
      // temp = bit shift (0/8/16/24); inverted for LWR
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
      // HOST_TEMPREG = all ones, shifted below to form the mask of replaced bits
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      // Merge the shifted memory word into the preserved destination bits
      emit_or(temp2,tl,tl);
      //emit_storereg(rt1[i],tl); // DEBUG
    }
    if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
      int temp2h=get_reg(i_regs->regmap,FTEMP|64);
      if(!c||memtarget) {
        //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
        //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
        emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
        if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
      // Flags from bit 5 of the bit offset drive the conditional (eq/ne)
      // instructions below; temp is then reduced to the shift within a word
      emit_testimm(temp,32);
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x1A) { // LDL
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shl(temp2h,temp,temp2h);
        emit_orrshr(temp2,HOST_TEMPREG,temp2h);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shl(temp2,temp,temp2);
        emit_cmove_reg(temp2h,th);
        emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
        emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
        emit_orreq(temp2,tl,tl);
        emit_orrne(temp2,th,th);
      }
      if (opcode[i]==0x1B) { // LDR
        emit_xorimm(temp,24,temp);
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shr(temp2,temp,temp2);
        emit_orrshl(temp2h,HOST_TEMPREG,temp2);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shr(temp2h,temp,temp2h);
        emit_cmovne_reg(temp2,tl);
        emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
        emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
        emit_orrne(temp2h,th,th);
        emit_orreq(temp2h,tl,tl);
      }
    }
  }
}
#define loadlr_assemble loadlr_assemble_arm
3188
// Emit host code for the COP0 instruction at index i.
// Handles MFC0 (opcode2==0), MTC0 (opcode2==4) and the opcode2==0x10 group
// (TLBR, TLBWI, TLBWR, TLBP, ERET - selected by the low 6 bits of source[i]).
//   i      - index into the per-instruction arrays (opcode2, source, rs1, rt1, ccadj)
//   i_regs - register state for this instruction; regmap, dirty and is32 are read
// The order of the emit_* calls below is the order of the generated code.
3189 void cop0_assemble(int i,struct regstat *i_regs)
3190 {
3191 if(opcode2[i]==0) // MFC0
3192 {
3193 signed char t=get_reg(i_regs->regmap,rt1[i]);
// Coprocessor register number: bits 11..15 of the instruction word
3194 char copr=(source[i]>>11)&0x1f;
3195 //assert(t>=0); // Why does this happen? OOT is weird
// Nothing is emitted when the target has no host register (t<0)
3196 if(t>=0) {
// Host reg 0 = FP + (offset of fake_pc from dynarec_local), then stored to PC below.
// NOTE(review): assumes FP holds the address of dynarec_local - confirm.
3197 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
// Host reg 1 = coprocessor register number, stored into fake_pc.f.r.nrd below
3198 emit_movimm((source[i]>>11)&0x1f,1);
3199 emit_writeword(0,(int)&PC);
3200 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
// Register 9: write Count = last_count + cycle register + CLOCK_DIVIDER*ccadj[i]
// before calling MFC0
3201 if(copr==9) {
3202 emit_readword((int)&last_count,ECX);
3203 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3204 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3205 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3206 emit_writeword(HOST_CCREG,(int)&Count);
3207 }
// Call MFC0, then load the target register from readmem_dword
3208 emit_call((int)MFC0);
3209 emit_readword((int)&readmem_dword,t);
3210 }
3211 }
3212 else if(opcode2[i]==4) // MTC0
3213 {
3214 signed char s=get_reg(i_regs->regmap,rs1[i]);
3215 char copr=(source[i]>>11)&0x1f;
3216 assert(s>=0);
// Store the source register value to readmem_dword before calling MTC0
3217 emit_writeword(s,(int)&readmem_dword);
// NOTE(review): presumably writes rs1 back to the register file if dirty - confirm in wb_register
3218 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
// Same PC / fake_pc.f.r.nrd setup as in the MFC0 path (host regs 0 and 1 are used)
3219 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3220 emit_movimm((source[i]>>11)&0x1f,1);
3221 emit_writeword(0,(int)&PC);
3222 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
// Registers 9, 11 and 12: bring Count up to date before the call
3223 if(copr==9||copr==11||copr==12) {
3224 emit_readword((int)&last_count,ECX);
3225 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3226 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3227 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3228 emit_writeword(HOST_CCREG,(int)&Count);
3229 }
3230 // What a mess. The status register (12) can enable interrupts,
3231 // so needs a special case to handle a pending interrupt.
3232 // The interrupt must be taken immediately, because a subsequent
3233 // instruction might disable interrupts again.
// pcaddr = address of the following instruction (start+i*4+4); pending_exception = 0
3234 if(copr==12&&!is_delayslot) {
3235 emit_movimm(start+i*4+4,0);
3236 emit_movimm(0,1);
3237 emit_writeword(0,(int)&pcaddr);
3238 emit_writeword(1,(int)&pending_exception);
3239 }
3240 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3241 //else
3242 emit_call((int)MTC0);
// After the call: cycle register = Count - CLOCK_DIVIDER*ccadj[i] - next_interupt,
// and last_count = next_interupt.
// NOTE(review): assumes emit_sub(a,b,d) computes d = a - b - confirm.
3243 if(copr==9||copr==11||copr==12) {
3244 emit_readword((int)&Count,HOST_CCREG);
3245 emit_readword((int)&next_interupt,ECX);
3246 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3247 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3248 emit_writeword(ECX,(int)&last_count);
3249 emit_storereg(CCREG,HOST_CCREG);
3250 }
3251 if(copr==12) {
// Checked while generating code: a register-12 write in a delay slot aborts here,
// even though the test above also guards on !is_delayslot
3252 assert(!is_delayslot);
// Host reg 14 = pending_exception; it is tested after the reloads below
3253 emit_readword((int)&pending_exception,14);
3254 }
// Reload the source register (and its upper half, if mapped) after the call
3255 emit_loadreg(rs1[i],s);
3256 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3257 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
// Branch to do_interrupt when pending_exception was non-zero
3258 if(copr==12) {
3259 emit_test(14,14);
3260 emit_jne((int)&do_interrupt);
3261 }
// Clear the flag so the next COP1 instruction emits its usability check again
3262 cop1_usable=0;
3263 }
3264 else
3265 {
3266 assert(opcode2[i]==0x10);
3267 if((source[i]&0x3f)==0x01) // TLBR
3268 emit_call((int)TLBR);
3269 if((source[i]&0x3f)==0x02) // TLBWI
3270 emit_call((int)TLBWI_new);
3271 if((source[i]&0x3f)==0x06) { // TLBWR
3272 // The TLB entry written by TLBWR is dependent on the count,
3273 // so update the cycle count
3274 emit_readword((int)&last_count,ECX);
// Load the cycle register only if HOST_CCREG is not already mapped to CCREG
3275 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3276 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3277 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3278 emit_writeword(HOST_CCREG,(int)&Count);
3279 emit_call((int)TLBWR_new);
3280 }
3281 if((source[i]&0x3f)==0x08) // TLBP
3282 emit_call((int)TLBP);
3283 if((source[i]&0x3f)==0x18) // ERET
3284 {
// Add this instruction's cycle adjustment to the cycle register, then jump to jump_eret
3285 int count=ccadj[i];
3286 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3287 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3288 emit_jmp((int)jump_eret);
3289 }
3290 }
3291 }
3292
// Emit host code for a COP1 register-move instruction at index i:
// MFC1 (opcode2==0), DMFC1 (1), CFC1 (2), MTC1 (4), DMTC1 (5), CTC1 (6).
//   i      - index into the per-instruction arrays (opcode2, source, rs1, rt1)
//   i_regs - register state for this instruction; only regmap is read here
// reg_cop1_simple[] / reg_cop1_double[] entries are loaded and then dereferenced,
// i.e. they are used as pointers to the FPR storage.
3293 void cop1_assemble(int i,struct regstat *i_regs)
3294 {
3295 // Check cop1 unusable
// Emitted only while cop1_usable is clear; setting it afterwards suppresses
// the check for following COP1 instructions until something clears it again.
3296 if(!cop1_usable) {
3297 signed char rs=get_reg(i_regs->regmap,CSREG);
3298 assert(rs>=0);
// NOTE(review): 0x20000000 is presumably the coprocessor-1-usable bit of Status - confirm
3299 emit_testimm(rs,0x20000000);
// Remember where the branch is emitted; its target 0 is a placeholder and the
// location is handed to add_stub
3300 int jaddr=(int)out;
3301 emit_jeq(0);
3302 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3303 cop1_usable=1;
3304 }
3305 if (opcode2[i]==0) { // MFC1
3306 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// Nothing is emitted when the target has no host register
3307 if(tl>=0) {
// Load the FPR pointer (register number in bits 11..15), then the word it points to
3308 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3309 emit_readword_indexed(0,tl,tl);
3310 }
3311 }
3312 else if (opcode2[i]==1) { // DMFC1
3313 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3314 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3315 if(tl>=0) {
3316 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
// The word at offset 4 is read first because tl still holds the pointer;
// the offset-0 load below overwrites it
3317 if(th>=0) emit_readword_indexed(4,tl,th);
3318 emit_readword_indexed(0,tl,tl);
3319 }
3320 }
3321 else if (opcode2[i]==4) { // MTC1
3322 signed char sl=get_reg(i_regs->regmap,rs1[i]);
// Register mapped to -1 is used as a scratch register for the FPR pointer
3323 signed char temp=get_reg(i_regs->regmap,-1);
3324 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3325 emit_writeword_indexed(sl,0,temp);
3326 }
3327 else if (opcode2[i]==5) { // DMTC1
3328 signed char sl=get_reg(i_regs->regmap,rs1[i]);
// For source register 0 the low-half register is reused as the high half
3329 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3330 signed char temp=get_reg(i_regs->regmap,-1);
3331 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
// sh is stored at offset 4 and sl at offset 0
3332 emit_writeword_indexed(sh,4,temp);
3333 emit_writeword_indexed(sl,0,temp);
3334 }
3335 else if (opcode2[i]==2) // CFC1
3336 {
3337 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3338 if(tl>=0) {
// Only control registers 0 (FCR0) and 31 (FCR31) are read; nothing is emitted for others
3339 u_int copr=(source[i]>>11)&0x1f;
3340 if(copr==0) emit_readword((int)&FCR0,tl);
3341 if(copr==31) emit_readword((int)&FCR31,tl);
3342 }
3343 }
3344 else if (opcode2[i]==6) // CTC1
3345 {
3346 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3347 u_int copr=(source[i]>>11)&0x1f;
3348 assert(sl>=0);
// Only control register 31 is written; the value is stored to FCR31 and the
// rounding-mode update below is commented out
3349 if(copr==31)
3350 {
3351 emit_writeword(sl,(int)&FCR31);
3352 // Set the rounding mode
3353 //FIXME
3354 //char temp=get_reg(i_regs->regmap,-1);
3355 //emit_andimm(sl,3,temp);
3356 //emit_fldcw_indexed((int)&rounding_modes,temp);
3357 }
3358 }
3359 }
3360
// Emit host code for a floating-point conversion instruction at index i
// (cvt.*, round.*, trunc.*, ceil.*, floor.*).
//   i      - index into the per-instruction arrays (opcode2, source)
//   i_regs - register state for this instruction; only regmap is read here
// The operation is selected by opcode2[i] together with the low 6 bits of
// source[i]; bits 11..15 give the source FPR and bits 6..10 the destination FPR.
// Going by the helper names, opcode2 0x10/0x11/0x14/0x15 are the
// single/double/word/long source formats.
// When built with hardware VFP, a few conversions are emitted inline and return
// early; everything else calls a C helper with the two FPR pointers as arguments.
// The trailing #define makes fconv_assemble expand to this function.
3361 void fconv_assemble_arm(int i,struct regstat *i_regs)
3362 {
// Scratch register (mapped to -1); must be allocated
3363 signed char temp=get_reg(i_regs->regmap,-1);
3364 assert(temp>=0);
3365 // Check cop1 unusable
// Emitted only while cop1_usable is clear; the flag is set afterwards
3366 if(!cop1_usable) {
3367 signed char rs=get_reg(i_regs->regmap,CSREG);
3368 assert(rs>=0);
3369 emit_testimm(rs,0x20000000);
// Branch target 0 is a placeholder; the branch location is handed to add_stub
3370 int jaddr=(int)out;
3371 emit_jeq(0);
3372 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3373 cop1_usable=1;
3374 }
3375
// Inline VFP path: compiled only when __VFP_FP__ is defined and __SOFTFP__ is not
3376 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3377 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3378 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3379 emit_flds(temp,15);
3380 emit_ftosizs(15,15); // float->int, truncate
// Reload temp with the destination pointer only when source and destination differ
3381 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3382 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3383 emit_fsts(15,temp);
3384 return;
3385 }
3386 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3387 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3388 emit_vldr(temp,7);
3389 emit_ftosizd(7,13); // double->int, truncate
3390 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3391 emit_fsts(13,temp);
3392 return;
3393 }
3394
3395 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3396 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3397 emit_flds(temp,13);
// Same pointer-reuse shortcut as trunc_w_s when source and destination match
3398 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3399 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3400 emit_fsitos(13,15);
3401 emit_fsts(15,temp);
3402 return;
3403 }
3404 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3405 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3406 emit_flds(temp,13);
3407 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3408 emit_fsitod(13,7);
3409 emit_vstr(7,temp);
3410 return;
3411 }
3412
3413 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3414 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3415 emit_flds(temp,13);
3416 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3417 emit_fcvtds(13,7);
3418 emit_vstr(7,temp);
3419 return;
3420 }
3421 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3422 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3423 emit_vldr(temp,7);
3424 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3425 emit_fcvtsd(7,13);
3426 emit_fsts(13,temp);
3427 return;
3428 }
3429 #endif
3430
3431 // C emulation code
3432
// reglist: one bit per host register that currently has a mapping (regmap[hr]>=0);
// it is passed to save_regs before the helper call and to restore_regs after it
3433 u_int hr,reglist=0;
3434 for(hr=0;hr<HOST_REGS;hr++) {
3435 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3436 }
3437 save_regs(reglist);
3438
// Each case below loads the source FPR pointer into ARG1_REG and the destination
// FPR pointer into ARG2_REG (from the simple or double table as appropriate),
// then calls the matching C helper. If no case matches, only the save/restore
// pair is emitted.
// Conversions from word (opcode2 0x14) and long (opcode2 0x15)
3439 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3440 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3441 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3442 emit_call((int)cvt_s_w);
3443 }
3444 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3445 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3446 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3447 emit_call((int)cvt_d_w);
3448 }
3449 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3450 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3451 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3452 emit_call((int)cvt_s_l);
3453 }
3454 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3455 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3456 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3457 emit_call((int)cvt_d_l);
3458 }
3459
// cvt.* from single (opcode2 0x10)
3460 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3461 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3462 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3463 emit_call((int)cvt_d_s);
3464 }
3465 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3466 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3467 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3468 emit_call((int)cvt_w_s);
3469 }
3470 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3471 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3472 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3473 emit_call((int)cvt_l_s);
3474 }
3475
// cvt.* from double (opcode2 0x11)
3476 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3477 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3478 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3479 emit_call((int)cvt_s_d);
3480 }
3481 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3482 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3483 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3484 emit_call((int)cvt_w_d);
3485 }
3486 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3487 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3488 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3489 emit_call((int)cvt_l_d);
3490 }
3491
// round/trunc/ceil/floor from single: function codes 0x08..0x0b produce a long
// (destination pointer from the double table), 0x0c..0x0f a word (simple table)
3492 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3493 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3494 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3495 emit_call((int)round_l_s);
3496 }
3497 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3498 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3499 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3500 emit_call((int)trunc_l_s);
3501 }
3502 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3503 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3504 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3505 emit_call((int)ceil_l_s);
3506 }
3507 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3508 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3509 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3510 emit_call((int)floor_l_s);
3511 }
3512 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3513 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3514 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3515 emit_call((int)round_w_s);
3516 }
3517 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3518 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3519 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3520 emit_call((int)trunc_w_s);
3521 }
3522 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3523 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3524 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3525 emit_call((int)ceil_w_s);
3526 }
3527 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3528 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3529 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3530 emit_call((int)floor_w_s);
3531 }
3532
// round/trunc/ceil/floor from double: same function-code layout as above
3533 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3534 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3535 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3536 emit_call((int)round_l_d);
3537 }
3538 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3539 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3540 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3541 emit_call((int)trunc_l_d);
3542 }
3543 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3544 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3545 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3546 emit_call((int)ceil_l_d);
3547 }
3548 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3549 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3550 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3551 emit_call((int)floor_l_d);
3552 }
3553 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3554 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3555 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3556 emit_call((int)round_w_d);
3557 }
3558 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3559 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3560 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3561 emit_call((int)trunc_w_d);
3562 }
3563 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3564 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3565 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3566 emit_call((int)ceil_w_d);
3567 }
3568 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3569 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3570 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3571 emit_call((int)floor_w_d);
3572 }
3573
3574 restore_regs(reglist);
3575 }
3576 #define fconv_assemble fconv_assemble_arm
3577
3578void fcomp_assemble(int i,struct regstat *i_regs)
3579