some cleanup to reduce confusion
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
/* Bit mask of caller-saved registers (presumably one bit per ARM register
 * number - TODO confirm). Mach-O builds additionally set bit 9. */
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

/* Tags a definition that may legitimately go unreferenced. */
#define unused __attribute__((unused))
45
46extern int cycle_count;
47extern int last_count;
48extern int pcaddr;
49extern int pending_exception;
50extern int branch_target;
51extern uint64_t readmem_dword;
52extern void *dynarec_local;
53extern u_int mini_ht[32][2];
54
55void indirect_jump_indexed();
56void indirect_jump();
57void do_interrupt();
58void jump_vaddr_r0();
59void jump_vaddr_r1();
60void jump_vaddr_r2();
61void jump_vaddr_r3();
62void jump_vaddr_r4();
63void jump_vaddr_r5();
64void jump_vaddr_r6();
65void jump_vaddr_r7();
66void jump_vaddr_r8();
67void jump_vaddr_r9();
68void jump_vaddr_r10();
69void jump_vaddr_r12();
70
71const u_int jump_vaddr_reg[16] = {
72 (int)jump_vaddr_r0,
73 (int)jump_vaddr_r1,
74 (int)jump_vaddr_r2,
75 (int)jump_vaddr_r3,
76 (int)jump_vaddr_r4,
77 (int)jump_vaddr_r5,
78 (int)jump_vaddr_r6,
79 (int)jump_vaddr_r7,
80 (int)jump_vaddr_r8,
81 (int)jump_vaddr_r9,
82 (int)jump_vaddr_r10,
83 0,
84 (int)jump_vaddr_r12,
85 0,
86 0,
87 0};
88
89void invalidate_addr_r0();
90void invalidate_addr_r1();
91void invalidate_addr_r2();
92void invalidate_addr_r3();
93void invalidate_addr_r4();
94void invalidate_addr_r5();
95void invalidate_addr_r6();
96void invalidate_addr_r7();
97void invalidate_addr_r8();
98void invalidate_addr_r9();
99void invalidate_addr_r10();
100void invalidate_addr_r12();
101
102const u_int invalidate_addr_reg[16] = {
103 (int)invalidate_addr_r0,
104 (int)invalidate_addr_r1,
105 (int)invalidate_addr_r2,
106 (int)invalidate_addr_r3,
107 (int)invalidate_addr_r4,
108 (int)invalidate_addr_r5,
109 (int)invalidate_addr_r6,
110 (int)invalidate_addr_r7,
111 (int)invalidate_addr_r8,
112 (int)invalidate_addr_r9,
113 (int)invalidate_addr_r10,
114 0,
115 (int)invalidate_addr_r12,
116 0,
117 0,
118 0};
119
120static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
121
122/* Linker */
123
124static void set_jump_target(int addr,u_int target)
125{
126 u_char *ptr=(u_char *)addr;
127 u_int *ptr2=(u_int *)ptr;
128 if(ptr[3]==0xe2) {
129 assert((target-(u_int)ptr2-8)<1024);
130 assert((addr&3)==0);
131 assert((target&3)==0);
132 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
133 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
134 }
135 else if(ptr[3]==0x72) {
136 // generated by emit_jno_unlikely
137 if((target-(u_int)ptr2-8)<1024) {
138 assert((addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 }
142 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
143 assert((addr&3)==0);
144 assert((target&3)==0);
145 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
146 }
147 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
148 }
149 else {
150 assert((ptr[3]&0x0e)==0xa);
151 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
152 }
153}
154
// Variant of set_jump_target that can additionally copy the instruction
// found at the branch target into the word just before the branch and then
// branch one instruction further. It works, but the speed difference is
// usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  const u_char top=((u_char *)addr)[3];
  // copying is only allowed over the 0xe28dd000 placeholder word
  assert(!copy||insn[-1]==0xe28dd000);
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((top&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  if((target_insn&0x0e100000)==0            // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000 // Load
     ||(target_insn&0x08000000)) {
    copy=0;
  }
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
189
190/* Literal pool */
191static void add_literal(int addr,int val)
192{
193 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
194 literals[literalcount][0]=addr;
195 literals[literalcount][1]=val;
196 literalcount++;
197}
198
199// from a pointer to external jump stub (which was produced by emit_extjump2)
200// find where the jumping insn is
201static void *find_extjump_insn(void *stub)
202{
203 int *ptr=(int *)(stub+4);
204 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
205 u_int offset=*ptr&0xfff;
206 void **l_ptr=(void *)ptr+offset+8;
207 return *l_ptr;
208}
209
// Find where an external branch is linked to, using the address of its stub:
// take the address loaded by the stub's second instruction (dyna_linker
// arg1), treat it as a pointer to the branch instruction, and return the
// address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch=find_extjump_insn(stub);
  assert((*branch&0x0f000000)==0x0a000000);
  // shift the 24-bit field up and back down to get a signed byte offset
  int disp=(*branch<<8)>>6;
  return (int)branch+disp+8;
}
221
222// Find the "clean" entry point from a "dirty" entry point
223// by skipping past the call to verify_code
224static u_int get_clean_addr(int addr)
225{
226 int *ptr=(int *)addr;
227 #ifndef HAVE_ARMV7
228 ptr+=4;
229 #else
230 ptr+=6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
234 ptr++;
235 if((*ptr&0xFF000000)==0xea000000) {
236 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
237 }
238 return (u_int)ptr;
239}
240
241static int verify_dirty(u_int *ptr)
242{
243 #ifndef HAVE_ARMV7
244 u_int offset;
245 // get from literal pool
246 assert((*ptr&0xFFFF0000)==0xe59f0000);
247 offset=*ptr&0xfff;
248 u_int source=*(u_int*)((void *)ptr+offset+8);
249 ptr++;
250 assert((*ptr&0xFFFF0000)==0xe59f0000);
251 offset=*ptr&0xfff;
252 u_int copy=*(u_int*)((void *)ptr+offset+8);
253 ptr++;
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
255 offset=*ptr&0xfff;
256 u_int len=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 ptr++;
259 #else
260 // ARMv7 movw/movt
261 assert((*ptr&0xFFF00000)==0xe3000000);
262 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
263 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
264 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
265 ptr+=6;
266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
269 //printf("verify_dirty: %x %x %x\n",source,copy,len);
270 return !memcmp((void *)source,(void *)copy,len);
271}
272
273// This doesn't necessarily find all clean entry points, just
274// guarantees that it's not dirty
275static int isclean(int addr)
276{
277 #ifndef HAVE_ARMV7
278 u_int *ptr=((u_int *)addr)+4;
279 #else
280 u_int *ptr=((u_int *)addr)+6;
281 #endif
282 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
283 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
285 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
286 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
287 return 1;
288}
289
290// get source that block at addr was compiled from (host pointers)
291static void get_bounds(int addr,u_int *start,u_int *end)
292{
293 u_int *ptr=(u_int *)addr;
294 #ifndef HAVE_ARMV7
295 u_int offset;
296 // get from literal pool
297 assert((*ptr&0xFFFF0000)==0xe59f0000);
298 offset=*ptr&0xfff;
299 u_int source=*(u_int*)((void *)ptr+offset+8);
300 ptr++;
301 //assert((*ptr&0xFFFF0000)==0xe59f0000);
302 //offset=*ptr&0xfff;
303 //u_int copy=*(u_int*)((void *)ptr+offset+8);
304 ptr++;
305 assert((*ptr&0xFFFF0000)==0xe59f0000);
306 offset=*ptr&0xfff;
307 u_int len=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 ptr++;
310 #else
311 // ARMv7 movw/movt
312 assert((*ptr&0xFFF00000)==0xe3000000);
313 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
314 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
315 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
316 ptr+=6;
317 #endif
318 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
319 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
320 *start=source;
321 *end=source+len;
322}
323
324/* Register allocation */
325
326// Note: registers are allocated clean (unmodified state)
327// if you intend to modify the register, you must call dirty_reg().
328static void alloc_reg(struct regstat *cur,int i,signed char reg)
329{
330 int r,hr;
331 int preferred_reg = (reg&7);
332 if(reg==CCREG) preferred_reg=HOST_CCREG;
333 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
334
335 // Don't allocate unused registers
336 if((cur->u>>reg)&1) return;
337
338 // see if it's already allocated
339 for(hr=0;hr<HOST_REGS;hr++)
340 {
341 if(cur->regmap[hr]==reg) return;
342 }
343
344 // Keep the same mapping if the register was already allocated in a loop
345 preferred_reg = loop_reg(i,reg,preferred_reg);
346
347 // Try to allocate the preferred register
348 if(cur->regmap[preferred_reg]==-1) {
349 cur->regmap[preferred_reg]=reg;
350 cur->dirty&=~(1<<preferred_reg);
351 cur->isconst&=~(1<<preferred_reg);
352 return;
353 }
354 r=cur->regmap[preferred_reg];
355 if(r<64&&((cur->u>>r)&1)) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 if(r>=64&&((cur->uu>>(r&63))&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367
368 // Clear any unneeded registers
369 // We try to keep the mapping consistent, if possible, because it
370 // makes branches easier (especially loops). So we try to allocate
371 // first (see above) before removing old mappings. If this is not
372 // possible then go ahead and clear out the registers that are no
373 // longer needed.
374 for(hr=0;hr<HOST_REGS;hr++)
375 {
376 r=cur->regmap[hr];
377 if(r>=0) {
378 if(r<64) {
379 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
380 }
381 else
382 {
383 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
384 }
385 }
386 }
387 // Try to allocate any available register, but prefer
388 // registers that have not been used recently.
389 if(i>0) {
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
392 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
393 cur->regmap[hr]=reg;
394 cur->dirty&=~(1<<hr);
395 cur->isconst&=~(1<<hr);
396 return;
397 }
398 }
399 }
400 }
401 // Try to allocate any available register
402 for(hr=0;hr<HOST_REGS;hr++) {
403 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
410
411 // Ok, now we have to evict someone
412 // Pick a register we hopefully won't need soon
413 u_char hsn[MAXREG+1];
414 memset(hsn,10,sizeof(hsn));
415 int j;
416 lsn(hsn,i,&preferred_reg);
417 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
418 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
419 if(i>0) {
420 // Don't evict the cycle count at entry points, otherwise the entry
421 // stub will have to write it.
422 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
423 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
424 for(j=10;j>=3;j--)
425 {
426 // Alloc preferred register if available
427 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 // Evict both parts of a 64-bit register
430 if((cur->regmap[hr]&63)==r) {
431 cur->regmap[hr]=-1;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 }
435 }
436 cur->regmap[preferred_reg]=reg;
437 return;
438 }
439 for(r=1;r<=MAXREG;r++)
440 {
441 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r+64) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 for(hr=0;hr<HOST_REGS;hr++) {
453 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
454 if(cur->regmap[hr]==r) {
455 cur->regmap[hr]=reg;
456 cur->dirty&=~(1<<hr);
457 cur->isconst&=~(1<<hr);
458 return;
459 }
460 }
461 }
462 }
463 }
464 }
465 }
466 for(j=10;j>=0;j--)
467 {
468 for(r=1;r<=MAXREG;r++)
469 {
470 if(hsn[r]==j) {
471 for(hr=0;hr<HOST_REGS;hr++) {
472 if(cur->regmap[hr]==r+64) {
473 cur->regmap[hr]=reg;
474 cur->dirty&=~(1<<hr);
475 cur->isconst&=~(1<<hr);
476 return;
477 }
478 }
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 }
488 }
489 }
490 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
491}
492
493static void alloc_reg64(struct regstat *cur,int i,signed char reg)
494{
495 int preferred_reg = 8+(reg&1);
496 int r,hr;
497
498 // allocate the lower 32 bits
499 alloc_reg(cur,i,reg);
500
501 // Don't allocate unused registers
502 if((cur->uu>>reg)&1) return;
503
504 // see if the upper half is already allocated
505 for(hr=0;hr<HOST_REGS;hr++)
506 {
507 if(cur->regmap[hr]==reg+64) return;
508 }
509
510 // Keep the same mapping if the register was already allocated in a loop
511 preferred_reg = loop_reg(i,reg,preferred_reg);
512
513 // Try to allocate the preferred register
514 if(cur->regmap[preferred_reg]==-1) {
515 cur->regmap[preferred_reg]=reg|64;
516 cur->dirty&=~(1<<preferred_reg);
517 cur->isconst&=~(1<<preferred_reg);
518 return;
519 }
520 r=cur->regmap[preferred_reg];
521 if(r<64&&((cur->u>>r)&1)) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 if(r>=64&&((cur->uu>>(r&63))&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533
534 // Clear any unneeded registers
535 // We try to keep the mapping consistent, if possible, because it
536 // makes branches easier (especially loops). So we try to allocate
537 // first (see above) before removing old mappings. If this is not
538 // possible then go ahead and clear out the registers that are no
539 // longer needed.
540 for(hr=HOST_REGS-1;hr>=0;hr--)
541 {
542 r=cur->regmap[hr];
543 if(r>=0) {
544 if(r<64) {
545 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
546 }
547 else
548 {
549 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
550 }
551 }
552 }
553 // Try to allocate any available register, but prefer
554 // registers that have not been used recently.
555 if(i>0) {
556 for(hr=0;hr<HOST_REGS;hr++) {
557 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
558 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
559 cur->regmap[hr]=reg|64;
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
562 return;
563 }
564 }
565 }
566 }
567 // Try to allocate any available register
568 for(hr=0;hr<HOST_REGS;hr++) {
569 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576
577 // Ok, now we have to evict someone
578 // Pick a register we hopefully won't need soon
579 u_char hsn[MAXREG+1];
580 memset(hsn,10,sizeof(hsn));
581 int j;
582 lsn(hsn,i,&preferred_reg);
583 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
584 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
585 if(i>0) {
586 // Don't evict the cycle count at entry points, otherwise the entry
587 // stub will have to write it.
588 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
589 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
590 for(j=10;j>=3;j--)
591 {
592 // Alloc preferred register if available
593 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 // Evict both parts of a 64-bit register
596 if((cur->regmap[hr]&63)==r) {
597 cur->regmap[hr]=-1;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 }
601 }
602 cur->regmap[preferred_reg]=reg|64;
603 return;
604 }
605 for(r=1;r<=MAXREG;r++)
606 {
607 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r+64) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 for(hr=0;hr<HOST_REGS;hr++) {
619 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
620 if(cur->regmap[hr]==r) {
621 cur->regmap[hr]=reg|64;
622 cur->dirty&=~(1<<hr);
623 cur->isconst&=~(1<<hr);
624 return;
625 }
626 }
627 }
628 }
629 }
630 }
631 }
632 for(j=10;j>=0;j--)
633 {
634 for(r=1;r<=MAXREG;r++)
635 {
636 if(hsn[r]==j) {
637 for(hr=0;hr<HOST_REGS;hr++) {
638 if(cur->regmap[hr]==r+64) {
639 cur->regmap[hr]=reg|64;
640 cur->dirty&=~(1<<hr);
641 cur->isconst&=~(1<<hr);
642 return;
643 }
644 }
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 }
654 }
655 }
656 SysPrintf("This shouldn't happen");exit(1);
657}
658
659// Allocate a temporary register. This is done without regard to
660// dirty status or whether the register we request is on the unneeded list
661// Note: This will only allocate one register, even if called multiple times
662static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
663{
664 int r,hr;
665 int preferred_reg = -1;
666
667 // see if it's already allocated
668 for(hr=0;hr<HOST_REGS;hr++)
669 {
670 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
671 }
672
673 // Try to allocate any available register
674 for(hr=HOST_REGS-1;hr>=0;hr--) {
675 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
682
683 // Find an unneeded register
684 for(hr=HOST_REGS-1;hr>=0;hr--)
685 {
686 r=cur->regmap[hr];
687 if(r>=0) {
688 if(r<64) {
689 if((cur->u>>r)&1) {
690 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 else
699 {
700 if((cur->uu>>(r&63))&1) {
701 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
702 cur->regmap[hr]=reg;
703 cur->dirty&=~(1<<hr);
704 cur->isconst&=~(1<<hr);
705 return;
706 }
707 }
708 }
709 }
710 }
711
712 // Ok, now we have to evict someone
713 // Pick a register we hopefully won't need soon
714 // TODO: we might want to follow unconditional jumps here
715 // TODO: get rid of dupe code and make this into a function
716 u_char hsn[MAXREG+1];
717 memset(hsn,10,sizeof(hsn));
718 int j;
719 lsn(hsn,i,&preferred_reg);
720 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
721 if(i>0) {
722 // Don't evict the cycle count at entry points, otherwise the entry
723 // stub will have to write it.
724 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
725 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
726 for(j=10;j>=3;j--)
727 {
728 for(r=1;r<=MAXREG;r++)
729 {
730 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r+64) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 for(hr=0;hr<HOST_REGS;hr++) {
742 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
743 if(cur->regmap[hr]==r) {
744 cur->regmap[hr]=reg;
745 cur->dirty&=~(1<<hr);
746 cur->isconst&=~(1<<hr);
747 return;
748 }
749 }
750 }
751 }
752 }
753 }
754 }
755 for(j=10;j>=0;j--)
756 {
757 for(r=1;r<=MAXREG;r++)
758 {
759 if(hsn[r]==j) {
760 for(hr=0;hr<HOST_REGS;hr++) {
761 if(cur->regmap[hr]==r+64) {
762 cur->regmap[hr]=reg;
763 cur->dirty&=~(1<<hr);
764 cur->isconst&=~(1<<hr);
765 return;
766 }
767 }
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 }
777 }
778 }
779 SysPrintf("This shouldn't happen");exit(1);
780}
781
782// Allocate a specific ARM register.
783static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
784{
785 int n;
786 int dirty=0;
787
788 // see if it's already allocated (and dealloc it)
789 for(n=0;n<HOST_REGS;n++)
790 {
791 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
792 dirty=(cur->dirty>>n)&1;
793 cur->regmap[n]=-1;
794 }
795 }
796
797 cur->regmap[hr]=reg;
798 cur->dirty&=~(1<<hr);
799 cur->dirty|=dirty<<hr;
800 cur->isconst&=~(1<<hr);
801}
802
803// Alloc cycle count into dedicated register
804static void alloc_cc(struct regstat *cur,int i)
805{
806 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
807}
808
809/* Special alloc */
810
811
812/* Assembler */
813
814static unused char regname[16][4] = {
815 "r0",
816 "r1",
817 "r2",
818 "r3",
819 "r4",
820 "r5",
821 "r6",
822 "r7",
823 "r8",
824 "r9",
825 "r10",
826 "fp",
827 "r12",
828 "sp",
829 "lr",
830 "pc"};
831
832static void output_w32(u_int word)
833{
834 *((u_int *)out)=word;
835 out+=4;
836}
837
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
845
// Build the rd/rn fields plus a rotated 8-bit immediate meaning imm<<shift.
// shift must be even; the rotate field (bits 8-11) is ((32-shift)&30)/2.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
854
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success store the 12-bit operand field (rotate<<8 | imm8) in *encoded
// and return 1; otherwise return 0 with *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  u_int undo; // how far imm has been rotated right so far

  *encoded=0;
  if(imm==0) return 1;
  for(undo=0;undo<32;undo+=2) {
    if(imm<256) {
      *encoded=(((32-undo)&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
870
871static void genimm_checked(u_int imm,u_int *encoded)
872{
873 u_int ret=genimm(imm,encoded);
874 assert(ret);
875 (void)ret;
876}
877
878static u_int genjmp(u_int addr)
879{
880 int offset=addr-(int)out-8;
881 if(offset<-33554432||offset>=33554432) {
882 if (addr>2) {
883 SysPrintf("genjmp: out of range: %08x\n", offset);
884 exit(1);
885 }
886 return 0;
887 }
888 return ((u_int)offset>>2)&0xffffff;
889}
890
891static void emit_mov(int rs,int rt)
892{
893 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
894 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
895}
896
897static void emit_movs(int rs,int rt)
898{
899 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
900 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
901}
902
903static void emit_add(int rs1,int rs2,int rt)
904{
905 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
906 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
907}
908
909static void emit_adds(int rs1,int rs2,int rt)
910{
911 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
913}
914
915static void emit_adcs(int rs1,int rs2,int rt)
916{
917 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
919}
920
921static void emit_sbc(int rs1,int rs2,int rt)
922{
923 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
924 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
925}
926
927static void emit_sbcs(int rs1,int rs2,int rt)
928{
929 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
930 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
931}
932
933static void emit_neg(int rs, int rt)
934{
935 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
936 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
937}
938
939static void emit_negs(int rs, int rt)
940{
941 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
942 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
943}
944
945static void emit_sub(int rs1,int rs2,int rt)
946{
947 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
948 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
949}
950
951static void emit_subs(int rs1,int rs2,int rt)
952{
953 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
954 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
955}
956
957static void emit_zeroreg(int rt)
958{
959 assem_debug("mov %s,#0\n",regname[rt]);
960 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
961}
962
963static void emit_loadlp(u_int imm,u_int rt)
964{
965 add_literal((int)out,imm);
966 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
967 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
968}
969
970static void emit_movw(u_int imm,u_int rt)
971{
972 assert(imm<65536);
973 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
974 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
975}
976
977static void emit_movt(u_int imm,u_int rt)
978{
979 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
980 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
981}
982
983static void emit_movimm(u_int imm,u_int rt)
984{
985 u_int armval;
986 if(genimm(imm,&armval)) {
987 assem_debug("mov %s,#%d\n",regname[rt],imm);
988 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
989 }else if(genimm(~imm,&armval)) {
990 assem_debug("mvn %s,#%d\n",regname[rt],imm);
991 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
992 }else if(imm<65536) {
993 #ifndef HAVE_ARMV7
994 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
995 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
996 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
997 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
998 #else
999 emit_movw(imm,rt);
1000 #endif
1001 }else{
1002 #ifndef HAVE_ARMV7
1003 emit_loadlp(imm,rt);
1004 #else
1005 emit_movw(imm&0x0000FFFF,rt);
1006 emit_movt(imm&0xFFFF0000,rt);
1007 #endif
1008 }
1009}
1010
1011static void emit_pcreladdr(u_int rt)
1012{
1013 assem_debug("add %s,pc,#?\n",regname[rt]);
1014 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1015}
1016
1017static void emit_loadreg(int r, int hr)
1018{
1019 if(r&64) {
1020 SysPrintf("64bit load in 32bit mode!\n");
1021 assert(0);
1022 return;
1023 }
1024 if((r&63)==0)
1025 emit_zeroreg(hr);
1026 else {
1027 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1028 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1029 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1030 if(r==CCREG) addr=(int)&cycle_count;
1031 if(r==CSREG) addr=(int)&Status;
1032 if(r==FSREG) addr=(int)&FCR31;
1033 if(r==INVCP) addr=(int)&invc_ptr;
1034 u_int offset = addr-(u_int)&dynarec_local;
1035 assert(offset<4096);
1036 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1037 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1038 }
1039}
1040
1041static void emit_storereg(int r, int hr)
1042{
1043 if(r&64) {
1044 SysPrintf("64bit store in 32bit mode!\n");
1045 assert(0);
1046 return;
1047 }
1048 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1049 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1050 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1051 if(r==CCREG) addr=(int)&cycle_count;
1052 if(r==FSREG) addr=(int)&FCR31;
1053 u_int offset = addr-(u_int)&dynarec_local;
1054 assert(offset<4096);
1055 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1056 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1057}
1058
1059static void emit_test(int rs, int rt)
1060{
1061 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1062 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1063}
1064
1065static void emit_testimm(int rs,int imm)
1066{
1067 u_int armval;
1068 assem_debug("tst %s,#%d\n",regname[rs],imm);
1069 genimm_checked(imm,&armval);
1070 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1071}
1072
1073static void emit_testeqimm(int rs,int imm)
1074{
1075 u_int armval;
1076 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1077 genimm_checked(imm,&armval);
1078 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
1081static void emit_not(int rs,int rt)
1082{
1083 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1084 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1085}
1086
1087static void emit_mvnmi(int rs,int rt)
1088{
1089 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1090 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1091}
1092
1093static void emit_and(u_int rs1,u_int rs2,u_int rt)
1094{
1095 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1096 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1097}
1098
1099static void emit_or(u_int rs1,u_int rs2,u_int rt)
1100{
1101 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1102 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1103}
1104
1105static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1106{
1107 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1109}
1110
1111static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1112{
1113 assert(rs<16);
1114 assert(rt<16);
1115 assert(imm<32);
1116 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1117 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1118}
1119
1120static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1121{
1122 assert(rs<16);
1123 assert(rt<16);
1124 assert(imm<32);
1125 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1126 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1127}
1128
1129static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1130{
1131 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1132 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1133}
1134
1135static void emit_addimm(u_int rs,int imm,u_int rt)
1136{
1137 assert(rs<16);
1138 assert(rt<16);
1139 if(imm!=0) {
1140 u_int armval;
1141 if(genimm(imm,&armval)) {
1142 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1144 }else if(genimm(-imm,&armval)) {
1145 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1146 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1147 #ifdef HAVE_ARMV7
1148 }else if(rt!=rs&&(u_int)imm<65536) {
1149 emit_movw(imm&0x0000ffff,rt);
1150 emit_add(rs,rt,rt);
1151 }else if(rt!=rs&&(u_int)-imm<65536) {
1152 emit_movw(-imm&0x0000ffff,rt);
1153 emit_sub(rs,rt,rt);
1154 #endif
1155 }else if((u_int)-imm<65536) {
1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1157 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1158 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1159 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1160 }else {
1161 do {
1162 int shift = (ffs(imm) - 1) & ~1;
1163 int imm8 = imm & (0xff << shift);
1164 genimm_checked(imm8,&armval);
1165 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1166 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1167 rs = rt;
1168 imm &= ~imm8;
1169 }
1170 while (imm != 0);
1171 }
1172 }
1173 else if(rs!=rt) emit_mov(rs,rt);
1174}
1175
1176static void emit_addimm_and_set_flags(int imm,int rt)
1177{
1178 assert(imm>-65536&&imm<65536);
1179 u_int armval;
1180 if(genimm(imm,&armval)) {
1181 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1182 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1183 }else if(genimm(-imm,&armval)) {
1184 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1185 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1186 }else if(imm<0) {
1187 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1188 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1189 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1190 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1191 }else{
1192 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1193 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1194 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1195 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1196 }
1197}
1198
1199static void emit_addimm_no_flags(u_int imm,u_int rt)
1200{
1201 emit_addimm(rt,imm,rt);
1202}
1203
1204static void emit_addnop(u_int r)
1205{
1206 assert(r<16);
1207 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1208 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1209}
1210
1211static void emit_adcimm(u_int rs,int imm,u_int rt)
1212{
1213 u_int armval;
1214 genimm_checked(imm,&armval);
1215 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1216 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1217}
1218
1219static void emit_rscimm(int rs,int imm,u_int rt)
1220{
1221 assert(0);
1222 u_int armval;
1223 genimm_checked(imm,&armval);
1224 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1225 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1226}
1227
1228static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1229{
1230 // TODO: if(genimm(imm,&armval)) ...
1231 // else
1232 emit_movimm(imm,HOST_TEMPREG);
1233 emit_adds(HOST_TEMPREG,rsl,rtl);
1234 emit_adcimm(rsh,0,rth);
1235}
1236
1237static void emit_andimm(int rs,int imm,int rt)
1238{
1239 u_int armval;
1240 if(imm==0) {
1241 emit_zeroreg(rt);
1242 }else if(genimm(imm,&armval)) {
1243 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1244 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1245 }else if(genimm(~imm,&armval)) {
1246 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1248 }else if(imm==65535) {
1249 #ifndef HAVE_ARMV6
1250 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1251 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1252 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1254 #else
1255 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1256 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1257 #endif
1258 }else{
1259 assert(imm>0&&imm<65535);
1260 #ifndef HAVE_ARMV7
1261 assem_debug("mov r14,#%d\n",imm&0xFF00);
1262 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1263 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1264 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1265 #else
1266 emit_movw(imm,HOST_TEMPREG);
1267 #endif
1268 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1269 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1270 }
1271}
1272
1273static void emit_orimm(int rs,int imm,int rt)
1274{
1275 u_int armval;
1276 if(imm==0) {
1277 if(rs!=rt) emit_mov(rs,rt);
1278 }else if(genimm(imm,&armval)) {
1279 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1281 }else{
1282 assert(imm>0&&imm<65536);
1283 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1285 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1287 }
1288}
1289
1290static void emit_xorimm(int rs,int imm,int rt)
1291{
1292 u_int armval;
1293 if(imm==0) {
1294 if(rs!=rt) emit_mov(rs,rt);
1295 }else if(genimm(imm,&armval)) {
1296 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1298 }else{
1299 assert(imm>0&&imm<65536);
1300 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1302 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1304 }
1305}
1306
1307static void emit_shlimm(int rs,u_int imm,int rt)
1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 //if(imm==1) ...
1312 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1313 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1314}
1315
1316static void emit_lsls_imm(int rs,int imm,int rt)
1317{
1318 assert(imm>0);
1319 assert(imm<32);
1320 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
1324static unused void emit_lslpls_imm(int rs,int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
1332static void emit_shrimm(int rs,u_int imm,int rt)
1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1338}
1339
1340static void emit_sarimm(int rs,u_int imm,int rt)
1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1346}
1347
1348static void emit_rorimm(int rs,u_int imm,int rt)
1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1354}
1355
1356static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1357{
1358 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1359 assert(imm>0);
1360 assert(imm<32);
1361 //if(imm==1) ...
1362 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1363 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1364 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1365 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1366}
1367
1368static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1369{
1370 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1371 assert(imm>0);
1372 assert(imm<32);
1373 //if(imm==1) ...
1374 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1375 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1376 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1377 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1378}
1379
// Sign-extend the low 16 bits of rs into rt.
// With HAVE_ARMV6 this is a single sxth; otherwise a shift-left/
// arithmetic-shift-right pair by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1390
// Sign-extend the low 8 bits of rs into rt.
// With HAVE_ARMV6 this is a single sxtb; otherwise a shift-left/
// arithmetic-shift-right pair by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1401
1402static void emit_shl(u_int rs,u_int shift,u_int rt)
1403{
1404 assert(rs<16);
1405 assert(rt<16);
1406 assert(shift<16);
1407 //if(imm==1) ...
1408 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1409 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1410}
1411
1412static void emit_shr(u_int rs,u_int shift,u_int rt)
1413{
1414 assert(rs<16);
1415 assert(rt<16);
1416 assert(shift<16);
1417 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1418 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1419}
1420
1421static void emit_sar(u_int rs,u_int shift,u_int rt)
1422{
1423 assert(rs<16);
1424 assert(rt<16);
1425 assert(shift<16);
1426 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1427 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1428}
1429
1430static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1431{
1432 assert(rs<16);
1433 assert(rt<16);
1434 assert(shift<16);
1435 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1436 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1437}
1438
1439static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1440{
1441 assert(rs<16);
1442 assert(rt<16);
1443 assert(shift<16);
1444 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1445 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1446}
1447
1448static void emit_cmpimm(int rs,int imm)
1449{
1450 u_int armval;
1451 if(genimm(imm,&armval)) {
1452 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1453 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1454 }else if(genimm(-imm,&armval)) {
1455 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1456 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1457 }else if(imm>0) {
1458 assert(imm<65536);
1459 emit_movimm(imm,HOST_TEMPREG);
1460 assem_debug("cmp %s,r14\n",regname[rs]);
1461 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1462 }else{
1463 assert(imm>-65536);
1464 emit_movimm(-imm,HOST_TEMPREG);
1465 assem_debug("cmn %s,r14\n",regname[rs]);
1466 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1467 }
1468}
1469
1470static void emit_cmovne_imm(int imm,int rt)
1471{
1472 assem_debug("movne %s,#%d\n",regname[rt],imm);
1473 u_int armval;
1474 genimm_checked(imm,&armval);
1475 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1476}
1477
1478static void emit_cmovl_imm(int imm,int rt)
1479{
1480 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1481 u_int armval;
1482 genimm_checked(imm,&armval);
1483 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1484}
1485
1486static void emit_cmovb_imm(int imm,int rt)
1487{
1488 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1489 u_int armval;
1490 genimm_checked(imm,&armval);
1491 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1492}
1493
1494static void emit_cmovs_imm(int imm,int rt)
1495{
1496 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1497 u_int armval;
1498 genimm_checked(imm,&armval);
1499 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1500}
1501
1502static void emit_cmove_reg(int rs,int rt)
1503{
1504 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1505 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1506}
1507
1508static void emit_cmovne_reg(int rs,int rt)
1509{
1510 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1511 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1512}
1513
1514static void emit_cmovl_reg(int rs,int rt)
1515{
1516 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1517 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1518}
1519
1520static void emit_cmovs_reg(int rs,int rt)
1521{
1522 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1523 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1524}
1525
// Set rt to 1 if rs < imm (signed, via movlt), else 0.
// When rt aliases rs the zeroing has to wait until after the compare, and is
// then done with emit_movimm (presumably because it leaves the flags
// untouched - confirm against emit_zeroreg/emit_movimm).
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1533
// Set rt to 1 if rs < imm (unsigned, via movcc), else 0.
// When rt aliases rs the zeroing has to wait until after the compare, and is
// then done with emit_movimm (presumably because it leaves the flags
// untouched - confirm against emit_zeroreg/emit_movimm).
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1541
// Signed "set if less than immediate" for a 64-bit value held in rsh:rsl.
// The low-word result is computed first and then corrected from the high
// word: it is forced to 0 when rsh differs from the sign extension of imm
// (0 or -1), and back to 1 when rsh is on the "smaller" side.
// rt must not alias rsh.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // Sign extension of imm is all ones: compare the high word with -1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // Sign extension of imm is zero: test the high word against itself.
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
1559
// Unsigned "set if less than immediate" for a 64-bit value held in rsh:rsl.
// The low-word result is computed first; the high word then overrides it:
//   imm>=0: a non-zero rsh forces the result to 0
//   imm<0 : an rsh other than -1 forces the result to 1
// rt must not alias rsh.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1575
1576static void emit_cmp(int rs,int rt)
1577{
1578 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1579 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1580}
1581
// Set rt to 1 if rs > 0 (signed), else 0:
// compare rs with 1, load 1, then overwrite with 0 under LT.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);     // flags from rs - 1
  emit_movimm(1,rt);     // assume "greater than zero"
  emit_cmovl_imm(0,rt);  // rs < 1 -> 0
}
1589
// Set rt to 1 if rs is non-zero; a zero value stays zero.
// Flags come from emit_test when rt aliases rs, otherwise from a
// flag-setting move of rs into rt.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
1597
// Set rt to 1 if the 64-bit value rsh:rsl is greater than zero, else 0.
// Starts from the low-word result and then corrects it from the high word.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);   // provisional answer from the low word
  emit_test(rsh,rsh);      // flags from the high word
  emit_cmovne_imm(1,rt);   // high word non-zero -> 1 ...
  emit_cmovs_imm(0,rt);    // ... unless it is negative -> 0
}
1606
// Set rt to 1 if the 64-bit value rsh:rsl is non-zero.
// rt first receives rsh|rsl (flag-setting), then is replaced by 1 under NE.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);  // rt = rsh | rsl, sets flags
  emit_cmovne_imm(1,rt);              // non-zero -> 1
}
1613
// Set rt to 1 if rs1 < rs2 (signed, via movlt), else 0.
// If rt is one of the operands it may only be cleared after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1622
// Set rt to 1 if rs1 < rs2 (unsigned, via movcc), else 0.
// If rt is one of the operands it may only be cleared after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1631
1632static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1633{
1634 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1635 assert(u1!=rt);
1636 assert(u2!=rt);
1637 emit_cmp(l1,l2);
1638 emit_movimm(0,rt);
1639 emit_sbcs(u1,u2,HOST_TEMPREG);
1640 emit_cmovl_imm(1,rt);
1641}
1642
1643static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1644{
1645 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1646 assert(u1!=rt);
1647 assert(u2!=rt);
1648 emit_cmp(l1,l2);
1649 emit_movimm(0,rt);
1650 emit_sbcs(u1,u2,HOST_TEMPREG);
1651 emit_cmovb_imm(1,rt);
1652}
1653
1654static void emit_call(int a)
1655{
1656 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1657 u_int offset=genjmp(a);
1658 output_w32(0xeb000000|offset);
1659}
1660
1661static void emit_jmp(int a)
1662{
1663 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1664 u_int offset=genjmp(a);
1665 output_w32(0xea000000|offset);
1666}
1667
1668static void emit_jne(int a)
1669{
1670 assem_debug("bne %x\n",a);
1671 u_int offset=genjmp(a);
1672 output_w32(0x1a000000|offset);
1673}
1674
1675static void emit_jeq(int a)
1676{
1677 assem_debug("beq %x\n",a);
1678 u_int offset=genjmp(a);
1679 output_w32(0x0a000000|offset);
1680}
1681
1682static void emit_js(int a)
1683{
1684 assem_debug("bmi %x\n",a);
1685 u_int offset=genjmp(a);
1686 output_w32(0x4a000000|offset);
1687}
1688
1689static void emit_jns(int a)
1690{
1691 assem_debug("bpl %x\n",a);
1692 u_int offset=genjmp(a);
1693 output_w32(0x5a000000|offset);
1694}
1695
1696static void emit_jl(int a)
1697{
1698 assem_debug("blt %x\n",a);
1699 u_int offset=genjmp(a);
1700 output_w32(0xba000000|offset);
1701}
1702
1703static void emit_jge(int a)
1704{
1705 assem_debug("bge %x\n",a);
1706 u_int offset=genjmp(a);
1707 output_w32(0xaa000000|offset);
1708}
1709
1710static void emit_jno(int a)
1711{
1712 assem_debug("bvc %x\n",a);
1713 u_int offset=genjmp(a);
1714 output_w32(0x7a000000|offset);
1715}
1716
1717static void emit_jc(int a)
1718{
1719 assem_debug("bcs %x\n",a);
1720 u_int offset=genjmp(a);
1721 output_w32(0x2a000000|offset);
1722}
1723
1724static void emit_jcc(int a)
1725{
1726 assem_debug("bcc %x\n",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0x3a000000|offset);
1729}
1730
1731static void emit_callreg(u_int r)
1732{
1733 assert(r<15);
1734 assem_debug("blx %s\n",regname[r]);
1735 output_w32(0xe12fff30|r);
1736}
1737
1738static void emit_jmpreg(u_int r)
1739{
1740 assem_debug("mov pc,%s\n",regname[r]);
1741 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1742}
1743
1744static void emit_readword_indexed(int offset, int rs, int rt)
1745{
1746 assert(offset>-4096&&offset<4096);
1747 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1748 if(offset>=0) {
1749 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1750 }else{
1751 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1752 }
1753}
1754
1755static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1756{
1757 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1758 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1759}
1760
1761static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1762{
1763 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1764 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1765}
1766
1767static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1768{
1769 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1770 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1771}
1772
1773static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1774{
1775 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1776 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1777}
1778
1779static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1780{
1781 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1782 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1783}
1784
1785static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1786{
1787 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1788 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1789}
1790
// Word load that optionally goes through a map register.
//   map<0  : plain "ldr rt,[rs,#addr]"
//   map>=0 : "ldr rt,[rs,map,lsl #2]"; addr must then be 0
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
1799
// Load a 64-bit value as two words: the word at addr into rh (skipped when
// rh<0) and the following word into rl.
//   map<0  : direct loads at addr and addr+4
//   map>=0 : mapped loads; map is incremented by 1 between them (with the
//            x4 index scaling that is the next word), and rh must not
//            alias rs
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1812
1813static void emit_movsbl_indexed(int offset, int rs, int rt)
1814{
1815 assert(offset>-256&&offset<256);
1816 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1817 if(offset>=0) {
1818 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1819 }else{
1820 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1821 }
1822}
1823
1824static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1825{
1826 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1827 else {
1828 if(addr==0) {
1829 emit_shlimm(map,2,map);
1830 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1831 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1832 }else{
1833 assert(addr>-256&&addr<256);
1834 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1835 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1836 emit_movsbl_indexed(addr, rt, rt);
1837 }
1838 }
1839}
1840
1841static void emit_movswl_indexed(int offset, int rs, int rt)
1842{
1843 assert(offset>-256&&offset<256);
1844 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1845 if(offset>=0) {
1846 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1847 }else{
1848 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1849 }
1850}
1851
1852static void emit_movzbl_indexed(int offset, int rs, int rt)
1853{
1854 assert(offset>-4096&&offset<4096);
1855 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1856 if(offset>=0) {
1857 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1858 }else{
1859 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1860 }
1861}
1862
1863static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1864{
1865 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1866 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1867}
1868
// Unsigned byte load that optionally goes through a map register.
//   map<0           : plain ldrb with an immediate offset
//   map>=0, addr==0 : "ldrb rt,[rs,map,lsl #2]"
//   map>=0, addr!=0 : rt = rs + addr, then "ldrb rt,[rt,map,lsl #2]"
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1881
1882static void emit_movzwl_indexed(int offset, int rs, int rt)
1883{
1884 assert(offset>-256&&offset<256);
1885 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1886 if(offset>=0) {
1887 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1888 }else{
1889 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1890 }
1891}
1892
1893static void emit_ldrd(int offset, int rs, int rt)
1894{
1895 assert(offset>-256&&offset<256);
1896 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1897 if(offset>=0) {
1898 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1899 }else{
1900 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1901 }
1902}
1903
1904static void emit_readword(int addr, int rt)
1905{
1906 u_int offset = addr-(u_int)&dynarec_local;
1907 assert(offset<4096);
1908 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1909 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1910}
1911
1912static unused void emit_movsbl(int addr, int rt)
1913{
1914 u_int offset = addr-(u_int)&dynarec_local;
1915 assert(offset<256);
1916 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1917 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1918}
1919
1920static unused void emit_movswl(int addr, int rt)
1921{
1922 u_int offset = addr-(u_int)&dynarec_local;
1923 assert(offset<256);
1924 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1925 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1926}
1927
1928static unused void emit_movzbl(int addr, int rt)
1929{
1930 u_int offset = addr-(u_int)&dynarec_local;
1931 assert(offset<4096);
1932 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1933 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1934}
1935
1936static unused void emit_movzwl(int addr, int rt)
1937{
1938 u_int offset = addr-(u_int)&dynarec_local;
1939 assert(offset<256);
1940 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1941 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1942}
1943
1944static void emit_writeword_indexed(int rt, int offset, int rs)
1945{
1946 assert(offset>-4096&&offset<4096);
1947 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1948 if(offset>=0) {
1949 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1950 }else{
1951 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1952 }
1953}
1954
1955static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1956{
1957 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1958 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1959}
1960
// Word store that optionally goes through a map register.
//   map<0  : plain "str rt,[rs,#addr]"
//   map>=0 : "str rt,[rs,map,lsl #2]"; addr must then be 0
// The temp parameter is not used by this function.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
1969
// Store a 64-bit value as two words: rh at addr, rl in the following word.
//   map<0  : direct stores at addr (skipped when rh<0) and addr+4
//   map>=0 : mapped stores; rh must be a valid register. The second word is
//            reached either through temp = map+1 (when temp is a register
//            distinct from rs) or by advancing rs itself by 4 (when temp==rs).
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp==rs) {
    // No spare register: bump the base address between the two stores.
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }else{
    // temp holds the map index of the next word.
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
1986
1987static void emit_writehword_indexed(int rt, int offset, int rs)
1988{
1989 assert(offset>-256&&offset<256);
1990 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1991 if(offset>=0) {
1992 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1993 }else{
1994 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1995 }
1996}
1997
1998static void emit_writebyte_indexed(int rt, int offset, int rs)
1999{
2000 assert(offset>-4096&&offset<4096);
2001 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2002 if(offset>=0) {
2003 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2004 }else{
2005 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2006 }
2007}
2008
2009static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2010{
2011 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2012 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2013}
2014
// Byte store that optionally goes through a map register.
//   map<0           : plain strb with an immediate offset
//   map>=0, addr==0 : "strb rt,[rs,map,lsl #2]"
//   map>=0, addr!=0 : temp = rs + addr, then "strb rt,[temp,map,lsl #2]"
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
2027
2028static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2029{
2030 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2031 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2032}
2033
2034static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2035{
2036 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2037 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2038}
2039
2040static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2041{
2042 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2043 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2044}
2045
2046static void emit_writeword(int rt, int addr)
2047{
2048 u_int offset = addr-(u_int)&dynarec_local;
2049 assert(offset<4096);
2050 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2051 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2052}
2053
2054static unused void emit_writehword(int rt, int addr)
2055{
2056 u_int offset = addr-(u_int)&dynarec_local;
2057 assert(offset<256);
2058 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2059 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2060}
2061
2062static unused void emit_writebyte(int rt, int addr)
2063{
2064 u_int offset = addr-(u_int)&dynarec_local;
2065 assert(offset<4096);
2066 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2067 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2068}
2069
2070static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2071{
2072 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2073 assert(rs1<16);
2074 assert(rs2<16);
2075 assert(hi<16);
2076 assert(lo<16);
2077 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2078}
2079
2080static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2081{
2082 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2083 assert(rs1<16);
2084 assert(rs2<16);
2085 assert(hi<16);
2086 assert(lo<16);
2087 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2088}
2089
2090static void emit_clz(int rs,int rt)
2091{
2092 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2093 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2094}
2095
2096static void emit_subcs(int rs1,int rs2,int rt)
2097{
2098 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2099 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2100}
2101
2102static void emit_shrcc_imm(int rs,u_int imm,int rt)
2103{
2104 assert(imm>0);
2105 assert(imm<32);
2106 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2107 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2108}
2109
2110static void emit_shrne_imm(int rs,u_int imm,int rt)
2111{
2112 assert(imm>0);
2113 assert(imm<32);
2114 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2115 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2116}
2117
2118static void emit_negmi(int rs, int rt)
2119{
2120 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2121 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2122}
2123
2124static void emit_negsmi(int rs, int rt)
2125{
2126 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2127 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2128}
2129
2130static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2131{
2132 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2133 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2134}
2135
2136static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2137{
2138 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2139 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2140}
2141
2142static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2143{
2144 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2145 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2146}
2147
2148static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2149{
2150 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2151 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2152}
2153
2154static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2155{
2156 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2157 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2158}
2159
2160static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2161{
2162 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2163 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2164}
2165
2166static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2167{
2168 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2169 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2170}
2171
2172static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2173{
2174 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2175 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2176}
2177
2178static void emit_teq(int rs, int rt)
2179{
2180 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2181 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2182}
2183
2184static void emit_rsbimm(int rs, int imm, int rt)
2185{
2186 u_int armval;
2187 genimm_checked(imm,&armval);
2188 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2189 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2190}
2191
2192// Load 2 immediates optimizing for small code size
2193static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2194{
2195 emit_movimm(imm1,rt1);
2196 u_int armval;
2197 if(genimm(imm2-imm1,&armval)) {
2198 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2199 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2200 }else if(genimm(imm1-imm2,&armval)) {
2201 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2202 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2203 }
2204 else emit_movimm(imm2,rt2);
2205}
2206
2207// Conditionally select one of two immediates, optimizing for small code size
2208// This will only be called if HAVE_CMOV_IMM is defined
2209static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2210{
2211 u_int armval;
2212 if(genimm(imm2-imm1,&armval)) {
2213 emit_movimm(imm1,rt);
2214 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2215 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2216 }else if(genimm(imm1-imm2,&armval)) {
2217 emit_movimm(imm1,rt);
2218 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2219 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2220 }
2221 else {
2222 #ifndef HAVE_ARMV7
2223 emit_movimm(imm1,rt);
2224 add_literal((int)out,imm2);
2225 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2226 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2227 #else
2228 emit_movw(imm1&0x0000FFFF,rt);
2229 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2230 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2231 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2232 }
2233 emit_movt(imm1&0xFFFF0000,rt);
2234 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2235 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2236 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2237 }
2238 #endif
2239 }
2240}
2241
2242// special case for checking invalid_code
2243static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2244{
2245 assert(imm<128&&imm>=0);
2246 assert(r>=0&&r<16);
2247 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2248 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2249 emit_cmpimm(HOST_TEMPREG,imm);
2250}
2251
2252static void emit_callne(int a)
2253{
2254 assem_debug("blne %x\n",a);
2255 u_int offset=genjmp(a);
2256 output_w32(0x1b000000|offset);
2257}
2258
2259// Used to preload hash table entries
2260static unused void emit_prefetchreg(int r)
2261{
2262 assem_debug("pld %s\n",regname[r]);
2263 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2264}
2265
2266// Special case for mini_ht
2267static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2268{
2269 assert(offset<4096);
2270 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2271 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2272}
2273
2274static unused void emit_bicne_imm(int rs,int imm,int rt)
2275{
2276 u_int armval;
2277 genimm_checked(imm,&armval);
2278 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2279 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2280}
2281
2282static unused void emit_biccs_imm(int rs,int imm,int rt)
2283{
2284 u_int armval;
2285 genimm_checked(imm,&armval);
2286 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2287 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2288}
2289
2290static unused void emit_bicvc_imm(int rs,int imm,int rt)
2291{
2292 u_int armval;
2293 genimm_checked(imm,&armval);
2294 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2295 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2296}
2297
2298static unused void emit_bichi_imm(int rs,int imm,int rt)
2299{
2300 u_int armval;
2301 genimm_checked(imm,&armval);
2302 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2303 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2304}
2305
2306static unused void emit_orrvs_imm(int rs,int imm,int rt)
2307{
2308 u_int armval;
2309 genimm_checked(imm,&armval);
2310 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2311 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2312}
2313
2314static void emit_orrne_imm(int rs,int imm,int rt)
2315{
2316 u_int armval;
2317 genimm_checked(imm,&armval);
2318 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2319 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2320}
2321
2322static void emit_andne_imm(int rs,int imm,int rt)
2323{
2324 u_int armval;
2325 genimm_checked(imm,&armval);
2326 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2327 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2328}
2329
2330static unused void emit_addpl_imm(int rs,int imm,int rt)
2331{
2332 u_int armval;
2333 genimm_checked(imm,&armval);
2334 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2335 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2336}
2337
2338static void emit_jno_unlikely(int a)
2339{
2340 //emit_jno(a);
2341 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2342 output_w32(0x72800000|rd_rn_rm(15,15,0));
2343}
2344
2345static void save_regs_all(u_int reglist)
2346{
2347 int i;
2348 if(!reglist) return;
2349 assem_debug("stmia fp,{");
2350 for(i=0;i<16;i++)
2351 if(reglist&(1<<i))
2352 assem_debug("r%d,",i);
2353 assem_debug("}\n");
2354 output_w32(0xe88b0000|reglist);
2355}
2356
2357static void restore_regs_all(u_int reglist)
2358{
2359 int i;
2360 if(!reglist) return;
2361 assem_debug("ldmia fp,{");
2362 for(i=0;i<16;i++)
2363 if(reglist&(1<<i))
2364 assem_debug("r%d,",i);
2365 assem_debug("}\n");
2366 output_w32(0xe89b0000|reglist);
2367}
2368
2369// Save registers before function call
2370static void save_regs(u_int reglist)
2371{
2372 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2373 save_regs_all(reglist);
2374}
2375
2376// Restore registers after function call
2377static void restore_regs(u_int reglist)
2378{
2379 reglist&=CALLER_SAVE_REGS;
2380 restore_regs_all(reglist);
2381}
2382
2383/* Stubs/epilogue */
2384
2385static void literal_pool(int n)
2386{
2387 if(!literalcount) return;
2388 if(n) {
2389 if((int)out-literals[0][0]<4096-n) return;
2390 }
2391 u_int *ptr;
2392 int i;
2393 for(i=0;i<literalcount;i++)
2394 {
2395 u_int l_addr=(u_int)out;
2396 int j;
2397 for(j=0;j<i;j++) {
2398 if(literals[j][1]==literals[i][1]) {
2399 //printf("dup %08x\n",literals[i][1]);
2400 l_addr=literals[j][0];
2401 break;
2402 }
2403 }
2404 ptr=(u_int *)literals[i][0];
2405 u_int offset=l_addr-(u_int)ptr-8;
2406 assert(offset<4096);
2407 assert(!(offset&3));
2408 *ptr|=offset;
2409 if(l_addr==(u_int)out) {
2410 literals[i][0]=l_addr; // remember for dupes
2411 output_w32(literals[i][1]);
2412 }
2413 }
2414 literalcount=0;
2415}
2416
2417static void literal_pool_jumpover(int n)
2418{
2419 if(!literalcount) return;
2420 if(n) {
2421 if((int)out-literals[0][0]<4096-n) return;
2422 }
2423 int jaddr=(int)out;
2424 emit_jmp(0);
2425 literal_pool(0);
2426 set_jump_target(jaddr,(int)out);
2427}
2428
// Emit the body of an external-jump stub: loads `target` into r0 and `addr`
// into r1 (via emit_loadlp), then jumps to `linker`.
// addr is the address of a previously emitted instruction; the first assert
// checks that its top byte has the ARM branch (B/BL) opcode bits, the second
// that addr lies inside [BASE_ADDR, BASE_ADDR+(1<<TARGET_SIZE_2)).
static void emit_extjump2(u_int addr, int target, int linker)
{
  u_char *ptr=(u_char *)addr;
  assert((ptr[3]&0x0e)==0xa);
  (void)ptr; // keeps ptr "used" when assert() is compiled out

  emit_loadlp(target,0);
  emit_loadlp(addr,1);
  assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
  //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
//DEBUG >
#ifdef DEBUG_CYCLE_COUNT
  // NOTE(review): this debug-only section refers to ECX, which looks like
  // a leftover from another backend — confirm it still builds when enabled.
  emit_readword((int)&last_count,ECX);
  emit_add(HOST_CCREG,ECX,HOST_CCREG);
  emit_readword((int)&next_interupt,ECX);
  emit_writeword(HOST_CCREG,(int)&Count);
  emit_sub(HOST_CCREG,ECX,HOST_CCREG);
  emit_writeword(ECX,(int)&last_count);
#endif
//DEBUG <
  emit_jmp(linker);
}
2451
2452static void emit_extjump(int addr, int target)
2453{
2454 emit_extjump2(addr, target, (int)dyna_linker);
2455}
2456
2457static void emit_extjump_ds(int addr, int target)
2458{
2459 emit_extjump2(addr, target, (int)dyna_linker_ds);
2460}
2461
2462// put rt_val into rt, potentially making use of rs with value rs_val
2463static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2464{
2465 u_int armval;
2466 int diff;
2467 if(genimm(rt_val,&armval)) {
2468 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2469 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2470 return;
2471 }
2472 if(genimm(~rt_val,&armval)) {
2473 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2474 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2475 return;
2476 }
2477 diff=rt_val-rs_val;
2478 if(genimm(diff,&armval)) {
2479 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2480 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2481 return;
2482 }else if(genimm(-diff,&armval)) {
2483 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2484 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2485 return;
2486 }
2487 emit_movimm(rt_val,rt);
2488}
2489
// return 1 if above function can do its job cheaply
// i.e. v1==v2, or either v2-v1 or v1-v2 fits in 8 significant bits once
// trailing pairs of zero bits have been stripped.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int delta[2];
  int k;
  if(v1==v2) return 1;
  delta[0]=v2-v1; // candidate for an add
  delta[1]=v1-v2; // candidate for a sub
  for(k=0;k<2;k++) {
    u_int bits=delta[k];
    while(bits!=0&&(bits&3)==0)
      bits>>=2;
    if(bits<0x100) return 1;
  }
  return 0;
}
2505
// Move host registers a0 and a1 into r0 and r1 respectively.
// A negative register number means "nothing to pass" for that slot.
// trashes r2 (used as scratch when r0 and r1 have to be swapped)
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // must swap
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 currently lives in r0: move it out before r0 is overwritten
      emit_mov(a1,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2522
2523static void mov_loadtype_adj(int type,int rs,int rt)
2524{
2525 switch(type) {
2526 case LOADB_STUB: emit_signextend8(rs,rt); break;
2527 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2528 case LOADH_STUB: emit_signextend16(rs,rt); break;
2529 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2530 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2531 default: assert(0);
2532 }
2533}
2534
2535#include "pcsxmem.h"
2536#include "pcsxmem_inline.c"
2537
// Emit the out-of-line slow path for a memory read (stub entry n).
// Fields of stubs[n] as used here: [0] load type, [1] address of the branch
// to patch to this stub, [2] return address, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [6] cycle adjustment, [7] list of live host registers.
// The generated code first looks the address up in mem_rtab and tries a
// direct load; otherwise it calls one of the jump_handler_read* routines.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int rt;
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination register is about to be overwritten; no need to preserve it.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // No free register: save the live ones now and take r0 (or r2 if rs is r0).
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second scratch register when it is available.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12], then shifted left by one with flags set.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // NOTE(review): the *cc loads are presumably conditional on carry clear,
    // i.e. on the table entry's top bit being 0 (direct mapping) — confirm.
    switch(type) {
      case LOADB_STUB:  emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB:  emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB:  emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  // Handler path: call the read handler for this access width.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  pass_args(rs,temp2); // r0 = address, r1 = shifted table entry
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); // r2 = adjusted cycle count
  emit_call(handler);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    mov_loadtype_adj(type,0,rt); // result comes back in r0
  }
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
}
2616
2617// return memhandler, or get directly accessable address and return 0
2618static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2619{
2620 u_int l1,l2=0;
2621 l1=((u_int *)table)[addr>>12];
2622 if((l1&(1<<31))==0) {
2623 u_int v=l1<<1;
2624 *addr_host=v+addr;
2625 return 0;
2626 }
2627 else {
2628 l1<<=1;
2629 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2630 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2631 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2632 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2633 else
2634 l2=((u_int *)l1)[(addr&0xfff)/4];
2635 if((l2&(1<<31))==0) {
2636 u_int v=l2<<1;
2637 *addr_host=v+(addr&0xfff);
2638 return 0;
2639 }
2640 return l2<<1;
2641 }
2642}
2643
2644static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2645{
2646 int rs=get_reg(regmap,target);
2647 int rt=get_reg(regmap,target);
2648 if(rs<0) rs=get_reg(regmap,-1);
2649 assert(rs>=0);
2650 u_int handler,host_addr=0,is_dynamic,far_call=0;
2651 int cc=get_reg(regmap,CCREG);
2652 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2653 return;
2654 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2655 if (handler==0) {
2656 if(rt<0||rt1[i]==0)
2657 return;
2658 if(addr!=host_addr)
2659 emit_movimm_from(addr,rs,host_addr,rs);
2660 switch(type) {
2661 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2662 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2663 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2664 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2665 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2666 default: assert(0);
2667 }
2668 return;
2669 }
2670 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2671 if(is_dynamic) {
2672 if(type==LOADB_STUB||type==LOADBU_STUB)
2673 handler=(int)jump_handler_read8;
2674 if(type==LOADH_STUB||type==LOADHU_STUB)
2675 handler=(int)jump_handler_read16;
2676 if(type==LOADW_STUB)
2677 handler=(int)jump_handler_read32;
2678 }
2679
2680 // call a memhandler
2681 if(rt>=0&&rt1[i]!=0)
2682 reglist&=~(1<<rt);
2683 save_regs(reglist);
2684 if(target==0)
2685 emit_movimm(addr,0);
2686 else if(rs!=0)
2687 emit_mov(rs,0);
2688 int offset=(int)handler-(int)out-8;
2689 if(offset<-33554432||offset>=33554432) {
2690 // unreachable memhandler, a plugin func perhaps
2691 emit_movimm(handler,12);
2692 far_call=1;
2693 }
2694 if(cc<0)
2695 emit_loadreg(CCREG,2);
2696 if(is_dynamic) {
2697 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2698 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2699 }
2700 else {
2701 emit_readword((int)&last_count,3);
2702 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2703 emit_add(2,3,2);
2704 emit_writeword(2,(int)&Count);
2705 }
2706
2707 if(far_call)
2708 emit_callreg(12);
2709 else
2710 emit_call(handler);
2711
2712 if(rt>=0&&rt1[i]!=0) {
2713 switch(type) {
2714 case LOADB_STUB: emit_signextend8(0,rt); break;
2715 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2716 case LOADH_STUB: emit_signextend16(0,rt); break;
2717 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2718 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2719 default: assert(0);
2720 }
2721 }
2722 restore_regs(reglist);
2723}
2724
// Emit the out-of-line slow path for a memory write (stub entry n).
// stubs[n] fields are used as in do_readstub: [0] store type, [1] branch to
// patch, [2] return address, [3] instruction index, [4] address register,
// [5] struct regstat pointer, [6] cycle adjustment, [7] live registers.
// The generated code looks the address up in mem_wtab and tries a direct
// store; otherwise it calls one of the jump_handler_write* routines.
static void do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int rt,r;
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // No free register: save the live ones and pick any of r0-r3 that is
    // neither the address nor the data register.
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // Prefer r3 as the second scratch register when it is available.
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // temp2 = mem_wtab[rs>>12], then shifted left by one with flags set.
  emit_readword((int)&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  // NOTE(review): the *cc stores are presumably conditional on carry clear,
  // i.e. on the table entry's top bit being 0 (direct mapping) — confirm.
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default:          assert(0);
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address (invcode check)

  // Handler path: call the write handler for this access width.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  switch(type) {
    case STOREB_STUB: handler=(int)jump_handler_write8; break;
    case STOREH_STUB: handler=(int)jump_handler_write16; break;
    case STOREW_STUB: handler=(int)jump_handler_write32; break;
  }
  assert(handler!=0);
  pass_args(rs,rt); // r0 = address, r1 = data
  if(temp2!=3)
    emit_mov(temp2,3); // r3 = shifted table entry
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  // returns new cycle_count
  emit_call(handler);
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  ra=stubs[n][2];
  emit_jmp(ra);
}
2804
2805static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2806{
2807 int rs=get_reg(regmap,-1);
2808 int rt=get_reg(regmap,target);
2809 assert(rs>=0);
2810 assert(rt>=0);
2811 u_int handler,host_addr=0;
2812 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2813 if (handler==0) {
2814 if(addr!=host_addr)
2815 emit_movimm_from(addr,rs,host_addr,rs);
2816 switch(type) {
2817 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2818 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2819 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2820 default: assert(0);
2821 }
2822 return;
2823 }
2824
2825 // call a memhandler
2826 save_regs(reglist);
2827 pass_args(rs,rt);
2828 int cc=get_reg(regmap,CCREG);
2829 if(cc<0)
2830 emit_loadreg(CCREG,2);
2831 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2832 emit_movimm(handler,3);
2833 // returns new cycle_count
2834 emit_call((int)jump_handler_write_h);
2835 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2836 if(cc<0)
2837 emit_storereg(CCREG,2);
2838 restore_regs(reglist);
2839}
2840
// Emit the out-of-line path for an unaligned store (SWL/SWR only), stub n.
// Fields of stubs[n] as used here: [1] branch to patch, [2] return address,
// [3] instruction index, [4] struct regstat pointer, [5] host register
// holding the address, [6] cycle adjustment, [7] live host registers.
// The active (#if 1) branch simply calls jump_handle_swl/jump_handle_swr;
// the #else branch is an older, disabled implementation.
static void do_unalignedwritestub(int n)
{
  assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);

  int i=stubs[n][3];
  struct regstat *i_regs=(struct regstat *)stubs[n][4];
  int addr=stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int temp2=get_reg(i_regmap,FTEMP);
  int rt;
  rt=get_reg(i_regmap,rs2[i]);
  assert(rt>=0);
  assert(addr>=0);
  assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
  reglist|=(1<<addr);
  // NOTE(review): temp2 is not checked for -1 before this shift — confirm
  // that FTEMP is always allocated for SWL/SWR.
  reglist&=~(1<<temp2);

#if 1
  // don't bother with it and call write handler
  save_regs(reglist);
  pass_args(addr,rt); // r0 = address, r1 = data
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
  // r0 holds the value returned by the handler; undo the cycle adjustment
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else
  emit_andimm(addr,0xfffffffc,temp2);
  emit_writeword(temp2,(int)&address);

  save_regs(reglist);
  emit_shrimm(addr,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm((u_int)readmem,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  emit_call((int)&indirect_jump_indexed);
  restore_regs(reglist);

  emit_readword((int)&readmem_dword,temp2);
  int temp=addr; //hmh
  emit_shlimm(addr,3,temp);
  emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
  if (opcode[i]==0x2e) // SWR
#else
  if (opcode[i]==0x2a) // SWL
#endif
    emit_xorimm(temp,24,temp);
  emit_movimm(-1,HOST_TEMPREG);
  if (opcode[i]==0x2a) { // SWL
    emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshr(rt,temp,temp2);
  }else{
    emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshl(rt,temp,temp2);
  }
  emit_readword((int)&address,addr);
  emit_writeword(temp2,(int)&word);
  //save_regs(reglist); // don't need to, no state changes
  emit_shrimm(addr,16,1);
  emit_movimm((u_int)writemem,0);
  //emit_call((int)&indirect_jump_indexed);
  emit_mov(15,14);
  emit_readword_dualindexedx4(0,1,15);
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#endif
}
2928
2929static void do_invstub(int n)
2930{
2931 literal_pool(20);
2932 u_int reglist=stubs[n][3];
2933 set_jump_target(stubs[n][1],(int)out);
2934 save_regs(reglist);
2935 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2936 emit_call((int)&invalidate_addr);
2937 restore_regs(reglist);
2938 emit_jmp(stubs[n][2]); // return address
2939}
2940
2941int do_dirty_stub(int i)
2942{
2943 assem_debug("do_dirty_stub %x\n",start+i*4);
2944 u_int addr=(u_int)source;
2945 // Careful about the code output here, verify_dirty needs to parse it.
2946 #ifndef HAVE_ARMV7
2947 emit_loadlp(addr,1);
2948 emit_loadlp((int)copy,2);
2949 emit_loadlp(slen*4,3);
2950 #else
2951 emit_movw(addr&0x0000FFFF,1);
2952 emit_movw(((u_int)copy)&0x0000FFFF,2);
2953 emit_movt(addr&0xFFFF0000,1);
2954 emit_movt(((u_int)copy)&0xFFFF0000,2);
2955 emit_movw(slen*4,3);
2956 #endif
2957 emit_movimm(start+i*4,0);
2958 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2959 int entry=(int)out;
2960 load_regs_entry(i);
2961 if(entry==(int)out) entry=instr_addr[i];
2962 emit_jmp(instr_addr[i]);
2963 return entry;
2964}
2965
2966static void do_dirty_stub_ds()
2967{
2968 // Careful about the code output here, verify_dirty needs to parse it.
2969 #ifndef HAVE_ARMV7
2970 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2971 emit_loadlp((int)copy,2);
2972 emit_loadlp(slen*4,3);
2973 #else
2974 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2975 emit_movw(((u_int)copy)&0x0000FFFF,2);
2976 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2977 emit_movt(((u_int)copy)&0xFFFF0000,2);
2978 emit_movw(slen*4,3);
2979 #endif
2980 emit_movimm(start+1,0);
2981 emit_call((int)&verify_code_ds);
2982}
2983
2984static void do_cop1stub(int n)
2985{
2986 literal_pool(256);
2987 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2988 set_jump_target(stubs[n][1],(int)out);
2989 int i=stubs[n][3];
2990// int rs=stubs[n][4];
2991 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2992 int ds=stubs[n][6];
2993 if(!ds) {
2994 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2995 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2996 }
2997 //else {printf("fp exception in delay slot\n");}
2998 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2999 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3000 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3001 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3002 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3003}
3004
3005/* Special assem */
3006
// Assemble a variable shift for instruction i: rt1[i] = rs1[i] shifted by
// the amount in rs2[i]. opcode2 <= 7 selects the 32-bit forms
// (SLLV/SRLV/SRAV); larger values select the 64-bit forms
// (DSLLV/DSRLV/DSRAV), which operate on lower/upper register halves.
// Nothing is emitted when the destination is guest register 0 or has no
// host register allocated.
static void shift_assemble_arm(int i,struct regstat *i_regs)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      signed char s,t,shift;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(t>=0){
        if(rs1[i]==0)
        {
          // shifting guest register 0 always gives 0
          emit_zeroreg(t);
        }
        else if(rs2[i]==0)
        {
          // shift amount comes from guest register 0: plain move
          assert(s>=0);
          if(s!=t) emit_mov(s,t);
        }
        else
        {
          // only the low 5 bits of the shift register are used
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==4) // SLLV
          {
            emit_shl(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==6) // SRLV
          {
            emit_shr(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==7) // SRAV
          {
            emit_sar(s,HOST_TEMPREG,t);
          }
        }
      }
    } else { // DSLLV/DSRLV/DSRAV
      signed char sh,sl,th,tl,shift;
      // th/sh: host registers for the upper halves (guest reg |64), tl/sl: lower halves
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(tl>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else if(rs2[i]==0)
        {
          assert(sl>=0);
          if(sl!=tl) emit_mov(sl,tl);
          if(th>=0&&sh!=th) emit_mov(sh,th);
        }
        else
        {
          // FIXME: What if shift==tl ?
          assert(shift!=tl);
          int temp=get_reg(i_regs->regmap,-1);
          int real_th=th;
          if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
          assert(sl>=0);
          assert(sh>=0);
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==0x14) // DSLLV
          {
            if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            // NOTE(review): th is used here without the th>=0 guard that
            // surrounds the other uses in this branch — confirm.
            emit_orrshr(sl,HOST_TEMPREG,th);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32); // bit 5 set: shift amount is 32 or more
            emit_shl(sl,HOST_TEMPREG,tl);
            if(th>=0) emit_cmovne_reg(tl,th);
            emit_cmovne_imm(0,tl);
          }
          if(opcode2[i]==0x16) // DSRLV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32); // bit 5 set: shift amount is 32 or more
            emit_shr(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_imm(0,th);
          }
          if(opcode2[i]==0x17) // DSRAV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            if(real_th>=0) {
              assert(temp>=0);
              emit_sarimm(th,31,temp);
            }
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32); // bit 5 set: shift amount is 32 or more
            emit_sar(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_reg(temp,th);
          }
        }
      }
    }
  }
}
3116
3117static void speculate_mov(int rs,int rt)
3118{
3119 if(rt!=0) {
3120 smrv_strong_next|=1<<rt;
3121 smrv[rt]=smrv[rs];
3122 }
3123}
3124
3125static void speculate_mov_weak(int rs,int rt)
3126{
3127 if(rt!=0) {
3128 smrv_weak_next|=1<<rt;
3129 smrv[rt]=smrv[rs];
3130 }
3131}
3132
3133static void speculate_register_values(int i)
3134{
3135 if(i==0) {
3136 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3137 // gp,sp are likely to stay the same throughout the block
3138 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3139 smrv_weak_next=~smrv_strong_next;
3140 //printf(" llr %08x\n", smrv[4]);
3141 }
3142 smrv_strong=smrv_strong_next;
3143 smrv_weak=smrv_weak_next;
3144 switch(itype[i]) {
3145 case ALU:
3146 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3147 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3148 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3149 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3150 else {
3151 smrv_strong_next&=~(1<<rt1[i]);
3152 smrv_weak_next&=~(1<<rt1[i]);
3153 }
3154 break;
3155 case SHIFTIMM:
3156 smrv_strong_next&=~(1<<rt1[i]);
3157 smrv_weak_next&=~(1<<rt1[i]);
3158 // fallthrough
3159 case IMM16:
3160 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3161 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3162 if(hr>=0) {
3163 if(get_final_value(hr,i,&value))
3164 smrv[rt1[i]]=value;
3165 else smrv[rt1[i]]=constmap[i][hr];
3166 smrv_strong_next|=1<<rt1[i];
3167 }
3168 }
3169 else {
3170 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3171 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3172 }
3173 break;
3174 case LOAD:
3175 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3176 // special case for BIOS
3177 smrv[rt1[i]]=0xa0000000;
3178 smrv_strong_next|=1<<rt1[i];
3179 break;
3180 }
3181 // fallthrough
3182 case SHIFT:
3183 case LOADLR:
3184 case MOV:
3185 smrv_strong_next&=~(1<<rt1[i]);
3186 smrv_weak_next&=~(1<<rt1[i]);
3187 break;
3188 case COP0:
3189 case COP2:
3190 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3191 smrv_strong_next&=~(1<<rt1[i]);
3192 smrv_weak_next&=~(1<<rt1[i]);
3193 }
3194 break;
3195 case C2LS:
3196 if (opcode[i]==0x32) { // LWC2
3197 smrv_strong_next&=~(1<<rt1[i]);
3198 smrv_weak_next&=~(1<<rt1[i]);
3199 }
3200 break;
3201 }
3202#if 0
3203 int r=4;
3204 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3205 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3206#endif
3207}
3208
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class, returned when no other range matches
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3216
3217static int get_ptr_mem_type(u_int a)
3218{
3219 if(a < 0x00200000) {
3220 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3221 // return wrong, must use memhandler for BIOS self-test to pass
3222 // 007 does similar stuff from a00 mirror, weird stuff
3223 return MTYPE_8000;
3224 return MTYPE_0000;
3225 }
3226 if(0x1f800000 <= a && a < 0x1f801000)
3227 return MTYPE_1F80;
3228 if(0x80200000 <= a && a < 0x80800000)
3229 return MTYPE_8020;
3230 if(0xa0000000 <= a && a < 0xa0200000)
3231 return MTYPE_A000;
3232 return MTYPE_8000;
3233}
3234
// Emit the address check that guards the inline memory-access fast path of
// instruction i.
//   addr              - host register holding the guest address
//   addr_reg_override - set to HOST_TEMPREG when the emitted code leaves an
//                       adjusted address there that the access must use
// The expected region is guessed from the speculated value of the base
// register rs1[i] when one is available, otherwise from the address of the
// block itself (start).
// Returns the address of the emitted conditional jump (its target is
// emitted as 0 here), or 0 if no jump was emitted.
static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
{
  int jaddr=0,type=0;
  int mr=rs1[i];
  if(((smrv_strong|smrv_weak)>>mr)&1) {
    type=get_ptr_mem_type(smrv[mr]);
    //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
  }
  else {
    // use the mirror we are running on
    type=get_ptr_mem_type(start);
    //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
  }

  // For the RAM mirrors, fold the address onto the 0x80000000 view in
  // HOST_TEMPREG and then treat it like MTYPE_8000 (type 0).
  if(type==MTYPE_8020) { // RAM 80200000+ mirror
    emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_0000) { // RAM 0 mirror
    emit_orimm(addr,0x80000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_A000) { // RAM A mirror
    emit_andimm(addr,~0x20000000,HOST_TEMPREG);
    addr=*addr_reg_override=HOST_TEMPREG;
    type=0;
  }
  else if(type==MTYPE_1F80) { // scratchpad
    if (psxH == (void *)0x1f800000) {
      // scratchpad is mapped at its guest address: only range-check the offset
      emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
      emit_cmpimm(HOST_TEMPREG,0x1000);
      jaddr=(int)out;
      emit_jc(0);
    }
    else {
      // do usual RAM check, jump will go to the right handler
      type=0;
    }
  }

  if(type==0)
  {
    emit_cmpimm(addr,RAM_SIZE);
    jaddr=(int)out;
    #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
    // Hint to branch predictor that the branch is unlikely to be taken
    if(rs1[i]>=28)
      emit_jno_unlikely(0);
    else
    #endif
    emit_jno(0);
    if(ram_offset!=0) {
      // host RAM is offset from the guest address: apply the offset
      emit_addimm(addr,ram_offset,HOST_TEMPREG);
      addr=*addr_reg_override=HOST_TEMPREG;
    }
  }

  return jaddr;
}
3296
3297#define shift_assemble shift_assemble_arm
3298
// Assemble an unaligned load: LWL/LWR (opcodes 0x22/0x26) and the 64-bit
// LDL/LDR forms (0x1A/0x1B).
// The aligned word/dword containing the address is read into the FTEMP
// host register(s), then shifted by an amount derived from the low address
// bits and merged into the destination register(s).
//   i      - index of the instruction being assembled
//   i_regs - register state for that instruction
static void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // collect all host registers that currently hold something
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  reglist|=1<<temp;
  // NOTE(review): c is still 0 at this point (it is only assigned below);
  // addr is only consumed in the !c path, so this looks harmless - confirm.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      // constant address: decide at compile time whether it is in RAM
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  if(!c) {
    #ifdef RAM_OFFSET
    map=get_reg(i_regs->regmap,ROREG);
    if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
    #endif
    // temp = address*8 (bit shift amount), temp2 = aligned address
    emit_shlimm(addr,3,temp);
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
    }else{
      emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
    }
    jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
  }
  else {
    if(ram_offset&&memtarget) {
      emit_addimm(temp2,ram_offset,HOST_TEMPREG);
      fastload_reg_override=HOST_TEMPREG;
    }
    // constant address: the shift amount is known at compile time
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
    }else{
      emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
    }
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      int a=temp2;
      if(fastload_reg_override) a=fastload_reg_override;
      //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
      emit_readword_indexed_tlb(0,a,map,temp2);
      if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // HOST_TEMPREG = all ones; shifted by temp it forms the mask of
      // destination bits that get replaced by the loaded data
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    // FIXME: little endian, fastload_reg_override
    int temp2h=get_reg(i_regs->regmap,FTEMP|64);
    if(!c||memtarget) {
      //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
      //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
      emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
      if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(th>=0);
      assert(tl>=0);
      // bit 5 of the shift amount selects which half is affected; the
      // conditional (eq/ne) instructions below depend on this test
      emit_testimm(temp,32);
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x1A) { // LDL
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shl(temp2h,temp,temp2h);
        emit_orrshr(temp2,HOST_TEMPREG,temp2h);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shl(temp2,temp,temp2);
        emit_cmove_reg(temp2h,th);
        emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
        emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
        emit_orreq(temp2,tl,tl);
        emit_orrne(temp2,th,th);
      }
      if (opcode[i]==0x1B) { // LDR
        emit_xorimm(temp,24,temp);
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shr(temp2,temp,temp2);
        emit_orrshl(temp2h,HOST_TEMPREG,temp2);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shr(temp2h,temp,temp2h);
        emit_cmovne_reg(temp2,tl);
        emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
        emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
        emit_orrne(temp2h,th,th);
        emit_orreq(temp2h,tl,tl);
      }
    }
  }
}
#define loadlr_assemble loadlr_assemble_arm
3427
// Assemble a coprocessor 0 instruction: MFC0 (opcode2 0), MTC0 (opcode2 4)
// or RFE (opcode2 0x10 with function field 0x10).
//   i      - index of the instruction being assembled
//   i_regs - register state for that instruction
static void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    char copr=(source[i]>>11)&0x1f;
    //assert(t>=0); // Why does this happen?  OOT is weird
    if(t>=0&&rt1[i]!=0) {
      // plain read of the cop0 register image
      emit_readword((int)&reg_cop0+copr*4,t);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
    if(copr==9||copr==11||copr==12||copr==13) {
      // bring Count up to date before the handler runs:
      // Count = last_count + cycle counter + CLOCK_ADJUST(ccadj[i])
      emit_readword((int)&last_count,HOST_TEMPREG);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
    }
    // What a mess.  The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    if(copr==12||copr==13) {
      if (is_delayslot) {
        // burn cycles to cause cc_interrupt, which will
        // reschedule next_interupt. Relies on CCREG from above.
        assem_debug("MTC0 DS %d\n", copr);
        emit_writeword(HOST_CCREG,(int)&last_count);
        emit_movimm(0,HOST_CCREG);
        emit_storereg(CCREG,HOST_CCREG);
        // call pcsx_mtc0_ds(r0=copr, r1=value)
        emit_loadreg(rs1[i],1);
        emit_movimm(copr,0);
        emit_call((int)pcsx_mtc0_ds);
        // reload the source register after the call
        emit_loadreg(rs1[i],s);
        return;
      }
      // record the address of the next instruction and clear
      // pending_exception before calling the handler
      emit_movimm(start+i*4+4,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,(int)&pcaddr);
      emit_movimm(0,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,(int)&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
    // call pcsx_mtc0(r0=copr, r1=value)
    if(s==HOST_CCREG)
      emit_loadreg(rs1[i],1);
    else if(s!=1)
      emit_mov(s,1);
    emit_movimm(copr,0);
    emit_call((int)pcsx_mtc0);
    if(copr==9||copr==11||copr==12||copr==13) {
      // rebuild the cycle counter relative to next_interupt and make
      // next_interupt the new last_count
      emit_readword((int)&Count,HOST_CCREG);
      emit_readword((int)&next_interupt,HOST_TEMPREG);
      emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_writeword(HOST_TEMPREG,(int)&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // if the handler flagged a pending exception, go to do_interrupt
      emit_readword((int)&pending_exception,14);
      emit_test(14,14);
      emit_jne((int)&do_interrupt);
    }
    // reload the source register (and its upper half, if mapped)
    emit_loadreg(rs1[i],s);
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
    cop1_usable=0;
  }
  else
  {
    assert(opcode2[i]==0x10);
    if((source[i]&0x3f)==0x10) // RFE
    {
      // Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
      emit_readword((int)&Status,0);
      emit_andimm(0,0x3c,1);
      emit_andimm(0,~0xf,0);
      emit_orrshr_imm(1,2,0);
      emit_writeword(0,(int)&Status);
    }
  }
}
3515
3516static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3517{
3518 switch (copr) {
3519 case 1:
3520 case 3:
3521 case 5:
3522 case 8:
3523 case 9:
3524 case 10:
3525 case 11:
3526 emit_readword((int)&reg_cop2d[copr],tl);
3527 emit_signextend16(tl,tl);
3528 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3529 break;
3530 case 7:
3531 case 16:
3532 case 17:
3533 case 18:
3534 case 19:
3535 emit_readword((int)&reg_cop2d[copr],tl);
3536 emit_andimm(tl,0xffff,tl);
3537 emit_writeword(tl,(int)&reg_cop2d[copr]);
3538 break;
3539 case 15:
3540 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3541 emit_writeword(tl,(int)&reg_cop2d[copr]);
3542 break;
3543 case 28:
3544 case 29:
3545 emit_readword((int)&reg_cop2d[9],temp);
3546 emit_testimm(temp,0x8000); // do we need this?
3547 emit_andimm(temp,0xf80,temp);
3548 emit_andne_imm(temp,0,temp);
3549 emit_shrimm(temp,7,tl);
3550 emit_readword((int)&reg_cop2d[10],temp);
3551 emit_testimm(temp,0x8000);
3552 emit_andimm(temp,0xf80,temp);
3553 emit_andne_imm(temp,0,temp);
3554 emit_orrshr_imm(temp,2,tl);
3555 emit_readword((int)&reg_cop2d[11],temp);
3556 emit_testimm(temp,0x8000);
3557 emit_andimm(temp,0xf80,temp);
3558 emit_andne_imm(temp,0,temp);
3559 emit_orrshl_imm(temp,3,tl);
3560 emit_writeword(tl,(int)&reg_cop2d[copr]);
3561 break;
3562 default:
3563 emit_readword((int)&reg_cop2d[copr],tl);
3564 break;
3565 }
3566}
3567
3568static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3569{
3570 switch (copr) {
3571 case 15:
3572 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3573 emit_writeword(sl,(int)&reg_cop2d[copr]);
3574 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3575 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3576 emit_writeword(sl,(int)&reg_cop2d[14]);
3577 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3578 break;
3579 case 28:
3580 emit_andimm(sl,0x001f,temp);
3581 emit_shlimm(temp,7,temp);
3582 emit_writeword(temp,(int)&reg_cop2d[9]);
3583 emit_andimm(sl,0x03e0,temp);
3584 emit_shlimm(temp,2,temp);
3585 emit_writeword(temp,(int)&reg_cop2d[10]);
3586 emit_andimm(sl,0x7c00,temp);
3587 emit_shrimm(temp,3,temp);
3588 emit_writeword(temp,(int)&reg_cop2d[11]);
3589 emit_writeword(sl,(int)&reg_cop2d[28]);
3590 break;
3591 case 30:
3592 emit_movs(sl,temp);
3593 emit_mvnmi(temp,temp);
3594#ifdef HAVE_ARMV5
3595 emit_clz(temp,temp);
3596#else
3597 emit_movs(temp,HOST_TEMPREG);
3598 emit_movimm(0,temp);
3599 emit_jeq((int)out+4*4);
3600 emit_addpl_imm(temp,1,temp);
3601 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3602 emit_jns((int)out-2*4);
3603#endif
3604 emit_writeword(sl,(int)&reg_cop2d[30]);
3605 emit_writeword(temp,(int)&reg_cop2d[31]);
3606 break;
3607 case 31:
3608 break;
3609 default:
3610 emit_writeword(sl,(int)&reg_cop2d[copr]);
3611 break;
3612 }
3613}
3614
3615static void cop2_assemble(int i,struct regstat *i_regs)
3616{
3617 u_int copr=(source[i]>>11)&0x1f;
3618 signed char temp=get_reg(i_regs->regmap,-1);
3619 if (opcode2[i]==0) { // MFC2
3620 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3621 if(tl>=0&&rt1[i]!=0)
3622 cop2_get_dreg(copr,tl,temp);
3623 }
3624 else if (opcode2[i]==4) { // MTC2
3625 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3626 cop2_put_dreg(copr,sl,temp);
3627 }
3628 else if (opcode2[i]==2) // CFC2
3629 {
3630 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3631 if(tl>=0&&rt1[i]!=0)
3632 emit_readword((int)&reg_cop2c[copr],tl);
3633 }
3634 else if (opcode2[i]==6) // CTC2
3635 {
3636 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3637 switch(copr) {
3638 case 4:
3639 case 12:
3640 case 20:
3641 case 26:
3642 case 27:
3643 case 29:
3644 case 30:
3645 emit_signextend16(sl,temp);
3646 break;
3647 case 31:
3648 //value = value & 0x7ffff000;
3649 //if (value & 0x7f87e000) value |= 0x80000000;
3650 emit_shrimm(sl,12,temp);
3651 emit_shlimm(temp,12,temp);
3652 emit_testimm(temp,0x7f000000);
3653 emit_testeqimm(temp,0x00870000);
3654 emit_testeqimm(temp,0x0000e000);
3655 emit_orrne_imm(temp,0x80000000,temp);
3656 break;
3657 default:
3658 temp=sl;
3659 break;
3660 }
3661 emit_writeword(temp,(int)&reg_cop2c[copr]);
3662 assert(sl>=0);
3663 }
3664}
3665
3666static void c2op_prologue(u_int op,u_int reglist)
3667{
3668 save_regs_all(reglist);
3669#ifdef PCNT
3670 emit_movimm(op,0);
3671 emit_call((int)pcnt_gte_start);
3672#endif
3673 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3674}
3675
3676static void c2op_epilogue(u_int op,u_int reglist)
3677{
3678#ifdef PCNT
3679 emit_movimm(op,0);
3680 emit_call((int)pcnt_gte_end);
3681#endif
3682 restore_regs_all(reglist);
3683}
3684
3685static void c2op_call_MACtoIR(int lm,int need_flags)
3686{
3687 if(need_flags)
3688 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3689 else
3690 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3691}
3692
3693static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3694{
3695 emit_call((int)func);
3696 // func is C code and trashes r0
3697 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3698 if(need_flags||need_ir)
3699 c2op_call_MACtoIR(lm,need_flags);
3700 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3701}
3702
// Assemble a GTE operation (cop2 function field, source[i]&0x3f).
// A few operations get specialized call sequences; everything else calls
// the generic handler from gte_handlers[] (or gte_handlers_nf[] when the
// flags are not needed). Nothing is emitted if there is no handler.
//   i      - index of the instruction being assembled
//   i_regs - register state for that instruction
static void c2op_assemble(int i,struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f;
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // collect all host registers that currently hold something
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  // only caller-saved registers need to be preserved around the calls
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
      need_flags=0;
    // sf (shift) and lm bits of the GTE command word
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        // vector, translation vector and matrix selectors
        int v  = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          emit_movzwl_indexed(9*4,0,4);  // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        // selector value 3 uses the zeromem_ptr pointer instead of a
        // real matrix / vector
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword((int)&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword((int)&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_call((int)gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_call((int)gteMACtoIR_flags_neon);
        }
#else
        if(cv==3&&shift)
          emit_call((int)gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        // generic path: store the opcode in psxRegs.code and call the
        // C handler
        c2op_prologue(c2op,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,(int)&psxRegs.code);
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,reglist);
        emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
        if(need_flags||need_ir) {
          // reload the cop2 register base into r0 before the next call
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,reglist);
        emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
        if(need_flags||need_ir) {
          // reload the cop2 register base into r0 before the next call
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        // no specialized sequence: call the generic handler
        c2op_prologue(c2op,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,(int)&psxRegs.code);
#endif
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
3822
3823static void cop1_unusable(int i,struct regstat *i_regs)
3824{
3825 // XXX: should just just do the exception instead
3826 if(!cop1_usable) {
3827 int jaddr=(int)out;
3828 emit_jmp(0);
3829 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3830 cop1_usable=1;
3831 }
3832}
3833
// COP1 instructions are not implemented by this backend; only the
// "coprocessor unusable" handling is emitted.
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3838
// Floating point conversions are not implemented by this backend; only
// the "coprocessor unusable" handling is emitted.
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3844
// Floating point compares are not implemented by this backend; only the
// "coprocessor unusable" handling is emitted.
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3849
// Floating point arithmetic is not implemented by this backend; only the
// "coprocessor unusable" handling is emitted.
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3854
3855static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3856{
3857 // case 0x18: MULT
3858 // case 0x19: MULTU
3859 // case 0x1A: DIV
3860 // case 0x1B: DIVU
3861 // case 0x1C: DMULT
3862 // case 0x1D: DMULTU
3863 // case 0x1E: DDIV
3864 // case 0x1F: DDIVU
3865 if(rs1[i]&&rs2[i])
3866 {
3867 if((opcode2[i]&4)==0) // 32-bit
3868 {
3869 if(opcode2[i]==0x18) // MULT
3870 {
3871 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3872 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3873 signed char hi=get_reg(i_regs->regmap,HIREG);
3874 signed char lo=get_reg(i_regs->regmap,LOREG);
3875 assert(m1>=0);
3876 assert(m2>=0);
3877 assert(hi>=0);
3878 assert(lo>=0);
3879 emit_smull(m1,m2,hi,lo);
3880 }
3881 if(opcode2[i]==0x19) // MULTU
3882 {
3883 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3884 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3885 signed char hi=get_reg(i_regs->regmap,HIREG);
3886 signed char lo=get_reg(i_regs->regmap,LOREG);
3887 assert(m1>=0);
3888 assert(m2>=0);
3889 assert(hi>=0);
3890 assert(lo>=0);
3891 emit_umull(m1,m2,hi,lo);
3892 }
3893 if(opcode2[i]==0x1A) // DIV
3894 {
3895 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3896 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3897 assert(d1>=0);
3898 assert(d2>=0);
3899 signed char quotient=get_reg(i_regs->regmap,LOREG);
3900 signed char remainder=get_reg(i_regs->regmap,HIREG);
3901 assert(quotient>=0);
3902 assert(remainder>=0);
3903 emit_movs(d1,remainder);
3904 emit_movimm(0xffffffff,quotient);
3905 emit_negmi(quotient,quotient); // .. quotient and ..
3906 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3907 emit_movs(d2,HOST_TEMPREG);
3908 emit_jeq((int)out+52); // Division by zero
3909 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3910#ifdef HAVE_ARMV5
3911 emit_clz(HOST_TEMPREG,quotient);
3912 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3913#else
3914 emit_movimm(0,quotient);
3915 emit_addpl_imm(quotient,1,quotient);
3916 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3917 emit_jns((int)out-2*4);
3918#endif
3919 emit_orimm(quotient,1<<31,quotient);
3920 emit_shr(quotient,quotient,quotient);
3921 emit_cmp(remainder,HOST_TEMPREG);
3922 emit_subcs(remainder,HOST_TEMPREG,remainder);
3923 emit_adcs(quotient,quotient,quotient);
3924 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3925 emit_jcc((int)out-16); // -4
3926 emit_teq(d1,d2);
3927 emit_negmi(quotient,quotient);
3928 emit_test(d1,d1);
3929 emit_negmi(remainder,remainder);
3930 }
3931 if(opcode2[i]==0x1B) // DIVU
3932 {
3933 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3934 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3935 assert(d1>=0);
3936 assert(d2>=0);
3937 signed char quotient=get_reg(i_regs->regmap,LOREG);
3938 signed char remainder=get_reg(i_regs->regmap,HIREG);
3939 assert(quotient>=0);
3940 assert(remainder>=0);
3941 emit_mov(d1,remainder);
3942 emit_movimm(0xffffffff,quotient); // div0 case
3943 emit_test(d2,d2);
3944 emit_jeq((int)out+40); // Division by zero
3945#ifdef HAVE_ARMV5
3946 emit_clz(d2,HOST_TEMPREG);
3947 emit_movimm(1<<31,quotient);
3948 emit_shl(d2,HOST_TEMPREG,d2);
3949#else
3950 emit_movimm(0,HOST_TEMPREG);
3951 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3952 emit_lslpls_imm(d2,1,d2);
3953 emit_jns((int)out-2*4);
3954 emit_movimm(1<<31,quotient);
3955#endif
3956 emit_shr(quotient,HOST_TEMPREG,quotient);
3957 emit_cmp(remainder,d2);
3958 emit_subcs(remainder,d2,remainder);
3959 emit_adcs(quotient,quotient,quotient);
3960 emit_shrcc_imm(d2,1,d2);
3961 emit_jcc((int)out-16); // -4
3962 }
3963 }
3964 else // 64-bit
3965 assert(0);
3966 }
3967 else
3968 {
3969 // Multiply by zero is zero.
3970 // MIPS does not have a divide by zero exception.
3971 // The result is undefined, we return zero.
3972 signed char hr=get_reg(i_regs->regmap,HIREG);
3973 signed char lr=get_reg(i_regs->regmap,LOREG);
3974 if(hr>=0) emit_zeroreg(hr);
3975 if(lr>=0) emit_zeroreg(lr);
3976 }
3977}
3978#define multdiv_assemble multdiv_assemble_arm
3979
// Nothing to emit on ARM: the hash is computed with a single instruction
// in do_rhash(). (On x86 this step puts the value 0xf8 into the register.)
static void do_preload_rhash(int r)
{
}
3984
3985static void do_preload_rhtbl(int ht) {
3986 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3987}
3988
// Emit the mini hash table hash: rh = rs & 0xf8.
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
3992
3993static void do_miniht_load(int ht,int rh) {
3994 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3995 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3996}
3997
3998static void do_miniht_jump(int rs,int rh,int ht) {
3999 emit_cmp(rh,rs);
4000 emit_ldreq_indexed(ht,4,15);
4001 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4002 emit_mov(rs,7);
4003 emit_jmp(jump_vaddr_reg[7]);
4004 #else
4005 emit_jmp(jump_vaddr_reg[rs]);
4006 #endif
4007}
4008
4009static void do_miniht_insert(u_int return_address,int rt,int temp) {
4010 #ifndef HAVE_ARMV7
4011 emit_movimm(return_address,rt); // PC into link register
4012 add_to_linker((int)out,return_address,1);
4013 emit_pcreladdr(temp);
4014 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4015 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4016 #else
4017 emit_movw(return_address&0x0000FFFF,rt);
4018 add_to_linker((int)out,return_address,1);
4019 emit_pcreladdr(temp);
4020 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4021 emit_movt(return_address&0xFFFF0000,rt);
4022 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4023 #endif
4024}
4025
4026static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4027{
4028 //if(dirty_pre==dirty) return;
4029 int hr,reg;
4030 for(hr=0;hr<HOST_REGS;hr++) {
4031 if(hr!=EXCLUDE_REG) {
4032 reg=pre[hr];
4033 if(((~u)>>(reg&63))&1) {
4034 if(reg>0) {
4035 if(((dirty_pre&~dirty)>>hr)&1) {
4036 if(reg>0&&reg<34) {
4037 emit_storereg(reg,hr);
4038 if( ((is32_pre&~uu)>>reg)&1 ) {
4039 emit_sarimm(hr,31,HOST_TEMPREG);
4040 emit_storereg(reg|64,HOST_TEMPREG);
4041 }
4042 }
4043 else if(reg>=64) {
4044 emit_storereg(reg,hr);
4045 }
4046 }
4047 }
4048 }
4049 }
4050 }
4051}
4052
4053
4054/* using strd could possibly help but you'd have to allocate registers in pairs
4055static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4056{
4057 int hr;
4058 int wrote=-1;
4059 for(hr=HOST_REGS-1;hr>=0;hr--) {
4060 if(hr!=EXCLUDE_REG) {
4061 if(pre[hr]!=entry[hr]) {
4062 if(pre[hr]>=0) {
4063 if((dirty>>hr)&1) {
4064 if(get_reg(entry,pre[hr])<0) {
4065 if(pre[hr]<64) {
4066 if(!((u>>pre[hr])&1)) {
4067 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4068 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4069 emit_sarimm(hr,31,hr+1);
4070 emit_strdreg(pre[hr],hr);
4071 }
4072 else
4073 emit_storereg(pre[hr],hr);
4074 }else{
4075 emit_storereg(pre[hr],hr);
4076 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4077 emit_sarimm(hr,31,hr);
4078 emit_storereg(pre[hr]|64,hr);
4079 }
4080 }
4081 }
4082 }else{
4083 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4084 emit_storereg(pre[hr],hr);
4085 }
4086 }
4087 wrote=hr;
4088 }
4089 }
4090 }
4091 }
4092 }
4093 }
4094 for(hr=0;hr<HOST_REGS;hr++) {
4095 if(hr!=EXCLUDE_REG) {
4096 if(pre[hr]!=entry[hr]) {
4097 if(pre[hr]>=0) {
4098 int nr;
4099 if((nr=get_reg(entry,pre[hr]))>=0) {
4100 emit_mov(hr,nr);
4101 }
4102 }
4103 }
4104 }
4105 }
4106}
4107#define wb_invalidate wb_invalidate_arm
4108*/
4109
4110static void mark_clear_cache(void *target)
4111{
4112 u_long offset = (char *)target - (char *)BASE_ADDR;
4113 u_int mask = 1u << ((offset >> 12) & 31);
4114 if (!(needs_clear_cache[offset >> 17] & mask)) {
4115 char *start = (char *)((u_long)target & ~4095ul);
4116 start_tcache_write(start, start + 4096);
4117 needs_clear_cache[offset >> 17] |= mask;
4118 }
4119}
4120
4121// Clearing the cache is rather slow on ARM Linux, so mark the areas
4122// that need to be cleared, and then only clear these areas once.
4123static void do_clear_cache()
4124{
4125 int i,j;
4126 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4127 {
4128 u_int bitmap=needs_clear_cache[i];
4129 if(bitmap) {
4130 u_int start,end;
4131 for(j=0;j<32;j++)
4132 {
4133 if(bitmap&(1<<j)) {
4134 start=(u_int)BASE_ADDR+i*131072+j*4096;
4135 end=start+4095;
4136 j++;
4137 while(j<32) {
4138 if(bitmap&(1<<j)) {
4139 end+=4096;
4140 j++;
4141 }else{
4142 end_tcache_write((void *)start,(void *)end);
4143 break;
4144 }
4145 }
4146 }
4147 }
4148 needs_clear_cache[i]=0;
4149 }
4150 }
4151}
4152
// CPU-architecture-specific initialization.
// The body is empty: this file performs no extra setup here.
static void arch_init() {
}
4156
4157// vim:shiftwidth=2:expandtab