svpdyn: initial buggy iram caching
[picodrive.git] / Pico / carthw / svp / compiler.c
1 // 187 blocks, 12072 bytes
2 // 14 IRAM blocks
3
4 #include "../../PicoInt.h"
5
/* Size of the translation cache, in bytes. */
#define TCACHE_SIZE (256 * 1024)

/* Translation cache: start of the buffer and the next free 16-bit slot. */
static unsigned short *tcache;
static unsigned short *tcache_ptr;

/* Start of the translated block for each program word address (NULL = none yet). */
static unsigned short *block_table[0x5090 / 2];

/*
 * Same as block_table, but for addresses below 0x800/2; the first index is
 * the IRAM context number returned by get_iram_context().
 */
static unsigned short *block_table_iram[15][0x800 / 2];

/* Index into block_table_iram currently in use. */
static int iram_context;

/* Set to 1 by SET_PC(); cleared by ssp1601_dyn_run() before each block is run. */
static int had_jump;

/* Number of blocks translated so far (statistics only). */
static int nblocks;
15
16 #define EMBED_INTERPRETER
17 #define ssp1601_reset ssp1601_reset_local
18 #define ssp1601_run ssp1601_run_local
19
20 static unsigned int interp_get_pc(void);
21
22 #define GET_PC interp_get_pc
23 #define GET_PPC_OFFS() (interp_get_pc()*2 - 2)
24 #define SET_PC(d) { had_jump = 1; rPC = d; }            /* must return to dispatcher after this */
25 //#define GET_PC() (PC - (unsigned short *)svp->iram_rom)
26 //#define GET_PPC_OFFS() ((unsigned int)PC - (unsigned int)svp->iram_rom - 2)
27 //#define SET_PC(d) PC = (unsigned short *)svp->iram_rom + d
28
29 #include "ssp16.c"
30
31 // -----------------------------------------------------
32
/*
 * Byte-wise CRC-32 lookup table used by chksum_crc32().
 * Entry 0x80 is 0xEDB88320, i.e. this is the table for the reflected
 * CRC-32 polynomial. The table is never written, so it is const.
 */
static const unsigned int crctable[256] =
{
 0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
 0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
 0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
 0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
 0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
 0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
 0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
 0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
 0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
 0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
 0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
 0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
 0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
 0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
 0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
 0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
 0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
 0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
 0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
 0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
 0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
 0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
 0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
 0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
 0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
 0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
 0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
 0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
 0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
 0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
 0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
 0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
 0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
 0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
 0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
 0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
 0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
 0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
 0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
 0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
 0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
 0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
 0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
 0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
 0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
 0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
 0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
 0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
 0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
 0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
 0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
 0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
 0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
 0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
 0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
 0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
 0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
 0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
 0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
 0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
 0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
 0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
 0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
 0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
};
100
101 static u32 chksum_crc32 (unsigned char *block, unsigned int length)
102 {
103    register u32 crc;
104    unsigned long i;
105
106    crc = 0xFFFFFFFF;
107    for (i = 0; i < length; i++)
108    {
109       crc = ((crc >> 8) & 0x00FFFFFF) ^ crctable[(crc ^ *block++) & 0xFF];
110    }
111    return (crc ^ 0xFFFFFFFF);
112 }
113
114 //static int iram_crcs[32] = { 0, };
115
116 // -----------------------------------------------------
117
/*
 * Maps the 6-bit signature computed in get_iram_context() to an IRAM context
 * number (1..14). Signatures not listed here stay 0 and therefore select
 * checksums[0]. The array is sized explicitly because get_iram_context()
 * indexes it with a value masked by 0x3f.
 */
static const unsigned char iram_context_map[0x40] =
{
        [0x04] =  1,
        [0x0e] =  2,
        [0x15] =  3, [0x17] =  4,
        [0x18] =  5, [0x1b] =  6, [0x1d] =  7,
        [0x20] =  8, [0x21] =  9, [0x25] = 10,
        [0x32] = 11, [0x35] = 12,
        [0x38] = 13, [0x39] = 14
};
129
/*
 * Expected chksum_crc32() value of the 0x800-byte IRAM image for each IRAM
 * context, indexed by the context number taken from iram_context_map.
 * Read-only, hence const.
 */
static const unsigned int checksums[] =
{
        0,              // context  0
        0xfa9ddfb2,     // context  1
        0x229c80b6,     // context  2
        0x3af0c3d3,     // context  3
        0x98fc4552,     // context  4
        0x5ecacdbc,     // context  5
        0xa6931962,     // context  6
        0x53930b10,     // context  7
        0x69524552,     // context  8
        0xcb1ccdaf,     // context  9
        0x995068c7,     // context 10
        0x48b97f4d,     // context 11
        0xe8c61b74,     // context 12
        0xafa2e81a,     // context 13
        0x4e3e071a      // context 14
};
148
149
150 static int get_iram_context(void)
151 {
152         unsigned char *ir = (unsigned char *)svp->iram_rom;
153         int val1, val = ir[0x083^1] + ir[0x4FA^1] + ir[0x5F7^1] + ir[0x47B^1];
154         int crc = chksum_crc32(svp->iram_rom, 0x800);
155         val1 = iram_context_map[(val>>1)&0x3f];
156
157         if (crc != checksums[val1]) {
158                 printf("val: %02x PC=%04x\n", (val>>1)&0x3f, rPC);
159                 elprintf(EL_ANOMALY, "bad crc: %08x vs %08x", crc, checksums[val1]);
160                 //debug_dump2file(name, svp->iram_rom, 0x800);
161                 exit(1);
162         }
163         elprintf(EL_ANOMALY, "iram_context: %02i", val1);
164         return val1;
165 }
166
167 #define PROGRAM(x) ((unsigned short *)svp->iram_rom)[x]
168
169 static u32 interp_get_pc(void)
170 {
171         unsigned short *pc1 = PC;
172         int i;
173         while (pc1[-1] != 0xfe01) pc1--;                // goto current block start
174         for (i = 0; i < 0x5090/2; i++)
175                 if (block_table[i] == pc1) break;
176
177         if (i == 0x5090/2)
178         {
179                 for (i = 0; i < 0x800/2; i++)
180                         if (block_table_iram[iram_context][i] == pc1) break;
181
182                 if (i == 0x800/2) {
183                         printf("block not found!\n");
184                         exit(1);
185                 }
186         }
187
188         return i + (PC - pc1);
189 }
190
191 static void *translate_block(int pc)
192 {
193         int tmp, op, op1, icount = 0;
194         void *ret;
195
196         ret = tcache_ptr;
197
198         //printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<1);
199         for (;;)
200         {
201                 op = PROGRAM(pc++);
202                 op1 = op >> 9;
203                 *tcache_ptr++ = op;
204                 icount++;
205                 // need immediate?
206                 if ((op1 & 0xf) == 4 || (op1 & 0xf) == 6) {
207                         tmp = PROGRAM(pc++);
208                         *tcache_ptr++ = tmp;    // immediate
209                 }
210                 if (op1 == 0x24 || op1 == 0x26 || // call, bra
211                         ((op1 == 0 || op1 == 1 || op1 == 4 || op1 == 5 || op1 == 9 || op1 == 0x25) &&
212                                 (op & 0xf0) == 0x60)) { // ld PC
213                         break;
214                 }
215         }
216         *tcache_ptr++ = 0xfe01; // end of block
217         //printf("  %i inst\n", icount);
218
219         if (tcache_ptr - tcache > TCACHE_SIZE/2) {
220                 printf("tcache overflow!\n");
221                 fflush(stdout);
222                 exit(1);
223         }
224
225         // stats
226         nblocks++;
227         //if (pc >= 0x400)
228         printf("%i blocks, %i bytes\n", nblocks, (tcache_ptr - tcache)*2);
229
230         return ret;
231 }
232
233
234
235 // -----------------------------------------------------
236
237 int ssp1601_dyn_init(void)
238 {
239         tcache = tcache_ptr = malloc(TCACHE_SIZE);
240         memset(tcache, 0, sizeof(TCACHE_SIZE));
241         memset(block_table, 0, sizeof(block_table));
242         memset(block_table_iram, 0, sizeof(block_table_iram));
243         *tcache_ptr++ = 0xfe01;
244
245         return 0;
246 }
247
248
249 void ssp1601_dyn_reset(ssp1601_t *ssp)
250 {
251         ssp1601_reset_local(ssp);
252 }
253
254
255 void ssp1601_dyn_run(int cycles)
256 {
257         while (cycles > 0)
258         {
259                 int pc_old = rPC;
260                 if (rPC < 0x800/2)
261                 {
262                         if (iram_dirty) {
263                                 iram_context = get_iram_context();
264                                 iram_dirty = 0;
265                         }
266                         if (block_table_iram[iram_context][rPC] == NULL)
267                                 block_table_iram[iram_context][rPC] = translate_block(rPC);
268                         PC = block_table_iram[iram_context][rPC];
269                 }
270                 else
271                 {
272                         if (block_table[rPC] == NULL)
273                                 block_table[rPC] = translate_block(rPC);
274                         PC = block_table[rPC];
275                 }
276
277                 had_jump = 0;
278
279                 //printf("enter @ %04x, PC=%04x\n", (PC - tcache)<<1, rPC<<1);
280                 ssp1601_run_local(0x10000);
281                 cycles -= 0x10000 - g_cycles;
282
283                 if (!had_jump) {
284                         // no jumps
285                         if (pc_old < 0x800/2)
286                                 rPC += (PC - block_table_iram[iram_context][pc_old]) - 1;
287                         else
288                                 rPC += (PC - block_table[pc_old]) - 1;
289                 }
290                 //printf("end   @ %04x, PC=%04x\n", (PC - tcache)<<1, rPC<<1);
291 /*
292                 if (pc_old < 0x400) {
293                         // flush IRAM cache
294                         tcache_ptr = block_table[pc_old];
295                         block_table[pc_old] = NULL;
296                         nblocks--;
297                 }
298                 if (pc_old >= 0x400 && rPC < 0x400)
299                 {
300                         int i, crc = chksum_crc32(svp->iram_rom, 0x800);
301                         for (i = 0; i < 32; i++)
302                                 if (iram_crcs[i] == crc) break;
303                         if (i == 32) {
304                                 char name[32];
305                                 for (i = 0; i < 32 && iram_crcs[i]; i++);
306                                 iram_crcs[i] = crc;
307                                 printf("%i IRAMs\n", i+1);
308                                 sprintf(name, "ir%08x.bin", crc);
309                                 debug_dump2file(name, svp->iram_rom, 0x800);
310                         }
311                         printf("CRC %08x %08x\n", crc, iram_id);
312                 }
313 */
314         }
315 //      debug_dump2file("tcache.bin", tcache, (tcache_ptr - tcache) << 1);
316 //      exit(1);
317 }
318