cefadd21b561ffa3ab824f4b159b90a769d84ca7
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
1 /*
2  * (C) Gražvydas "notaz" Ignotas, 2010-2011
3  *
4  * This work is licensed under the terms of GNU GPL version 2 or later.
5  * See the COPYING file in the top-level directory.
6  */
7
8 #include <stdio.h>
9
10 #include "emu_if.h"
11 #include "pcsxmem.h"
12 #include "../psxhle.h"
13 #include "../psxinterpreter.h"
14 #include "../psxcounters.h"
15 #include "../psxevents.h"
16 #include "../psxbios.h"
17 #include "../r3000a.h"
18 #include "../gte_arm.h"
19 #include "../gte_neon.h"
20 #include "compiler_features.h"
21 #include "arm_features.h"
22 #define FLAGLESS
23 #include "../gte.h"
24 #if defined(NDRC_THREAD) && !defined(DRC_DISABLE) && !defined(LIGHTREC)
25 #include "../../frontend/libretro-rthreads.h"
26 #include "features/features_cpu.h"
27 #include "retro_timers.h"
28 #endif
29 #ifdef _3DS
30 #include <3ds_utils.h>
31 #endif
32
#ifndef ARRAY_SIZE
// Element count of a real array (not meaningful for pointers).
// The argument is parenthesized so that any array-valued expression works.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
36
// Event tracing hook: expands to nothing by default.
// Enable the commented-out definition instead to route it to printf.
//#define evprintf printf
#define evprintf(...)
39
40 static void ari64_thread_sync(void);
41
42 void ndrc_freeze(void *f, int mode)
43 {
44         const char header_save[8] = "ariblks";
45         uint32_t addrs[1024 * 4];
46         int32_t size = 0;
47         int bytes;
48         char header[8];
49
50         ari64_thread_sync();
51
52         if (mode != 0) { // save
53                 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
54                 if (size == 0)
55                         return;
56
57                 SaveFuncs.write(f, header_save, sizeof(header_save));
58                 SaveFuncs.write(f, &size, sizeof(size));
59                 SaveFuncs.write(f, addrs, size);
60         }
61         else {
62                 bytes = SaveFuncs.read(f, header, sizeof(header));
63                 if (bytes != sizeof(header) || strcmp(header, header_save)) {
64                         if (bytes > 0)
65                                 SaveFuncs.seek(f, -bytes, SEEK_CUR);
66                         return;
67                 }
68                 SaveFuncs.read(f, &size, sizeof(size));
69                 if (size <= 0)
70                         return;
71                 if (size > sizeof(addrs)) {
72                         bytes = size - sizeof(addrs);
73                         SaveFuncs.seek(f, bytes, SEEK_CUR);
74                         size = sizeof(addrs);
75                 }
76                 bytes = SaveFuncs.read(f, addrs, size);
77                 if (bytes != size)
78                         return;
79
80                 if (psxCpu != &psxInt)
81                         new_dynarec_load_blocks(addrs, size);
82         }
83
84         //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
85 }
86
// Wait for the compiler thread to go idle, then do a full dynarec clear.
void ndrc_clear_full(void)
{
	ari64_thread_sync();

	new_dynarec_clear_full();
}
92
93 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
94 #include "linkage_offsets.h"
95
96 static void ari64_thread_init(void);
97 static int  ari64_thread_check_range(unsigned int start, unsigned int end);
98
99 void pcsx_mtc0(psxRegisters *regs, u32 reg, u32 val)
100 {
101         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle);
102         MTC0(regs, reg, val);
103         gen_interupt(&regs->CP0);
104 }
105
106 void pcsx_mtc0_ds(psxRegisters *regs, u32 reg, u32 val)
107 {
108         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle);
109         MTC0(regs, reg, val);
110 }
111
112 /* GTE stuff */
113 void *gte_handlers[64];
114
115 void *gte_handlers_nf[64] = {
116         NULL      , gteRTPS_nf , NULL       , NULL      , NULL     , NULL       , gteNCLIP_nf, NULL      , // 00
117         NULL      , NULL       , NULL       , NULL      , gteOP_nf , NULL       , NULL       , NULL      , // 08
118         gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL       , gteNCDT_nf , NULL      , // 10
119         NULL      , NULL       , NULL       , gteNCCS_nf, gteCC_nf , NULL       , gteNCS_nf  , NULL      , // 18
120         gteNCT_nf , NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 20
121         gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL      , NULL     , gteAVSZ3_nf, gteAVSZ4_nf, NULL      , // 28 
122         gteRTPT_nf, NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 30
123         NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
124 };
125
// Mnemonics of the GTE ops, indexed by op number; unlisted slots are NULL
const char *gte_regnames[64] = {
	[0x01] = "RTPS",
	[0x06] = "NCLIP",
	[0x0c] = "OP",
	[0x10] = "DPCS",
	[0x11] = "INTPL",
	[0x12] = "MVMVA",
	[0x13] = "NCDS",
	[0x14] = "CDP",
	[0x16] = "NCDT",
	[0x1b] = "NCCS",
	[0x1c] = "CC",
	[0x1e] = "NCS",
	[0x20] = "NCT",
	[0x28] = "SQR",
	[0x29] = "DCPL",
	[0x2a] = "DPCT",
	[0x2d] = "AVSZ3",
	[0x2e] = "AVSZ4",
	[0x30] = "RTPT",
	[0x3d] = "GPF",
	[0x3e] = "GPL",
	[0x3f] = "NCCT",
};
136
// Bit-mask builders for 64-bit GTE register usage masks.
// GDBIT(n) selects bit n (low 32 bits), GCBIT(n) selects bit 32+n
// (high 32 bits); the GxBITSn() forms OR several of them together.
// Arguments are parenthesized so expressions can be passed safely.
#define GCBIT(x) \
	(1ll << (32 + (x)))
#define GDBIT(x) \
	(1ll << (x))
#define GCBITS3(b0,b1,b2) \
	(GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
#define GDBITS2(b0,b1) \
	(GDBIT(b0) | GDBIT(b1))
#define GDBITS3(b0,b1,b2) \
	(GDBITS2(b0,b1) | GDBIT(b2))
#define GDBITS4(b0,b1,b2,b3) \
	(GDBITS3(b0,b1,b2) | GDBIT(b3))
#define GDBITS5(b0,b1,b2,b3,b4) \
	(GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
#define GDBITS6(b0,b1,b2,b3,b4,b5) \
	(GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
	(GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
	(GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
	(GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
	(GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
161
162 const uint64_t gte_reg_reads[64] = {
163         [GTE_RTPS]  = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
164         [GTE_NCLIP] =                        GDBITS3(12,13,14),
165         [GTE_OP]    = GCBITS3(0,2,4)       | GDBITS3(9,10,11),
166         [GTE_DPCS]  = GCBITS3(21,22,23)    | GDBITS4(6,8,21,22),
167         [GTE_INTPL] = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
168         [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
169         [GTE_NCDS]  = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
170         [GTE_CDP]   = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
171         [GTE_NCDT]  = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
172         [GTE_NCCS]  = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
173         [GTE_CC]    = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
174         [GTE_NCS]   = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
175         [GTE_NCT]   = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
176         [GTE_SQR]   =                        GDBITS3(9,10,11),
177         [GTE_DCPL]  = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
178         [GTE_DPCT]  = GCBITS3(21,22,23)    | GDBITS4(8,20,21,22),
179         [GTE_AVSZ3] = GCBIT(29)            | GDBITS3(17,18,19),
180         [GTE_AVSZ4] = GCBIT(30)            | GDBITS4(16,17,18,19),
181         [GTE_RTPT]  = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
182         [GTE_GPF]   =                        GDBITS7(6,8,9,10,11,21,22),
183         [GTE_GPL]   =                        GDBITS10(6,8,9,10,11,21,22,25,26,27),
184         [GTE_NCCT]  = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
185 };
186
187 // note: this excludes gteFLAG that is always written to
188 const uint64_t gte_reg_writes[64] = {
189         [GTE_RTPS]  = 0x0f0f7f00ll,
190         [GTE_NCLIP] = GDBIT(24),
191         [GTE_OP]    = GDBITS6(9,10,11,25,26,27),
192         [GTE_DPCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
193         [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
194         [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
195         [GTE_NCDS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
196         [GTE_CDP]   = GDBITS9(9,10,11,20,21,22,25,26,27),
197         [GTE_NCDT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
198         [GTE_NCCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
199         [GTE_CC]    = GDBITS9(9,10,11,20,21,22,25,26,27),
200         [GTE_NCS]   = GDBITS9(9,10,11,20,21,22,25,26,27),
201         [GTE_NCT]   = GDBITS9(9,10,11,20,21,22,25,26,27),
202         [GTE_SQR]   = GDBITS6(9,10,11,25,26,27),
203         [GTE_DCPL]  = GDBITS9(9,10,11,20,21,22,25,26,27),
204         [GTE_DPCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
205         [GTE_AVSZ3] = GDBITS2(7,24),
206         [GTE_AVSZ4] = GDBITS2(7,24),
207         [GTE_RTPT]  = 0x0f0f7f00ll,
208         [GTE_GPF]   = GDBITS9(9,10,11,20,21,22,25,26,27),
209         [GTE_GPL]   = GDBITS9(9,10,11,20,21,22,25,26,27),
210         [GTE_NCCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
211 };
212
// Reset hook: with the compiler thread idle, reset the dynarec memory
// mappings, invalidate all compiled pages and reload the memory state.
static void ari64_reset()
{
	ari64_thread_sync();

	new_dyna_pcsx_mem_reset();
	new_dynarec_invalidate_all_pages();
	new_dyna_pcsx_mem_load_state();
}
220
221 // execute until predefined leave points
222 // (HLE softcall exit and BIOS fastboot end)
223 static void ari64_execute_until(psxRegisters *regs)
224 {
225         void *drc_local = (char *)regs - LO_psxRegs;
226
227         assert(drc_local == dynarec_local);
228         evprintf("+exec %08x, %u->%u (%d)\n", regs->pc, regs->cycle,
229                 regs->next_interupt, regs->next_interupt - regs->cycle);
230
231         new_dyna_start(drc_local);
232
233         evprintf("-exec %08x, %u->%u (%d) stop %d \n", regs->pc, regs->cycle,
234                 regs->next_interupt, regs->next_interupt - regs->cycle, regs->stop);
235 }
236
237 static void ari64_execute(struct psxRegisters *regs)
238 {
239         while (!regs->stop) {
240                 schedule_timeslice(regs);
241                 ari64_execute_until(regs);
242                 evprintf("drc left @%08x\n", regs->pc);
243         }
244 }
245
246 static void ari64_execute_block(struct psxRegisters *regs, enum blockExecCaller caller)
247 {
248         if (caller == EXEC_CALLER_BOOT)
249                 regs->stop++;
250
251         regs->next_interupt = regs->cycle + 1;
252         ari64_execute_until(regs);
253
254         if (caller == EXEC_CALLER_BOOT)
255                 regs->stop--;
256 }
257
258 static void ari64_clear(u32 addr, u32 size)
259 {
260         u32 end = addr + size * 4; /* PCSX uses DMA units (words) */
261
262         evprintf("ari64_clear %08x %04x\n", addr, size * 4);
263
264         if (!new_dynarec_quick_check_range(addr, end) &&
265             !ari64_thread_check_range(addr, end))
266                 return;
267
268         ari64_thread_sync();
269         new_dynarec_invalidate_range(addr, end);
270 }
271
// React to state changed outside the dynarec: a replaced RAM triggers a
// full reset, execution by another CPU emulator only a memory state reload.
static void ari64_on_ext_change(int ram_replaced, int other_cpu_emu_exec)
{
	if (ram_replaced) {
		ari64_reset();
		return;
	}

	if (other_cpu_emu_exec)
		new_dyna_pcsx_mem_load_state();
}
279
280 static void ari64_notify(enum R3000Anote note, void *data) {
281         switch (note)
282         {
283         case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
284         case R3000ACPU_NOTIFY_CACHE_ISOLATED:
285                 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
286                 break;
287         case R3000ACPU_NOTIFY_BEFORE_SAVE:
288                 break;
289         case R3000ACPU_NOTIFY_AFTER_LOAD:
290                 ari64_on_ext_change(data == NULL, 0);
291                 psxInt.Notify(note, data);
292                 break;
293         }
294 }
295
296 static void ari64_apply_config()
297 {
298         int thread_changed;
299
300         ari64_thread_sync();
301         intApplyConfig();
302
303         if (Config.DisableStalls)
304                 ndrc_g.hacks |= NDHACK_NO_STALLS;
305         else
306                 ndrc_g.hacks &= ~NDHACK_NO_STALLS;
307
308         thread_changed = ((ndrc_g.hacks | ndrc_g.hacks_pergame) ^ ndrc_g.hacks_old)
309                 & (NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON);
310         if (Config.cycle_multiplier != ndrc_g.cycle_multiplier_old
311             || (ndrc_g.hacks | ndrc_g.hacks_pergame) != ndrc_g.hacks_old)
312         {
313                 new_dynarec_clear_full();
314         }
315         if (thread_changed)
316                 ari64_thread_init();
317 }
318
319 #ifdef NDRC_THREAD
// Make code written by the compiler thread visible to this core before it
// is executed. Only does anything on ARM builds, and only when a dirty
// range has been recorded in ndrc_g.thread.
static void clear_local_cache(void)
{
#if defined(__arm__) || defined(__aarch64__)
	if (!ndrc_g.thread.dirty_start)
		return;

	// see "Ensuring the visibility of updates to instructions"
	// in v7/v8 reference manuals (DDI0406, DDI0487 etc.)
#if defined(__aarch64__) || defined(HAVE_ARMV8)
	// the actual clean/invalidate is broadcast to all cores,
	// the manual only prescribes an isb
	__asm__ volatile("isb");
//#elif defined(_3DS)
//	ctr_invalidate_icache();
#else
	// while on v6 this is always required, on v7 it depends on
	// "Multiprocessing Extensions" being present, but that is difficult
	// to detect so do it always for now
	new_dyna_clear_cache(ndrc_g.thread.dirty_start, ndrc_g.thread.dirty_end);
#endif
	// range handled, mark it clean
	ndrc_g.thread.dirty_start = ndrc_g.thread.dirty_end = 0;
#endif
}
342
343 static void mixed_execute_block(struct psxRegisters *regs, enum blockExecCaller caller)
344 {
345         psxInt.ExecuteBlock(regs, caller);
346 }
347
348 static void mixed_clear(u32 addr, u32 size)
349 {
350         ari64_clear(addr, size);
351         psxInt.Clear(addr, size);
352 }
353
354 static void mixed_notify(enum R3000Anote note, void *data)
355 {
356         ari64_notify(note, data);
357         psxInt.Notify(note, data);
358 }
359
360 static R3000Acpu psxMixedCpu = {
361         NULL /* Init */, NULL /* Reset */, NULL /* Execute */,
362         mixed_execute_block,
363         mixed_clear,
364         mixed_notify,
365         NULL /* ApplyConfig */, NULL /* Shutdown */
366 };
367
368 static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs,
369         enum blockExecCaller block_caller)
370 {
371         if (ndrc_g.thread.busy_addr == ~0u) {
372                 memcpy(ndrc_smrv_regs, regs->GPR.r, sizeof(ndrc_smrv_regs));
373                 slock_lock(ndrc_g.thread.lock);
374                 ndrc_g.thread.busy_addr = regs->pc;
375                 slock_unlock(ndrc_g.thread.lock);
376                 scond_signal(ndrc_g.thread.cond);
377         }
378
379         //ari64_notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
380         psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
381         assert(psxCpu == &psxRec);
382         psxCpu = &psxMixedCpu;
383         for (;;)
384         {
385                 mixed_execute_block(regs, block_caller);
386
387                 if (ndrc_g.thread.busy_addr == ~0u)
388                         break;
389                 if (block_caller == EXEC_CALLER_HLE) {
390                         if (!psxBiosSoftcallEnded())
391                                 continue;
392                         break;
393                 }
394                 else if (block_caller == EXEC_CALLER_BOOT) {
395                         if (!psxExecuteBiosEnded())
396                                 continue;
397                         break;
398                 }
399                 if (regs->stop)
400                         break;
401         }
402         psxCpu = &psxRec;
403
404         psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
405         //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
406         ari64_on_ext_change(0, 1);
407 }
408
409 static void ari64_execute_threaded_once(struct psxRegisters *regs,
410         enum blockExecCaller block_caller)
411 {
412         void *drc_local = (char *)regs - LO_psxRegs;
413         struct ht_entry *hash_table =
414                 *(void **)((char *)drc_local + LO_hash_table_ptr);
415         void *target;
416
417         if (likely(ndrc_g.thread.busy_addr == ~0u)) {
418                 target = ndrc_get_addr_ht_param(hash_table, regs->pc,
419                                 ndrc_cm_no_compile);
420                 if (target) {
421                         clear_local_cache();
422                         new_dyna_start_at(drc_local, target);
423                         return;
424                 }
425         }
426         ari64_execute_threaded_slow(regs, block_caller);
427 }
428
429 static void ari64_execute_threaded(struct psxRegisters *regs)
430 {
431         schedule_timeslice(regs);
432         while (!regs->stop)
433         {
434                 ari64_execute_threaded_once(regs, EXEC_CALLER_OTHER);
435
436                 if ((s32)(regs->cycle - regs->next_interupt) >= 0)
437                         schedule_timeslice(regs);
438         }
439 }
440
441 static void ari64_execute_threaded_block(struct psxRegisters *regs,
442         enum blockExecCaller caller)
443 {
444         if (caller == EXEC_CALLER_BOOT)
445                 regs->stop++;
446
447         regs->next_interupt = regs->cycle + 1;
448
449         ari64_execute_threaded_once(regs, caller);
450         if (regs->cpuInRecursion) {
451                 // must sync since we are returning to compiled code
452                 ari64_thread_sync();
453         }
454
455         if (caller == EXEC_CALLER_BOOT)
456                 regs->stop--;
457 }
458
459 static void ari64_thread_sync(void)
460 {
461         if (!ndrc_g.thread.lock || ndrc_g.thread.busy_addr == ~0u)
462                 return;
463         for (;;) {
464                 slock_lock(ndrc_g.thread.lock);
465                 slock_unlock(ndrc_g.thread.lock);
466                 if (ndrc_g.thread.busy_addr == ~0)
467                         break;
468                 retro_sleep(0);
469         }
470 }
471
472 static int ari64_thread_check_range(unsigned int start, unsigned int end)
473 {
474         u32 addr = ndrc_g.thread.busy_addr;
475         if (addr == ~0u)
476                 return 0;
477
478         addr &= 0x1fffffff;
479         start &= 0x1fffffff;
480         end &= 0x1fffffff;
481         if (addr >= end)
482                 return 0;
483         if (addr + MAXBLOCK * 4 <= start)
484                 return 0;
485
486         //SysPrintf("%x hits %x-%x\n", addr, start, end);
487         return 1;
488 }
489
490 static void ari64_compile_thread(void *unused)
491 {
492         struct ht_entry *hash_table =
493                 *(void **)((char *)dynarec_local + LO_hash_table_ptr);
494         void *target;
495         u32 addr;
496
497         slock_lock(ndrc_g.thread.lock);
498         while (!ndrc_g.thread.exit)
499         {
500                 addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr;
501                 if (addr == ~0u)
502                         scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock);
503                 addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr;
504                 if (addr == ~0u || ndrc_g.thread.exit)
505                         continue;
506
507                 target = ndrc_get_addr_ht_param(hash_table, addr,
508                                 ndrc_cm_compile_in_thread);
509                 //printf("c  %08x -> %p\n", addr, target);
510                 ndrc_g.thread.busy_addr = ~0u;
511         }
512         slock_unlock(ndrc_g.thread.lock);
513         (void)target;
514 }
515
516 static void ari64_thread_shutdown(void)
517 {
518         psxRec.Execute = ari64_execute;
519         psxRec.ExecuteBlock = ari64_execute_block;
520
521         if (ndrc_g.thread.lock)
522                 slock_lock(ndrc_g.thread.lock);
523         ndrc_g.thread.exit = 1;
524         if (ndrc_g.thread.lock)
525                 slock_unlock(ndrc_g.thread.lock);
526         if (ndrc_g.thread.cond)
527                 scond_signal(ndrc_g.thread.cond);
528         if (ndrc_g.thread.handle) {
529                 sthread_join(ndrc_g.thread.handle);
530                 ndrc_g.thread.handle = NULL;
531         }
532         if (ndrc_g.thread.cond) {
533                 scond_free(ndrc_g.thread.cond);
534                 ndrc_g.thread.cond = NULL;
535         }
536         if (ndrc_g.thread.lock) {
537                 slock_free(ndrc_g.thread.lock);
538                 ndrc_g.thread.lock = NULL;
539         }
540         ndrc_g.thread.busy_addr = ~0u;
541 }
542
543 static void ari64_thread_init(void)
544 {
545         int enable;
546
547         if (ndrc_g.hacks_pergame & NDHACK_THREAD_FORCE)
548                 enable = 0;
549         else if (ndrc_g.hacks & NDHACK_THREAD_FORCE)
550                 enable = ndrc_g.hacks & NDHACK_THREAD_FORCE_ON;
551         else {
552                 u32 cpu_count = cpu_features_get_core_amount();
553                 enable = cpu_count > 1;
554 #ifdef _3DS
555                 // bad for old3ds, reprotedly no improvement for new3ds
556                 enable = 0;
557 #endif
558         }
559
560         if (!ndrc_g.thread.handle == !enable)
561                 return;
562
563         ari64_thread_shutdown();
564         ndrc_g.thread.exit = 0;
565         ndrc_g.thread.busy_addr = ~0u;
566
567         if (enable) {
568                 ndrc_g.thread.lock = slock_new();
569                 ndrc_g.thread.cond = scond_new();
570         }
571         if (ndrc_g.thread.lock && ndrc_g.thread.cond)
572                 ndrc_g.thread.handle = pcsxr_sthread_create(ari64_compile_thread, PCSXRT_DRC);
573         if (ndrc_g.thread.handle) {
574                 psxRec.Execute = ari64_execute_threaded;
575                 psxRec.ExecuteBlock = ari64_execute_threaded_block;
576         }
577         else {
578                 // clean up potential incomplete init
579                 ari64_thread_shutdown();
580         }
581         SysPrintf("compiler thread %sabled\n", ndrc_g.thread.handle ? "en" : "dis");
582 }
583 #else // if !NDRC_THREAD
// no compiler thread in this build: the thread hooks do nothing
static void ari64_thread_init(void)
{
}

static void ari64_thread_shutdown(void)
{
}

// never reports an overlap since nothing is compiled in the background
static int ari64_thread_check_range(unsigned int start, unsigned int end)
{
	return 0;
}
587 #endif
588
589 static int ari64_init()
590 {
591         static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
592         size_t i;
593
594         new_dynarec_init();
595         new_dyna_pcsx_mem_init();
596
597         for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
598                 if (psxCP2[i] != gteNULL)
599                         gte_handlers[i] = psxCP2[i];
600
601 #if defined(__arm__) && !defined(DRC_DBG)
602         gte_handlers[0x06] = gteNCLIP_arm;
603 #ifdef HAVE_ARMV5
604         gte_handlers_nf[0x01] = gteRTPS_nf_arm;
605         gte_handlers_nf[0x30] = gteRTPT_nf_arm;
606 #endif
607 #ifdef __ARM_NEON__
608         // compiler's _nf version is still a lot slower than neon
609         // _nf_arm RTPS is roughly the same, RTPT slower
610         gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
611         gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
612 #endif
613 #endif
614 #ifdef DRC_DBG
615         memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
616 #endif
617         psxH_ptr = psxH;
618         zeromem_ptr = zero_mem;
619         scratch_buf_ptr = scratch_buf; // for gte_neon.S
620
621         ndrc_g.cycle_multiplier_old = Config.cycle_multiplier;
622         ndrc_g.hacks_old = ndrc_g.hacks | ndrc_g.hacks_pergame;
623         ari64_apply_config();
624         ari64_thread_init();
625
626         return 0;
627 }
628
// Shutdown hook: stop the compiler thread first, then tear down the
// dynarec and its memory handlers.
static void ari64_shutdown()
{
	ari64_thread_shutdown();

	new_dynarec_cleanup();
	new_dyna_pcsx_mem_shutdown();
}
635
636 R3000Acpu psxRec = {
637         ari64_init,
638         ari64_reset,
639         ari64_execute,
640         ari64_execute_block,
641         ari64_clear,
642         ari64_notify,
643         ari64_apply_config,
644         ari64_shutdown
645 };
646
647 #else // if DRC_DISABLE
648
649 struct ndrc_globals ndrc_g; // dummy
650 void new_dynarec_init() {}
651 void new_dyna_start(void *context) {}
652 void new_dynarec_cleanup() {}
653 void new_dynarec_clear_full() {}
654 void new_dynarec_invalidate_all_pages() {}
655 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
656 void new_dyna_pcsx_mem_init(void) {}
657 void new_dyna_pcsx_mem_reset(void) {}
658 void new_dyna_pcsx_mem_load_state(void) {}
659 void new_dyna_pcsx_mem_isolate(int enable) {}
660 void new_dyna_pcsx_mem_shutdown(void) {}
661 int  new_dynarec_save_blocks(void *save, int size) { return 0; }
662 void new_dynarec_load_blocks(const void *save, int size) {}
663
664 #endif // DRC_DISABLE
665
666 #ifndef NDRC_THREAD
// without a compiler thread there is never anything to wait for
static void ari64_thread_sync(void)
{
}
668 #endif
669
670 #ifdef DRC_DBG
671
672 #include <stddef.h>
673 static FILE *f;
674 u32 irq_test_cycle;
675 u32 handler_cycle;
676 u32 last_io_addr;
677
// Write 'size' bytes starting at 'mem' to the file 'fname'.
// If 'fname' cannot be opened and it contains a '/', the file name part
// after the last '/' is tried instead (i.e. the current directory).
// When no file can be opened the dump is skipped with a message rather
// than crashing.
void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		// strrchr() returns NULL when there is no directory part
		const char *base = strrchr(fname, '/');
		if (base != NULL)
			f1 = fopen(base + 1, "wb");
	}
	if (f1 == NULL) {
		printf("dump_mem: failed to open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		printf("dump_mem: short write to %s\n", fname);
	fclose(f1);
}
686
687 static u32 memcheck_read(u32 a)
688 {
689         if ((a >> 16) == 0x1f80)
690                 // scratchpad/IO
691                 return *(u32 *)(psxH + (a & 0xfffc));
692
693         if ((a >> 16) == 0x1f00)
694                 // parallel
695                 return *(u32 *)(psxP + (a & 0xfffc));
696
697 //      if ((a & ~0xe0600000) < 0x200000)
698         // RAM
699         return *(u32 *)(psxM + (a & 0x1ffffc));
700 }
701
702 #if 0
703 void do_insn_trace(void)
704 {
705         static psxRegisters oldregs;
706         static u32 event_cycles_o[PSXINT_COUNT];
707         u32 *allregs_p = (void *)&psxRegs;
708         u32 *allregs_o = (void *)&oldregs;
709         u32 io_data;
710         int i;
711         u8 byte;
712
713         //last_io_addr = 0x5e2c8;
714         if (f == NULL)
715                 f = fopen("tracelog", "wb");
716
717         // log reg changes
718         oldregs.code = psxRegs.code; // don't care
719         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
720                 if (allregs_p[i] != allregs_o[i]) {
721                         fwrite(&i, 1, 1, f);
722                         fwrite(&allregs_p[i], 1, 4, f);
723                         allregs_o[i] = allregs_p[i];
724                 }
725         }
726         // log event changes
727         for (i = 0; i < PSXINT_COUNT; i++) {
728                 if (psxRegs.event_cycles[i] != event_cycles_o[i]) {
729                         byte = 0xf8;
730                         fwrite(&byte, 1, 1, f);
731                         fwrite(&i, 1, 1, f);
732                         fwrite(&psxRegs.event_cycles[i], 1, 4, f);
733                         event_cycles_o[i] = psxRegs.event_cycles[i];
734                 }
735         }
736         #define SAVE_IF_CHANGED(code_, name_) { \
737                 static u32 old_##name_ = 0xbad0c0de; \
738                 if (old_##name_ != name_) { \
739                         byte = code_; \
740                         fwrite(&byte, 1, 1, f); \
741                         fwrite(&name_, 1, 4, f); \
742                         old_##name_ = name_; \
743                 } \
744         }
745         SAVE_IF_CHANGED(0xfb, irq_test_cycle);
746         SAVE_IF_CHANGED(0xfc, handler_cycle);
747         SAVE_IF_CHANGED(0xfd, last_io_addr);
748         io_data = memcheck_read(last_io_addr);
749         SAVE_IF_CHANGED(0xfe, io_data);
750         byte = 0xff;
751         fwrite(&byte, 1, 1, f);
752
753 #if 0
754         if (psxRegs.cycle == 190230) {
755                 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
756                 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
757                 printf("dumped\n");
758                 exit(1);
759         }
760 #endif
761 }
762 #endif
763
764 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
765         "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
766         "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
767         "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
768         "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
769         "lo",  "hi",
770         "C0_0",  "C0_1",  "C0_2",  "C0_3",  "C0_4",  "C0_5",  "C0_6",  "C0_7",
771         "C0_8",  "C0_9",  "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
772         "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
773         "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
774
775         "C2D0",  "C2D1",  "C2D2",  "C2D3",  "C2D4",  "C2D5",  "C2D6",  "C2D7",
776         "C2D8",  "C2D9",  "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
777         "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
778         "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
779
780         "C2C0",  "C2C1",  "C2C2",  "C2C3",  "C2C4",  "C2C5",  "C2C6",  "C2C7",
781         "C2C8",  "C2C9",  "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
782         "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
783         "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
784
785         "PC", "code", "cycle", "interrupt",
786 };
787
788 static struct {
789         int reg;
790         u32 val, val_expect;
791         u32 pc, cycle;
792 } miss_log[64];
793 static int miss_log_i;
794 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
795 #define miss_log_mask (miss_log_len-1)
796
797 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
798 {
799         miss_log[miss_log_i].reg = reg;
800         miss_log[miss_log_i].val = val;
801         miss_log[miss_log_i].val_expect = val_expect;
802         miss_log[miss_log_i].pc = pc;
803         miss_log[miss_log_i].cycle = cycle;
804         miss_log_i = (miss_log_i + 1) & miss_log_mask;
805 }
806
// intentionally empty: a convenient place to set a debugger breakpoint
void breakme()
{
}
808
809 void do_insn_cmp(void)
810 {
811         extern int last_count;
812         static psxRegisters rregs;
813         static u32 mem_addr, mem_val;
814         static u32 irq_test_cycle_intr;
815         static u32 handler_cycle_intr;
816         u32 *allregs_p = (void *)&psxRegs;
817         u32 *allregs_e = (void *)&rregs;
818         u32 badregs_mask = 0;
819         static u32 ppc, failcount;
820         static u32 badregs_mask_prev;
821         int i, ret, bad = 0, fatal = 0, which_event = -1;
822         u32 ev_cycles = 0;
823         u8 code;
824
825         if (f == NULL)
826                 f = fopen("tracelog", "rb");
827
828         while (1) {
829                 if ((ret = fread(&code, 1, 1, f)) <= 0)
830                         break;
831                 if (ret <= 0)
832                         break;
833                 if (code == 0xff)
834                         break;
835                 switch (code) {
836                 case 0xf8:
837                         which_event = 0;
838                         fread(&which_event, 1, 1, f);
839                         fread(&ev_cycles, 1, 4, f);
840                         continue;
841                 case 0xfb:
842                         fread(&irq_test_cycle_intr, 1, 4, f);
843                         continue;
844                 case 0xfc:
845                         fread(&handler_cycle_intr, 1, 4, f);
846                         continue;
847                 case 0xfd:
848                         fread(&mem_addr, 1, 4, f);
849                         continue;
850                 case 0xfe:
851                         fread(&mem_val, 1, 4, f);
852                         continue;
853                 }
854                 assert(code < offsetof(psxRegisters, intCycle) / 4);
855                 fread(&allregs_e[code], 1, 4, f);
856         }
857
858         if (ret <= 0) {
859                 printf("EOF?\n");
860                 exit(1);
861         }
862
863         psxRegs.code = rregs.code; // don't care
864         psxRegs.cycle += last_count;
865         //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
866         psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
867
868         //if (psxRegs.cycle == 166172) breakme();
869
870         if (which_event >= 0 && psxRegs.event_cycles[which_event] != ev_cycles) {
871                 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
872                         psxRegs.event_cycles[which_event], ev_cycles, psxRegs.cycle);
873                 fatal = 1;
874         }
875
876         if (irq_test_cycle > irq_test_cycle_intr) {
877                 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
878                 fatal = 1;
879         }
880
881         if (handler_cycle != handler_cycle_intr) {
882                 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
883                 fatal = 1;
884         }
885
886         if (mem_val != memcheck_read(mem_addr)) {
887                 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
888                 fatal = 1;
889         }
890
891         if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
892                 failcount = 0;
893                 goto ok;
894         }
895
896         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
897                 if (allregs_p[i] != allregs_e[i]) {
898                         miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
899                         bad++;
900                         if (i >= 32)
901                                 fatal = 1;
902                         else
903                                 badregs_mask |= 1u << i;
904                 }
905         }
906
907         if (badregs_mask_prev & badregs_mask)
908                 failcount++;
909         else
910                 failcount = 0;
911
912         if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
913                 static int last_mcycle;
914                 if (last_mcycle != psxRegs.cycle >> 20) {
915                         printf("%u\n", psxRegs.cycle);
916                         last_mcycle = psxRegs.cycle >> 20;
917                 }
918                 goto ok;
919         }
920
921         for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
922                 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
923                         regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
924                         miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
925         printf("-- %d\n", bad);
926         for (i = 0; i < 8; i++)
927                 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
928                         i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
929         printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc,
930                 psxRegs.cycle, psxRegs.next_interupt);
931         //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
932         //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
933         exit(1);
934 ok:
935         //psxRegs.cycle = rregs.cycle + 2; // sync timing
936         ppc = psxRegs.pc;
937         badregs_mask_prev = badregs_mask;
938 }
939
940 #endif // DRC_DBG