[3DS] Enable threading for the SPU
authorJustin Weiss <justin@justinweiss.com>
Sun, 30 Aug 2020 03:48:12 +0000 (20:48 -0700)
committerJustin Weiss <justin@justinweiss.com>
Sun, 11 Oct 2020 06:36:23 +0000 (23:36 -0700)
Makefile.libretro
frontend/3ds/3ds_utils.h
frontend/3ds/pthread.h
frontend/3ds/semaphore.h [new file with mode: 0644]

index 3f642ae..741ecfa 100644 (file)
@@ -206,7 +206,7 @@ else ifeq ($(platform), ctr)
        CC = $(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT)
        CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT)
        AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT)
-       CFLAGS += -DARM11 -D_3DS -DNO_OS -DNO_DYLIB -DNO_SOCKET -DGPU_UNAI_USE_FLOATMATH -DGPU_UNAI_USE_FLOAT_DIV_MULTINV
+       CFLAGS += -DARM11 -D_3DS -DNO_OS -DNO_DYLIB -DNO_SOCKET -DTHREAD_ENABLED -DGPU_UNAI_USE_FLOATMATH -DGPU_UNAI_USE_FLOAT_DIV_MULTINV
        CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp -mtp=soft
        CFLAGS += -Wall -mword-relocations -fcommon
        CFLAGS += -fomit-frame-pointer -ffast-math -funroll-loops
index 7f5c64a..9211a9e 100644 (file)
@@ -5,6 +5,10 @@
 #include <stdbool.h>
 #include <3ds.h>
 
+#ifdef OS_HEAP_AREA_BEGIN // defined in libctru 2.0+
+#define USE_CTRULIB_2 1
+#endif
+
 #define MEMOP_PROT      6
 #define MEMOP_MAP       4
 #define MEMOP_UNMAP     5
index 4c58fe5..76f1681 100644 (file)
-
-#ifndef _3DS_PTHREAD_WRAP__
-#define _3DS_PTHREAD_WRAP__
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
+/* Copyright  (C) 2010-2020 The RetroArch team
+ *
+ * ---------------------------------------------------------------------------------------
+ * The following license statement only applies to this file (pthread.h).
+ * ---------------------------------------------------------------------------------------
+ *
+ * Permission is hereby granted, free of charge,
+ * to any person obtaining a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _CTR_PTHREAD_WRAP_CTR_
+#define _CTR_PTHREAD_WRAP_CTR_
 
 #include "3ds_utils.h"
 
-#define CTR_PTHREAD_STACK_SIZE 0x10000
-#define FALSE 0
+#include <time.h>
+#include <errno.h>
 
-typedef struct {
-  uint32_t semaphore;
-  LightLock lock;
-  uint32_t waiting;
-} cond_t;
+#define STACKSIZE (4 * 1024)
+#define FALSE 0
 
-#if !defined(PTHREAD_SCOPE_PROCESS)
+#ifndef PTHREAD_SCOPE_PROCESS
 /* An earlier version of devkitARM does not define the pthread types. Can remove in r54+. */
 
-typedef uint32_t pthread_t;
-typedef int pthread_attr_t;
+/* Fallback definitions of the pthread types used by the wrappers in this header. */
+typedef Thread     pthread_t;
+typedef LightLock  pthread_mutex_t;
+typedef void*      pthread_mutexattr_t;
+typedef int        pthread_attr_t;
+/* NOTE(review): the pthread_cond_* wrappers cast pthread_cond_t* to CondVar*;
+ * confirm LightEvent is at least as large as CondVar. */
+typedef LightEvent pthread_cond_t;
+typedef int        pthread_condattr_t;
+#endif
+
+#ifndef USE_CTRULIB_2
+/* Backported CondVar API from libctru 2.0, and under its license:
+   https://github.com/devkitPro/libctru
+   Slightly modified for compatibility with older libctru. */
+
+/* Condition variable state: a signed counter that CondVar_BeginWait decrements
+ * for each waiter and CondVar_EndWait moves back towards zero. */
+typedef s32 CondVar;
+
+/* Calls svcArbitrateAddress on `addr` using the arbiter returned by
+ * __sync_get_arbiter(), passing 0 as the timeout argument. */
+static inline Result syncArbitrateAddress(s32* addr, ArbitrationType type, s32 value)
+{
+   return svcArbitrateAddress(__sync_get_arbiter(), (u32)addr, type, value, 0);
+}
+
+/* Same as syncArbitrateAddress, but forwards `timeout_ns` to
+ * svcArbitrateAddress instead of 0. */
+static inline Result syncArbitrateAddressWithTimeout(s32* addr, ArbitrationType type, s32 value, s64 timeout_ns)
+{
+   return svcArbitrateAddress(__sync_get_arbiter(), (u32)addr, type, value, timeout_ns);
+}
+
+/* Memory barrier: issues the CP15 c7, c10, 5 operation. The "memory" clobber
+ * also stops the compiler from moving memory accesses across this point. */
+static inline void __dmb(void)
+{
+       __asm__ __volatile__("mcr p15, 0, %[val], c7, c10, 5" :: [val] "r" (0) : "memory");
+}
+
+/* Registers the caller as a waiter on `cv`, then releases `lock`.
+ * The counter is decremented with an ldrex/strex retry loop so concurrent
+ * updates are not lost. */
+static inline void CondVar_BeginWait(CondVar* cv, LightLock* lock)
+{
+       s32 val;
+       do
+               val = __ldrex(cv) - 1;
+       while (__strex(cv, val)); /* retry until the exclusive store succeeds */
+       LightLock_Unlock(lock);
+}
+
+/* Removes up to `num_threads` waiters from the counter in `cv`.
+ * A negative `num_threads` resets the counter to 0.
+ * Returns true when the counter was negative (waiters present) before the
+ * update, false otherwise (the counter is then stored back unchanged). */
+static inline bool CondVar_EndWait(CondVar* cv, s32 num_threads)
+{
+       bool hasWaiters;
+       s32 val;
+
+       do {
+               val = __ldrex(cv);
+               hasWaiters = val < 0;
+               if (hasWaiters)
+               {
+                       if (num_threads < 0)
+                               val = 0;
+                       else if (val <= -num_threads)
+                               val += num_threads; /* at least num_threads waiters: remove exactly that many */
+                       else
+                               val = 0; /* fewer waiters than requested: remove them all */
+               }
+       } while (__strex(cv, val)); /* retry until the exclusive store succeeds */
+
+       return hasWaiters;
+}
+
+/* Initialises `cv` to 0, i.e. no registered waiters. */
+static inline void CondVar_Init(CondVar* cv)
+{
+       *cv = 0;
+}
+
+/* Waits on `cv`: registers as a waiter and drops `lock` (CondVar_BeginWait),
+ * blocks in the address arbiter while the counter is below 0, then
+ * re-acquires `lock` before returning. */
+static inline void CondVar_Wait(CondVar* cv, LightLock* lock)
+{
+       CondVar_BeginWait(cv, lock);
+       syncArbitrateAddress(cv, ARBITRATION_WAIT_IF_LESS_THAN, 0);
+       LightLock_Lock(lock);
+}
+
+/* Like CondVar_Wait, but gives up after `timeout_ns` nanoseconds.
+ * `lock` is re-acquired before returning in every case.
+ * Returns non-zero when the arbiter wait timed out and this waiter was still
+ * registered in the counter; returns 0 otherwise. */
+static inline int CondVar_WaitTimeout(CondVar* cv, LightLock* lock, s64 timeout_ns)
+{
+       CondVar_BeginWait(cv, lock);
+
+       bool timedOut = false;
+       Result rc = syncArbitrateAddressWithTimeout(cv, ARBITRATION_WAIT_IF_LESS_THAN_TIMEOUT, 0, timeout_ns);
+       if (R_DESCRIPTION(rc) == RD_TIMEOUT)
+       {
+               /* Nobody woke us: take our own registration back out of the counter. */
+               timedOut = CondVar_EndWait(cv, 1);
+               __dmb();
+       }
+
+       LightLock_Lock(lock);
+       return timedOut;
+}
 
-typedef LightLock pthread_mutex_t;
-typedef int pthread_mutexattr_t;
+/* Wakes up to `num_threads` threads waiting on `cv`.
+ * The arbiter is only signalled when CondVar_EndWait reports that waiters
+ * were registered; otherwise only a second memory barrier is issued. */
+static inline void CondVar_WakeUp(CondVar* cv, s32 num_threads)
+{
+       __dmb();
+       if (CondVar_EndWait(cv, num_threads))
+               syncArbitrateAddress(cv, ARBITRATION_SIGNAL, num_threads);
+       else
+               __dmb();
+}
 
-typedef uint32_t pthread_cond_t;
-typedef int pthread_condattr_t;
+/* Wakes at most one thread waiting on `cv`. */
+static inline void CondVar_Signal(CondVar* cv)
+{
+       CondVar_WakeUp(cv, 1);
+}
 
+/* Wakes the threads waiting on `cv` by passing ARBITRATION_SIGNAL_ALL as the
+ * thread count (presumably a negative "all waiters" value - confirm against
+ * libctru). */
+static inline void CondVar_Broadcast(CondVar* cv)
+{
+       CondVar_WakeUp(cv, ARBITRATION_SIGNAL_ALL);
+}
+/* End libctru 2.0 backport */
 #endif
 
+/* libctru threads return void but pthreads return void pointer */
+static bool mutex_inited = false;
+static LightLock safe_double_thread_launch;
+static void *(*start_routine_jump)(void*);
+
+/*
+ * Entry point handed to threadCreate by pthread_create. It copies the pending
+ * start routine out of the shared start_routine_jump slot, releases
+ * safe_double_thread_launch so another pthread_create may reuse the slot, and
+ * then runs the routine with `data`. The routine's return value is dropped.
+ */
+static void ctr_thread_launcher(void* data)
+{
+       void *(*start_routine_jump_safe)(void*) = start_routine_jump;
+       LightLock_Unlock(&safe_double_thread_launch);
+       start_routine_jump_safe(data);
+}
+
 static inline int pthread_create(pthread_t *thread,
       const pthread_attr_t *attr, void *(*start_routine)(void*), void *arg)
 {
+   s32 prio = 0;
+   Thread new_ctr_thread;
    int procnum = -2; // use default cpu
    bool isNew3DS;
+
    APT_CheckNew3DS(&isNew3DS);
 
    if (isNew3DS)
-     procnum = 2;
+      procnum = 2;
+
+   if (!mutex_inited)
+   {
+      LightLock_Init(&safe_double_thread_launch);
+      mutex_inited = true;
+   }
+
+   /*Must wait if attempting to launch 2 threads at once to prevent corruption of function pointer*/
+   while (LightLock_TryLock(&safe_double_thread_launch) != 0);
+
+   svcGetThreadPriority(&prio, CUR_THREAD_HANDLE);
+
+   start_routine_jump = start_routine;
+   new_ctr_thread     = threadCreate(ctr_thread_launcher, arg, STACKSIZE, prio - 1, procnum, FALSE);
 
-   *thread = threadCreate(start_routine, arg, CTR_PTHREAD_STACK_SIZE, 0x25, procnum, FALSE);
+   if (!new_ctr_thread)
+   {
+      LightLock_Unlock(&safe_double_thread_launch);
+      return EAGAIN;
+   }
+
+   *thread = (pthread_t)new_ctr_thread;
    return 0;
 }
 
-
-static inline int pthread_join(pthread_t thread, void **retval)
+/* Returns the calling thread's handle as reported by threadGetCurrent(). */
+static inline pthread_t pthread_self(void)
 {
-   (void)retval;
+   return (pthread_t)threadGetCurrent();
+}
 
-   if(threadJoin(thread, INT64_MAX))
-      return -1;
+/* Initialises `mutex` as a LightLock. `attr` is not consulted; always returns 0. */
+static inline int pthread_mutex_init(pthread_mutex_t *mutex,
+      const pthread_mutexattr_t *attr)
+{
+   LightLock *light_lock = (LightLock *)mutex;
+
+   LightLock_Init(light_lock);
+   return 0;
+}
 
-   threadFree(thread);
+/* No-op: nothing is released for `mutex`. Always returns 0. */
+static inline int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+   /*Nothing to destroy*/
+   return 0;
+}
 
+/* Acquires `mutex` through LightLock_Lock. Always returns 0. */
+static inline int pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+   LightLock_Lock((LightLock *)mutex);
    return 0;
 }
 
+/* Releases `mutex` through LightLock_Unlock. Always returns 0. */
+static inline int pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+   LightLock *light_lock = (LightLock *)mutex;
+
+   LightLock_Unlock(light_lock);
+   return 0;
+}
 
+/* Ends the calling thread via threadExit(0). `retval` is discarded. */
 static inline void pthread_exit(void *retval)
-{   
+{
+   /*Yes the pointer to int cast is not ideal*/
+   /*threadExit((int)retval);*/
    (void)retval;
 
    threadExit(0);
 }
 
-static inline int pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) {
-  LightLock_Init(mutex);
-  return 0;
+/* Detaches `thread` through threadDetach. Always returns 0. */
+static inline int pthread_detach(pthread_t thread)
+{
+   threadDetach((Thread)thread);
+   return 0;
 }
 
-static inline int pthread_mutex_lock(pthread_mutex_t *mutex) {
-  LightLock_Lock(mutex);
-  return 0;
-}
+/* Waits for `thread` with threadJoin (timeout INT64_MAX) and then frees it
+ * with threadFree. Returns -1 when threadJoin reports a non-zero result (the
+ * thread is not freed in that case), 0 otherwise. `retval` is never written. */
+static inline int pthread_join(pthread_t thread, void **retval)
+{
+   /*retval is ignored*/
+   if(threadJoin((Thread)thread, INT64_MAX))
+      return -1;
 
-static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) {
-  LightLock_Unlock(mutex);
-  return 0;
-}
+   threadFree((Thread)thread);
 
-static inline int pthread_mutex_destroy(pthread_mutex_t *mutex) {
-  return 0;
+   return 0;
 }
 
-static inline int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) {
-  cond_t *cond_data = calloc(1, sizeof(cond_t));
-  if (!cond_data)
-    goto error;
-
-  if (svcCreateSemaphore(&cond_data->semaphore, 0, 1))
-    goto error;
-
-  LightLock_Init(&cond_data->lock);
-  cond_data->waiting = 0;
-  *cond = cond_data;
-  return 0;
+/*
+ * pthread_mutex_trylock: try to take `mutex` without blocking.
+ * Returns 0 when the lock was acquired and EBUSY when it is already held
+ * (LightLock_TryLock reports failure as a non-zero value).
+ */
+static inline int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+   if (LightLock_TryLock((LightLock *)mutex) != 0)
+      return EBUSY;
+
+   return 0;
+}
 
- error:
-  svcCloseHandle(cond_data->semaphore);
-  if (cond_data)
-    free(cond_data);
-  return -1;
+/* Blocks on `cond` via CondVar_Wait, passing `mutex` as the LightLock that
+ * CondVar_Wait releases and re-acquires. Always returns 0. */
+static inline int pthread_cond_wait(pthread_cond_t *cond,
+      pthread_mutex_t *mutex)
+{
+   CondVar_Wait((CondVar *)cond, (LightLock *)mutex);
+   return 0;
 }
 
-static inline int pthread_cond_signal(pthread_cond_t *cond) {
-  int32_t count;
-  cond_t *cond_data = (cond_t *)*cond;
-  LightLock_Lock(&cond_data->lock);
-  if (cond_data->waiting) {
-    cond_data->waiting--;
-    svcReleaseSemaphore(&count, cond_data->semaphore, 1);
-  }
-  LightLock_Unlock(&cond_data->lock);
-  return 0;
+/*
+ * pthread_cond_timedwait: wait on `cond` until it is signalled or until the
+ * absolute time `abstime` (compared against gettimeofday) has passed.
+ *
+ * cond    - condition variable to wait on.
+ * mutex   - lock handed to CondVar_WaitTimeout for the duration of the wait.
+ * abstime - absolute deadline.
+ *
+ * Returns 0 when woken before the deadline, ETIMEDOUT when the deadline is
+ * already in the past (no wait is performed) or when the wait timed out.
+ */
+static inline int pthread_cond_timedwait(pthread_cond_t *cond,
+      pthread_mutex_t *mutex, const struct timespec *abstime)
+{
+   /* Missing clock_gettime, so read the current time with gettimeofday. */
+   struct timeval tm;
+   s64 timeout;
+
+   gettimeofday(&tm, NULL);
+
+   /* Widen to 64 bits before scaling to nanoseconds: with a 32-bit time_t the
+    * multiplication would overflow for deadlines more than ~2 seconds away. */
+   timeout  = ((s64)abstime->tv_sec - (s64)tm.tv_sec) * 1000000000LL;
+   timeout += (s64)abstime->tv_nsec - (s64)tm.tv_usec * 1000;
+
+   if (timeout < 0)
+      return ETIMEDOUT;
+
+   if (CondVar_WaitTimeout((CondVar *)cond, (LightLock *)mutex, timeout))
+      return ETIMEDOUT;
+
+   return 0;
 }
 
-static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *lock) {
-  cond_t *cond_data = (cond_t *)*cond;
-  LightLock_Lock(&cond_data->lock);
-  cond_data->waiting++;
-  LightLock_Unlock(lock);
-  LightLock_Unlock(&cond_data->lock);
-  svcWaitSynchronization(cond_data->semaphore, INT64_MAX);
-  LightLock_Lock(lock);
-  return 0;
+/* Initialises `cond` via CondVar_Init. `attr` is not consulted; always returns 0. */
+static inline int pthread_cond_init(pthread_cond_t *cond,
+      const pthread_condattr_t *attr)
+{
+   CondVar_Init((CondVar *)cond);
+   return 0;
 }
 
-static inline int pthread_cond_destroy(pthread_cond_t *cond) {
-  if (*cond) {
-    cond_t *cond_data = (cond_t *)*cond;
+/* Signals `cond` through CondVar_Signal. Always returns 0. */
+static inline int pthread_cond_signal(pthread_cond_t *cond)
+{
+   CondVar *cond_var = (CondVar *)cond;
+
+   CondVar_Signal(cond_var);
+   return 0;
+}
 
-    svcCloseHandle(cond_data->semaphore);
-    free(*cond);
-  }
-  *cond = 0;
-  return 0;
+/* Broadcasts on `cond` through CondVar_Broadcast. Always returns 0. */
+static inline int pthread_cond_broadcast(pthread_cond_t *cond)
+{
+   CondVar_Broadcast((CondVar *)cond);
+   return 0;
 }
 
+/* No-op: nothing is released for `cond`. Always returns 0. */
+static inline int pthread_cond_destroy(pthread_cond_t *cond)
+{
+   /*Nothing to destroy*/
+   return 0;
+}
 
-#endif //_3DS_PTHREAD_WRAP__
+/* Returns 1 when both thread values resolve to the same handle, 0 otherwise. */
+static inline int pthread_equal(pthread_t t1, pthread_t t2)
+{
+   return (threadGetHandle((Thread)t1) == threadGetHandle((Thread)t2)) ? 1 : 0;
+}
 
+#endif
diff --git a/frontend/3ds/semaphore.h b/frontend/3ds/semaphore.h
new file mode 100644 (file)
index 0000000..6eddd98
--- /dev/null
@@ -0,0 +1,35 @@
+
+#ifndef _3DS_SEMAPHORE_WRAP__
+#define _3DS_SEMAPHORE_WRAP__
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "3ds_utils.h"
+
+typedef uint32_t sem_t;
+
+static inline int sem_init(sem_t *sem, int pshared, unsigned int value)
+{  /* Creates the semaphore with initial count `value` and maximum INT32_MAX; `pshared` is unused. Returns the svcCreateSemaphore result. */
+   return svcCreateSemaphore(sem, value, INT32_MAX);
+}
+
+static inline int sem_post(sem_t *sem)
+{  /* Releases the semaphore by 1 and returns the svcReleaseSemaphore result. */
+   int32_t count; /* receives the count reported by svcReleaseSemaphore; not used further */
+   return svcReleaseSemaphore(&count, *sem, 1);
+}
+
+static inline int sem_wait(sem_t *sem)
+{  /* Waits on the semaphore handle with a timeout of INT64_MAX; returns the svcWaitSynchronization result. */
+   return svcWaitSynchronization(*sem, INT64_MAX);
+}
+
+static inline int sem_destroy(sem_t *sem)
+{  /* Closes the semaphore handle and returns the svcCloseHandle result. */
+   return svcCloseHandle(*sem);
+}
+
+#endif //_3DS_SEMAPHORE_WRAP__
+