gpu_neon: fix some missing ebuf updates
[pcsx_rearmed.git] / deps / libretro-common / libco / amd64.c
CommitLineData
3719602c
PC
/*
  libco.amd64 (2009-10-12)
  author: byuu
  license: public domain
*/
6
7#define LIBCO_C
8#include <libco.h>
9#include <assert.h>
10#include <stdlib.h>
11
12#if defined(__GNUC__) && !defined(_WIN32) && !defined(__cplusplus)
13#define CO_USE_INLINE_ASM
14#endif
15
16#ifdef __cplusplus
17extern "C" {
18#endif
19
20static thread_local long long co_active_buffer[64];
21static thread_local cothread_t co_active_handle = 0;
22#ifndef CO_USE_INLINE_ASM
23static void (*co_swap)(cothread_t, cothread_t) = 0;
24#endif
25
26#ifdef _WIN32
/* ABI: Win64
 *
 * Hand-assembled context switch. The array is cast to
 * void (*)(cothread_t, cothread_t) and called as co_swap(target, current),
 * so rcx holds the handle to resume and rdx the handle to save into.
 *
 * Save-area layout (byte offsets from the handle):
 *     0 rsp    8 rbp   16 rsi   24 rdi   32 rbx
 *    40 r12   48 r13   56 r14   64 r15
 *    80..224  xmm6..xmm15, 16 bytes each (omitted with LIBCO_NO_SSE)
 *
 * movaps needs 16-byte aligned operands. Handles created by co_create() come
 * from malloc, which on Windows returns memory with at least 16-byte
 * alignment, so no extra alignment step is performed.
 * NOTE(review): the initial handle is the static co_active_buffer rather than
 * a malloc block - confirm it is 16-byte aligned as well. */
static unsigned char co_swap_function[] = {
   0x48, 0x89, 0x22,              /* mov [rdx],rsp          */
   0x48, 0x8b, 0x21,              /* mov rsp,[rcx]          */
   0x58,                          /* pop rax  (resume address from the new stack) */
   0x48, 0x89, 0x6a, 0x08,        /* mov [rdx+ 8],rbp       */
   0x48, 0x89, 0x72, 0x10,        /* mov [rdx+16],rsi       */
   0x48, 0x89, 0x7a, 0x18,        /* mov [rdx+24],rdi       */
   0x48, 0x89, 0x5a, 0x20,        /* mov [rdx+32],rbx       */
   0x4c, 0x89, 0x62, 0x28,        /* mov [rdx+40],r12       */
   0x4c, 0x89, 0x6a, 0x30,        /* mov [rdx+48],r13       */
   0x4c, 0x89, 0x72, 0x38,        /* mov [rdx+56],r14       */
   0x4c, 0x89, 0x7a, 0x40,        /* mov [rdx+64],r15       */
#if !defined(LIBCO_NO_SSE)
   0x0f, 0x29, 0x72, 0x50,        /* movaps [rdx+ 80],xmm6  */
   0x0f, 0x29, 0x7a, 0x60,        /* movaps [rdx+ 96],xmm7  */
   0x44, 0x0f, 0x29, 0x42, 0x70,  /* movaps [rdx+112],xmm8  */
   /* rebase so the remaining stores still fit an 8-bit displacement */
   0x48, 0x83, 0xc2, 0x70,        /* add rdx,112            */
   0x44, 0x0f, 0x29, 0x4a, 0x10,  /* movaps [rdx+ 16],xmm9  */
   0x44, 0x0f, 0x29, 0x52, 0x20,  /* movaps [rdx+ 32],xmm10 */
   0x44, 0x0f, 0x29, 0x5a, 0x30,  /* movaps [rdx+ 48],xmm11 */
   0x44, 0x0f, 0x29, 0x62, 0x40,  /* movaps [rdx+ 64],xmm12 */
   0x44, 0x0f, 0x29, 0x6a, 0x50,  /* movaps [rdx+ 80],xmm13 */
   0x44, 0x0f, 0x29, 0x72, 0x60,  /* movaps [rdx+ 96],xmm14 */
   0x44, 0x0f, 0x29, 0x7a, 0x70,  /* movaps [rdx+112],xmm15 */
#endif
   0x48, 0x8b, 0x69, 0x08,        /* mov rbp,[rcx+ 8]       */
   0x48, 0x8b, 0x71, 0x10,        /* mov rsi,[rcx+16]       */
   0x48, 0x8b, 0x79, 0x18,        /* mov rdi,[rcx+24]       */
   0x48, 0x8b, 0x59, 0x20,        /* mov rbx,[rcx+32]       */
   0x4c, 0x8b, 0x61, 0x28,        /* mov r12,[rcx+40]       */
   0x4c, 0x8b, 0x69, 0x30,        /* mov r13,[rcx+48]       */
   0x4c, 0x8b, 0x71, 0x38,        /* mov r14,[rcx+56]       */
   0x4c, 0x8b, 0x79, 0x40,        /* mov r15,[rcx+64]       */
#if !defined(LIBCO_NO_SSE)
   0x0f, 0x28, 0x71, 0x50,        /* movaps xmm6, [rcx+ 80] */
   0x0f, 0x28, 0x79, 0x60,        /* movaps xmm7, [rcx+ 96] */
   0x44, 0x0f, 0x28, 0x41, 0x70,  /* movaps xmm8, [rcx+112] */
   0x48, 0x83, 0xc1, 0x70,        /* add rcx,112            */
   0x44, 0x0f, 0x28, 0x49, 0x10,  /* movaps xmm9, [rcx+ 16] */
   0x44, 0x0f, 0x28, 0x51, 0x20,  /* movaps xmm10,[rcx+ 32] */
   0x44, 0x0f, 0x28, 0x59, 0x30,  /* movaps xmm11,[rcx+ 48] */
   0x44, 0x0f, 0x28, 0x61, 0x40,  /* movaps xmm12,[rcx+ 64] */
   0x44, 0x0f, 0x28, 0x69, 0x50,  /* movaps xmm13,[rcx+ 80] */
   0x44, 0x0f, 0x28, 0x71, 0x60,  /* movaps xmm14,[rcx+ 96] */
   0x44, 0x0f, 0x28, 0x79, 0x70,  /* movaps xmm15,[rcx+112] */
#endif
   0xff, 0xe0,                    /* jmp rax                */
};
79
80#include <windows.h>
81
82static void co_init(void)
83{
84 DWORD old_privileges;
85 VirtualProtect(co_swap_function,
86 sizeof(co_swap_function), PAGE_EXECUTE_READWRITE, &old_privileges);
87}
88#else
89/* ABI: SystemV */
90#ifndef CO_USE_INLINE_ASM
/* Hand-assembled context switch for the System V ABI. The array is cast to
 * void (*)(cothread_t, cothread_t) and called as co_swap(target, current),
 * so rdi holds the handle to resume and rsi the handle to save into.
 *
 * Save-area layout (byte offsets from the handle):
 *   0x00 rsp  0x08 rbp  0x10 rbx  0x18 r12  0x20 r13  0x28 r14  0x30 r15 */
static unsigned char co_swap_function[] = {
   0x48, 0x89, 0x26,        /* mov [rsi],rsp      */
   0x48, 0x8b, 0x27,        /* mov rsp,[rdi]      */
   0x58,                    /* pop rax  (resume address from the new stack) */
   0x48, 0x89, 0x6e, 0x08,  /* mov [rsi+0x08],rbp */
   0x48, 0x89, 0x5e, 0x10,  /* mov [rsi+0x10],rbx */
   0x4c, 0x89, 0x66, 0x18,  /* mov [rsi+0x18],r12 */
   0x4c, 0x89, 0x6e, 0x20,  /* mov [rsi+0x20],r13 */
   0x4c, 0x89, 0x76, 0x28,  /* mov [rsi+0x28],r14 */
   0x4c, 0x89, 0x7e, 0x30,  /* mov [rsi+0x30],r15 */
   0x48, 0x8b, 0x6f, 0x08,  /* mov rbp,[rdi+0x08] */
   0x48, 0x8b, 0x5f, 0x10,  /* mov rbx,[rdi+0x10] */
   0x4c, 0x8b, 0x67, 0x18,  /* mov r12,[rdi+0x18] */
   0x4c, 0x8b, 0x6f, 0x20,  /* mov r13,[rdi+0x20] */
   0x4c, 0x8b, 0x77, 0x28,  /* mov r14,[rdi+0x28] */
   0x4c, 0x8b, 0x7f, 0x30,  /* mov r15,[rdi+0x30] */
   0xff, 0xe0,              /* jmp rax            */
};
109
110#include <unistd.h>
111#include <sys/mman.h>
112
113static void co_init(void)
114{
115 unsigned long long addr = (unsigned long long)co_swap_function;
116 unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE));
117 unsigned long long size = (addr - base) + sizeof(co_swap_function);
118 mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
119}
120#else
/* Inline-assembly build: there is no co_swap_function byte array to make
 * executable, so initialisation has nothing to do. */
static void co_init(void)
{
}
122#endif
123#endif
124
/* Return target planted above every cothread entrypoint by co_create().
 * It is reached only if an entrypoint returns, which is a usage error. */
static void crash(void)
{
   assert(0); /* called only if cothread_t entrypoint returns */

   /* With NDEBUG the assert vanishes; never return from here, because there
    * is no valid return address above this frame on the cothread stack. */
   abort();
}
129
130cothread_t co_active(void)
131{
132 if (!co_active_handle)
133 co_active_handle = &co_active_buffer;
134 return co_active_handle;
135}
136
137cothread_t co_create(unsigned int size, void (*entrypoint)(void))
138{
139 cothread_t handle;
140#ifndef CO_USE_INLINE_ASM
141 if (!co_swap)
142 {
143 co_init();
144 co_swap = (void (*)(cothread_t, cothread_t))co_swap_function;
145 }
146#endif
147
148 if (!co_active_handle)
149 co_active_handle = &co_active_buffer;
150 size += 512; /* allocate additional space for storage */
151 size &= ~15; /* align stack to 16-byte boundary */
152
153#ifdef __GENODE__
154 if ((handle = (cothread_t)genode_alloc_secondary_stack(size)))
155 {
156 long long *p = (long long*)((char*)handle); /* OS returns top of stack */
157 *--p = (long long)crash; /* crash if entrypoint returns */
158 *--p = (long long)entrypoint; /* start of function */
159 *(long long*)handle = (long long)p; /* stack pointer */
160 }
161#else
162 if ((handle = (cothread_t)malloc(size)))
163 {
164 long long *p = (long long*)((char*)handle + size); /* seek to top of stack */
165 *--p = (long long)crash; /* crash if entrypoint returns */
166 *--p = (long long)entrypoint; /* start of function */
167 *(long long*)handle = (long long)p; /* stack pointer */
168 }
169#endif
170
171 return handle;
172}
173
174void co_delete(cothread_t handle)
175{
176#ifdef __GENODE__
177 genode_free_secondary_stack(handle);
178#else
179 free(handle);
180#endif
181}
182
183#ifndef CO_USE_INLINE_ASM
184void co_switch(cothread_t handle)
185{
186 register cothread_t co_previous_handle = co_active_handle;
187 co_swap(co_active_handle = handle, co_previous_handle);
188}
189#else
/* Symbol names carry a leading underscore on Apple platforms. */
#ifdef __APPLE__
#define ASM_PREFIX "_"
#else
#define ASM_PREFIX ""
#endif

/* co_switch(rdi = target handle), written directly in assembly.
 *
 * Stores rsp, rbp, rbx and r12-r15 into the save area of the current handle
 * (read from co_active_handle), records the target as the new active handle,
 * reloads the same registers from the target's save area and returns on the
 * target's stack. For a cothread that has never run, the value on top of its
 * stack is the entrypoint placed there by co_create(), so "ret" enters it.
 *
 * NOTE(review): co_active_handle is declared thread_local but is addressed
 * rip-relative here, which is only correct if thread_local expands to
 * nothing in this configuration - confirm against libco.h. */
__asm__(
".intel_syntax noprefix \n"
".globl " ASM_PREFIX "co_switch \n"
ASM_PREFIX "co_switch: \n"
"mov rsi, [rip+" ASM_PREFIX "co_active_handle]\n"
"mov [rsi],rsp \n"
"mov [rsi+0x08],rbp \n"
"mov [rsi+0x10],rbx \n"
"mov [rsi+0x18],r12 \n"
"mov [rsi+0x20],r13 \n"
"mov [rsi+0x28],r14 \n"
"mov [rsi+0x30],r15 \n"
"mov [rip+" ASM_PREFIX "co_active_handle], rdi\n"
"mov rsp,[rdi] \n"
"mov rbp,[rdi+0x08] \n"
"mov rbx,[rdi+0x10] \n"
"mov r12,[rdi+0x18] \n"
"mov r13,[rdi+0x20] \n"
"mov r14,[rdi+0x28] \n"
"mov r15,[rdi+0x30] \n"
"ret \n"
".att_syntax \n"
);
218#endif
219
220#ifdef __cplusplus
221}
222#endif