| 1 | /* |
| 2 | libco.amd64 (2009-10-12) |
| 3 | author: byuu |
| 4 | license: public domain |
| 5 | */ |
| 6 | |
| 7 | #define LIBCO_C |
| 8 | #include <libco.h> |
| 9 | #include <assert.h> |
| 10 | #include <stdlib.h> |
| 11 | |
/* Select the assembly-language co_switch when building as C with a
   GNU-compatible compiler for a non-Windows target. In every other
   configuration co_switch calls the byte-array routine through co_swap. */
#if defined(__GNUC__) && !(defined(_WIN32) || defined(__cplusplus))
#define CO_USE_INLINE_ASM
#endif
| 15 | |
| 16 | #ifdef __cplusplus |
| 17 | extern "C" { |
| 18 | #endif |
| 19 | |
| 20 | static thread_local long long co_active_buffer[64]; |
| 21 | static thread_local cothread_t co_active_handle = 0; |
| 22 | #ifndef CO_USE_INLINE_ASM |
| 23 | static void (*co_swap)(cothread_t, cothread_t) = 0; |
| 24 | #endif |
| 25 | |
| 26 | #ifdef _WIN32 |
/* ABI: Win64 */
/* On Windows, cothread handles come from malloc(), which guarantees at least
   16-byte alignment, so they can be used with movaps without any further
   alignment step. Note that this argument only covers malloc'ed handles; the
   statically allocated co_active_buffer has to be 16-byte aligned by other
   means. */
/*
 * Win64 context switch, stored as raw x86-64 machine code and called as
 * co_swap(to, from): rcx = context being resumed, rdx = context being
 * suspended.
 *
 * Save-area layout (byte offsets from the handle):
 *     0  rsp      8  rbp     16  rsi     24  rdi     32  rbx
 *    40  r12     48  r13     56  r14     64  r15
 *    80  xmm6 ... 224  xmm15 (16 bytes each; omitted with LIBCO_NO_SSE)
 *
 * The routine switches stacks first, pops the resume address off the new
 * stack into rax, swaps the callee-saved registers and finally jumps to rax.
 * Half-way through each SSE run the base register is advanced by 112 so that
 * every displacement still fits in one byte.
 */
static unsigned char co_swap_function[] = {
  /* switch stacks and fetch the resume address */
  0x48, 0x89, 0x22,                   /* mov    [rdx],rsp      */
  0x48, 0x8b, 0x21,                   /* mov    rsp,[rcx]      */
  0x58,                               /* pop    rax            */

  /* save the outgoing integer registers */
  0x48, 0x89, 0x6a, 0x08,             /* mov    [rdx+ 8],rbp   */
  0x48, 0x89, 0x72, 0x10,             /* mov    [rdx+16],rsi   */
  0x48, 0x89, 0x7a, 0x18,             /* mov    [rdx+24],rdi   */
  0x48, 0x89, 0x5a, 0x20,             /* mov    [rdx+32],rbx   */
  0x4c, 0x89, 0x62, 0x28,             /* mov    [rdx+40],r12   */
  0x4c, 0x89, 0x6a, 0x30,             /* mov    [rdx+48],r13   */
  0x4c, 0x89, 0x72, 0x38,             /* mov    [rdx+56],r14   */
  0x4c, 0x89, 0x7a, 0x40,             /* mov    [rdx+64],r15   */
#ifndef LIBCO_NO_SSE
  /* save the outgoing SSE registers */
  0x0f, 0x29, 0x72, 0x50,             /* movaps [rdx+ 80],xmm6  */
  0x0f, 0x29, 0x7a, 0x60,             /* movaps [rdx+ 96],xmm7  */
  0x44, 0x0f, 0x29, 0x42, 0x70,       /* movaps [rdx+112],xmm8  */
  0x48, 0x83, 0xc2, 0x70,             /* add    rdx,112         */
  0x44, 0x0f, 0x29, 0x4a, 0x10,       /* movaps [rdx+ 16],xmm9  */
  0x44, 0x0f, 0x29, 0x52, 0x20,       /* movaps [rdx+ 32],xmm10 */
  0x44, 0x0f, 0x29, 0x5a, 0x30,       /* movaps [rdx+ 48],xmm11 */
  0x44, 0x0f, 0x29, 0x62, 0x40,       /* movaps [rdx+ 64],xmm12 */
  0x44, 0x0f, 0x29, 0x6a, 0x50,       /* movaps [rdx+ 80],xmm13 */
  0x44, 0x0f, 0x29, 0x72, 0x60,       /* movaps [rdx+ 96],xmm14 */
  0x44, 0x0f, 0x29, 0x7a, 0x70,       /* movaps [rdx+112],xmm15 */
#endif

  /* load the incoming integer registers */
  0x48, 0x8b, 0x69, 0x08,             /* mov    rbp,[rcx+ 8]   */
  0x48, 0x8b, 0x71, 0x10,             /* mov    rsi,[rcx+16]   */
  0x48, 0x8b, 0x79, 0x18,             /* mov    rdi,[rcx+24]   */
  0x48, 0x8b, 0x59, 0x20,             /* mov    rbx,[rcx+32]   */
  0x4c, 0x8b, 0x61, 0x28,             /* mov    r12,[rcx+40]   */
  0x4c, 0x8b, 0x69, 0x30,             /* mov    r13,[rcx+48]   */
  0x4c, 0x8b, 0x71, 0x38,             /* mov    r14,[rcx+56]   */
  0x4c, 0x8b, 0x79, 0x40,             /* mov    r15,[rcx+64]   */
#ifndef LIBCO_NO_SSE
  /* load the incoming SSE registers */
  0x0f, 0x28, 0x71, 0x50,             /* movaps xmm6, [rcx+ 80] */
  0x0f, 0x28, 0x79, 0x60,             /* movaps xmm7, [rcx+ 96] */
  0x44, 0x0f, 0x28, 0x41, 0x70,       /* movaps xmm8, [rcx+112] */
  0x48, 0x83, 0xc1, 0x70,             /* add    rcx,112         */
  0x44, 0x0f, 0x28, 0x49, 0x10,       /* movaps xmm9, [rcx+ 16] */
  0x44, 0x0f, 0x28, 0x51, 0x20,       /* movaps xmm10,[rcx+ 32] */
  0x44, 0x0f, 0x28, 0x59, 0x30,       /* movaps xmm11,[rcx+ 48] */
  0x44, 0x0f, 0x28, 0x61, 0x40,       /* movaps xmm12,[rcx+ 64] */
  0x44, 0x0f, 0x28, 0x69, 0x50,       /* movaps xmm13,[rcx+ 80] */
  0x44, 0x0f, 0x28, 0x71, 0x60,       /* movaps xmm14,[rcx+ 96] */
  0x44, 0x0f, 0x28, 0x79, 0x70,       /* movaps xmm15,[rcx+112] */
#endif

  /* continue in the resumed context */
  0xff, 0xe0,                         /* jmp    rax            */
};
| 79 | |
| 80 | #include <windows.h> |
| 81 | |
| 82 | static void co_init(void) |
| 83 | { |
| 84 | DWORD old_privileges; |
| 85 | VirtualProtect(co_swap_function, |
| 86 | sizeof(co_swap_function), PAGE_EXECUTE_READWRITE, &old_privileges); |
| 87 | } |
| 88 | #else |
| 89 | /* ABI: SystemV */ |
| 90 | #ifndef CO_USE_INLINE_ASM |
/*
 * System V context switch, stored as raw x86-64 machine code and called as
 * co_swap(to, from): rdi = context being resumed, rsi = context being
 * suspended.
 *
 * Save-area layout (byte offsets from the handle):
 *   0x00 rsp   0x08 rbp   0x10 rbx   0x18 r12   0x20 r13   0x28 r14   0x30 r15
 *
 * The routine switches stacks first, pops the resume address off the new
 * stack into rax, swaps the callee-saved registers and finally jumps to rax.
 */
static unsigned char co_swap_function[] = {
  /* switch stacks and fetch the resume address */
  0x48, 0x89, 0x26,          /* mov [rsi],rsp      */
  0x48, 0x8b, 0x27,          /* mov rsp,[rdi]      */
  0x58,                      /* pop rax            */

  /* save the outgoing registers */
  0x48, 0x89, 0x6e, 0x08,    /* mov [rsi+0x08],rbp */
  0x48, 0x89, 0x5e, 0x10,    /* mov [rsi+0x10],rbx */
  0x4c, 0x89, 0x66, 0x18,    /* mov [rsi+0x18],r12 */
  0x4c, 0x89, 0x6e, 0x20,    /* mov [rsi+0x20],r13 */
  0x4c, 0x89, 0x76, 0x28,    /* mov [rsi+0x28],r14 */
  0x4c, 0x89, 0x7e, 0x30,    /* mov [rsi+0x30],r15 */

  /* load the incoming registers */
  0x48, 0x8b, 0x6f, 0x08,    /* mov rbp,[rdi+0x08] */
  0x48, 0x8b, 0x5f, 0x10,    /* mov rbx,[rdi+0x10] */
  0x4c, 0x8b, 0x67, 0x18,    /* mov r12,[rdi+0x18] */
  0x4c, 0x8b, 0x6f, 0x20,    /* mov r13,[rdi+0x20] */
  0x4c, 0x8b, 0x77, 0x28,    /* mov r14,[rdi+0x28] */
  0x4c, 0x8b, 0x7f, 0x30,    /* mov r15,[rdi+0x30] */

  /* continue in the resumed context */
  0xff, 0xe0,                /* jmp rax            */
};
| 109 | |
| 110 | #include <unistd.h> |
| 111 | #include <sys/mman.h> |
| 112 | |
| 113 | static void co_init(void) |
| 114 | { |
| 115 | unsigned long long addr = (unsigned long long)co_swap_function; |
| 116 | unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE)); |
| 117 | unsigned long long size = (addr - base) + sizeof(co_swap_function); |
| 118 | mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC); |
| 119 | } |
| 120 | #else |
/* The inline-assembly co_switch lives in the normal code section, so there is
   no byte array whose protection would need changing. */
static void co_init(void)
{
}
| 122 | #endif |
| 123 | #endif |
| 124 | |
/*
 * Return target that co_create() plants directly above every cothread
 * entrypoint on the new stack, so it runs only if an entrypoint returns.
 *
 * This function must never return itself: co_create() writes nothing above
 * the crash slot, so a return from here would jump to whatever bytes happen
 * to follow. assert() alone is not enough because it expands to nothing when
 * NDEBUG is defined, hence the unconditional abort().
 */
static void crash(void)
{
  assert(0); /* called only if cothread_t entrypoint returns */
  abort();   /* still terminate when assert() is compiled out */
}
| 129 | |
| 130 | cothread_t co_active(void) |
| 131 | { |
| 132 | if (!co_active_handle) |
| 133 | co_active_handle = &co_active_buffer; |
| 134 | return co_active_handle; |
| 135 | } |
| 136 | |
| 137 | cothread_t co_create(unsigned int size, void (*entrypoint)(void)) |
| 138 | { |
| 139 | cothread_t handle; |
| 140 | #ifndef CO_USE_INLINE_ASM |
| 141 | if (!co_swap) |
| 142 | { |
| 143 | co_init(); |
| 144 | co_swap = (void (*)(cothread_t, cothread_t))co_swap_function; |
| 145 | } |
| 146 | #endif |
| 147 | |
| 148 | if (!co_active_handle) |
| 149 | co_active_handle = &co_active_buffer; |
| 150 | size += 512; /* allocate additional space for storage */ |
| 151 | size &= ~15; /* align stack to 16-byte boundary */ |
| 152 | |
| 153 | #ifdef __GENODE__ |
| 154 | if ((handle = (cothread_t)genode_alloc_secondary_stack(size))) |
| 155 | { |
| 156 | long long *p = (long long*)((char*)handle); /* OS returns top of stack */ |
| 157 | *--p = (long long)crash; /* crash if entrypoint returns */ |
| 158 | *--p = (long long)entrypoint; /* start of function */ |
| 159 | *(long long*)handle = (long long)p; /* stack pointer */ |
| 160 | } |
| 161 | #else |
| 162 | if ((handle = (cothread_t)malloc(size))) |
| 163 | { |
| 164 | long long *p = (long long*)((char*)handle + size); /* seek to top of stack */ |
| 165 | *--p = (long long)crash; /* crash if entrypoint returns */ |
| 166 | *--p = (long long)entrypoint; /* start of function */ |
| 167 | *(long long*)handle = (long long)p; /* stack pointer */ |
| 168 | } |
| 169 | #endif |
| 170 | |
| 171 | return handle; |
| 172 | } |
| 173 | |
| 174 | void co_delete(cothread_t handle) |
| 175 | { |
| 176 | #ifdef __GENODE__ |
| 177 | genode_free_secondary_stack(handle); |
| 178 | #else |
| 179 | free(handle); |
| 180 | #endif |
| 181 | } |
| 182 | |
| 183 | #ifndef CO_USE_INLINE_ASM |
| 184 | void co_switch(cothread_t handle) |
| 185 | { |
| 186 | register cothread_t co_previous_handle = co_active_handle; |
| 187 | co_swap(co_active_handle = handle, co_previous_handle); |
| 188 | } |
| 189 | #else |
| 190 | #ifdef __APPLE__ |
| 191 | #define ASM_PREFIX "_" |
| 192 | #else |
| 193 | #define ASM_PREFIX "" |
| 194 | #endif |
| 195 | __asm__( |
| 196 | ".intel_syntax noprefix \n" |
| 197 | ".globl " ASM_PREFIX "co_switch \n" |
| 198 | ASM_PREFIX "co_switch: \n" |
| 199 | "mov rsi, [rip+" ASM_PREFIX "co_active_handle]\n" |
| 200 | "mov [rsi],rsp \n" |
| 201 | "mov [rsi+0x08],rbp \n" |
| 202 | "mov [rsi+0x10],rbx \n" |
| 203 | "mov [rsi+0x18],r12 \n" |
| 204 | "mov [rsi+0x20],r13 \n" |
| 205 | "mov [rsi+0x28],r14 \n" |
| 206 | "mov [rsi+0x30],r15 \n" |
| 207 | "mov [rip+" ASM_PREFIX "co_active_handle], rdi\n" |
| 208 | "mov rsp,[rdi] \n" |
| 209 | "mov rbp,[rdi+0x08] \n" |
| 210 | "mov rbx,[rdi+0x10] \n" |
| 211 | "mov r12,[rdi+0x18] \n" |
| 212 | "mov r13,[rdi+0x20] \n" |
| 213 | "mov r14,[rdi+0x28] \n" |
| 214 | "mov r15,[rdi+0x30] \n" |
| 215 | "ret \n" |
| 216 | ".att_syntax \n" |
| 217 | ); |
| 218 | #endif |
| 219 | |
| 220 | #ifdef __cplusplus |
| 221 | } |
| 222 | #endif |