+ int ret;
+
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ // the asm doesn't bother to save callee-save vector regs, so do it here
+ __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
+ if (gpu.state.enhancement_active)
+ ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
+ else
+ ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
+ ex_regs[1] &= ~0x1ff;
+ ex_regs[1] |= egpu.texture_settings & 0x1ff;
+ return ret;
+}
+
+#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2)
+
+// Translate native VRAM coordinates into the 2x-scaled enhancement
+// buffer: x/y/w/h are doubled in place and *vram_h reports the doubled
+// buffer height (1024). Installed as the gpu.get_enhancement_bufer
+// callback -- the "bufer" spelling matches the struct field name used
+// elsewhere, so it is kept for interface compatibility.
+// Returns a pointer to the backing buffer chosen for this x position.
+static uint16_t *get_enhancement_bufer(int *x, int *y, int *w, int *h,
+ int *vram_h)
+{
+ // select which backing buffer covers this x range -- the enhancement
+ // area is split into several 1024-wide buffers (see the comment in
+ // map_enhancement_buffer)
+ uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x);
+
+ *x *= 2;
+ *y *= 2;
+ *w = *w * 2;
+ *h = *h * 2;
+ *vram_h = 1024;
+ return ret;
+}
+
+static void map_enhancement_buffer(void)
+{
+ // currently we use 4x 1024*1024 buffers instead of single 2048*1024
+ // to be able to reuse 1024-width code better (triangle setup,
+ // dithering phase, lines).
+ egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE);
+ if (egpu.enhancement_buf_ptr == NULL) {
+ fprintf(stderr, "failed to map enhancement buffer\n");
+ gpu.get_enhancement_bufer = NULL;
+ }
+ else {
+ egpu.enhancement_buf_ptr += 4096 / 2;
+ gpu.get_enhancement_bufer = get_enhancement_bufer;
+ }