- const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16);
- u16 *src, *dst;
- int w1, fb_index;
-
- w += x & (step_x - 1);
- x &= ~(step_x - 1);
- w = (w + step_x - 1) & ~(step_x - 1);
- if (y + h > 512)
- h = 512 - y;
-
- while (w > 0) {
- fb_index = egpu.enhancement_buf_by_x16[x / step_x];
- for (w1 = 0; w > 0; w1++, w -= step_x)
- if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1])
- break;
-
- src = gpu.vram + y * 1024 + x;
- dst = select_enhancement_buf_ptr(&egpu, x);
- dst += (y * 1024 + x) * 2;
- scale2x_tiles8(dst, src, w1 * step_x / 8, h);
-
- x += w1 * step_x;
+ int i, right = x + w, bottom = y + h;
+ const u16 *src = gpu.vram;
+ // use these because the scanout struct may hold reduced w, h
+ // due to intersection stuff, see the update_enhancement_buf_scanouts() mess
+ int s_w = max(gpu.screen.hres, gpu.screen.w);
+ int s_h = gpu.screen.vres;
+ s_w = min(s_w, 512);
+ for (i = 0; i < ARRAY_SIZE(egpu.enhancement_scanouts); i++) {
+ const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i];
+ u16 *dst = select_enhancement_buf_by_index(&egpu, i);
+ int x1, x2, y1, y2;
+ if (s->w == 0) continue;
+ if (s->x >= right) continue;
+ if (s->x + s_w <= x) continue;
+ if (s->y >= bottom) continue;
+ if (s->y + s_h <= y) continue;
+ x1 = max(x, s->x);
+ x2 = min(right, s->x + s_w);
+ y1 = max(y, s->y);
+ y2 = min(bottom, s->y + s_h);
+ // 16-byte align for the asm version
+ x2 += x1 & 7;
+ x1 &= ~7;
+ scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2,
+ src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1);