#include "common.h"
#include "../../gpulib/gpu_timing.h"
+#include "../../gpulib/gpu.h"
#ifndef command_lengths
const u8 command_lengths[256] =
#define SET_Ex(r, v)
#endif
+/*
+ * Draw one textured sprite from a 0x64-style packet and account for its cost.
+ *
+ * Packet words used here:
+ *   list[0] - command byte in bits 24-31, passed on whole as the color word
+ *   list[1] - x in the low half, y in the high half
+ *   list[2] - u in bits 0-7, v in bits 8-15, clut in the high half
+ * width/height are supplied by the caller and handed to render_sprite() by
+ * address; whatever it leaves in them is what gput_sprite() is charged for.
+ */
+static void textured_sprite(psx_gpu_struct *psx_gpu, const u32 *list,
+  s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
+{
+  const u32 cmd_color = list[0];
+  const u32 xy = list[1];
+  const u32 uv_clut = list[2];
+  u8 tex_u = uv_clut & 0xff;
+  u8 tex_v = (uv_clut >> 8) & 0xff;
+  s32 pos_x = sign_extend_11bit(xy + psx_gpu->offset_x);
+  s32 pos_y = sign_extend_11bit((xy >> 16) + psx_gpu->offset_y);
+
+  set_clut(psx_gpu, uv_clut >> 16);
+
+  render_sprite(psx_gpu, pos_x, pos_y, tex_u, tex_v, &width, &height,
+    cmd_color >> 24, cmd_color);
+  gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height));
+}
+
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
u32 current_command = 0, command_length;
u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
+ u32 siplified_prim[4*4];
u32 *list_start = list;
u32 *list_end = list + (size / 4);
case 0x2C ... 0x2F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[9]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[4] >> 16);
+ if ((simplified_count = prim_try_simplify_quad_t(siplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &siplified_prim[i * 4];
+ textured_sprite(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
get_vertex_data_xy_uv(0, 2);
case 0x3C ... 0x3F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[11]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[5] >> 16);
+ if ((simplified_count = prim_try_simplify_quad_gt(siplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &siplified_prim[i * 4];
+ textured_sprite(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
get_vertex_data_xy_uv_rgb(0, 0);
get_vertex_data_xy_uv_rgb(1, 6);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
-
- case 0x64 ... 0x67:
- {
- u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u32 uv = list_s16[4];
- s32 width = list_s16[6] & 0x3FF;
- s32 height = list_s16[7] & 0x1FF;
-
- set_clut(psx_gpu, list_s16[5]);
- render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
+ case 0x64 ... 0x67:
+ textured_sprite(psx_gpu, list, list[3] & 0x3FF, (list[3] >> 16) & 0x1FF,
+ &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x68 ... 0x6B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
-
- case 0x74 ... 0x77:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u32 uv = list_s16[4];
- s32 width = 8, height = 8;
- set_clut(psx_gpu, list_s16[5]);
-
- render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
+ case 0x74 ... 0x77:
+ textured_sprite(psx_gpu, list, 8, 8, &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x78 ... 0x7B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
}
case 0x7C ... 0x7F:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u32 uv = list_s16[4];
- s32 width = 16, height = 16;
-
- set_clut(psx_gpu, list_s16[5]);
-
- render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
+ textured_sprite(psx_gpu, list, 16, 16, &cpu_cycles_sum, &cpu_cycles);
break;
- }
#ifdef PCSX
case 0x1F: // irq?
}
#endif
+/*
+ * Enhanced-resolution counterpart of textured_sprite(): draws the sprite
+ * normally, charges its cycles, and then, if check_enhanced_range() accepts
+ * the x span, also draws it through do_sprite_enhanced().
+ *
+ * render_sprite() receives width/height by address, so the caller-supplied
+ * size is saved first and that saved size is what the enhanced draw gets.
+ * The range check itself uses the width as left by render_sprite().
+ */
+static void textured_sprite_enh(psx_gpu_struct *psx_gpu, const u32 *list,
+  s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
+{
+  const u32 cmd_color = list[0];
+  const u32 xy = list[1];
+  const u32 uv_clut = list[2];
+  const s32 requested_w = width;
+  const s32 requested_h = height;
+  u8 tex_u = uv_clut & 0xff;
+  u8 tex_v = (uv_clut >> 8) & 0xff;
+  s32 pos_x = sign_extend_11bit(xy + psx_gpu->offset_x);
+  s32 pos_y = sign_extend_11bit((xy >> 16) + psx_gpu->offset_y);
+
+  set_clut(psx_gpu, uv_clut >> 16);
+
+  render_sprite(psx_gpu, pos_x, pos_y, tex_u, tex_v, &width, &height,
+    cmd_color >> 24, cmd_color);
+  gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height));
+
+  if (!check_enhanced_range(psx_gpu, pos_x, pos_x + width))
+    return;
+  do_sprite_enhanced(psx_gpu, pos_x, pos_y, tex_u, tex_v,
+    requested_w, requested_h, cmd_color);
+}
+
u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
u32 current_command = 0, command_length;
u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
+ u32 siplified_prim[4*4];
u32 *list_start = list;
u32 *list_end = list + (size / 4);
case 0x2C ... 0x2F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[9]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[4] >> 16);
+ if ((simplified_count = prim_try_simplify_quad_t(siplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &siplified_prim[i * 4];
+ textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
get_vertex_data_xy_uv(0, 2);
case 0x3C ... 0x3F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[11]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[5] >> 16);
+ if ((simplified_count = prim_try_simplify_quad_gt(siplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &siplified_prim[i * 4];
+ textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
get_vertex_data_xy_uv_rgb(0, 0);
get_vertex_data_xy_uv_rgb(1, 6);
}
break;
}
-
- case 0x64 ... 0x67:
- {
- u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u8 u = list_s16[4];
- u8 v = list_s16[4] >> 8;
- s32 width = list_s16[6] & 0x3FF;
- s32 height = list_s16[7] & 0x1FF;
-
- set_clut(psx_gpu, list_s16[5]);
- render_sprite(psx_gpu, x, y, u, v,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
-
- if (check_enhanced_range(psx_gpu, x, x + width)) {
- width = list_s16[6] & 0x3FF;
- height = list_s16[7] & 0x1FF;
- do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
- }
+ case 0x64 ... 0x67:
+ textured_sprite_enh(psx_gpu, list, list[3] & 0x3FF, (list[3] >> 16) & 0x1FF,
+ &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x68 ... 0x6B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]);
break;
}
-
- case 0x74 ... 0x77:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u8 u = list_s16[4];
- u8 v = list_s16[4] >> 8;
- s32 width = 8, height = 8;
- set_clut(psx_gpu, list_s16[5]);
-
- render_sprite(psx_gpu, x, y, u, v,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
-
- if (check_enhanced_range(psx_gpu, x, x + 8))
- do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]);
+ case 0x74 ... 0x77:
+ textured_sprite_enh(psx_gpu, list, 8, 8, &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x78 ... 0x7B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]);
break;
}
-
- case 0x7C ... 0x7F:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u8 u = list_s16[4];
- u8 v = list_s16[4] >> 8;
- s32 width = 16, height = 16;
- set_clut(psx_gpu, list_s16[5]);
-
- render_sprite(psx_gpu, x, y, u, v,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
-
- if (check_enhanced_range(psx_gpu, x, x + 16))
- do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]);
+ case 0x7C ... 0x7F:
+ textured_sprite_enh(psx_gpu, list, 16, 16, &cpu_cycles_sum, &cpu_cycles);
break;
- }
case 0x80 ... 0x9F: // vid -> vid
case 0xA0 ... 0xBF: // sys -> vid
#endif
#include "../gpulib/gpu_timing.h"
+/*
+ * Draw the textured sprite packet currently held in gpu_unai.PacketBuffer
+ * and account for its cost.
+ *
+ * Steps: set the CLUT from the high half of word 2, pick a sprite driver
+ * (with Lighting added unless the packet color is in the 0x80..0x87 range
+ * on every channel), draw via gpuDrawS(), then charge gput_sprite(w, h)
+ * through gput_sum().
+ *
+ * cpu_cycles_sum, cpu_cycles - cycle accumulators updated by gput_sum()
+ */
+static inline void textured_sprite(int &cpu_cycles_sum, int &cpu_cycles)
+{
+ u32 PRIM = le32_to_u32(gpu_unai.PacketBuffer.U4[0]) >> 24; // command byte. NOTE(review): not referenced by name below; presumably consumed by the Blending/Blending_Mode/Lighting macros - confirm before renaming
+ gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+ s32 w = 0, h = 0; // passed by address to gpuDrawS(), then used for the cycle estimate
+
+ //senquack - Only colors 808080h-878787h allow skipping the lighting calculation.
+ // This fixes the Silent Hill running animation on loading screens.
+ // On PSX, color values 0x00-0x7F darken the source texture's color,
+ // 0x81-0xFF lighten it (ultimately clamped to 0x1F), and
+ // 0x80 leaves the source texture color unchanged. HOWEVER,
+ // gpu_unai uses a simple lighting LUT whereby only the upper
+ // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
+ // 0x80.
+ //
+ // NOTE: I've changed all textured sprite draw commands here and
+ // elsewhere to use proper behavior, but left poly commands
+ // alone; I don't want to slow rendering down too much. (TODO)
+ //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
+ // Strip the lower 3 bits of each color channel and decide whether lighting is needed:
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteDrivers[driver_idx];
+ PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; // the draw call reads the packet straight from the global buffer
+ gpuDrawS(packet, driver, &w, &h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+}
+
extern const unsigned char cmd_lengths[256];
int do_cmd_list(u32 *list_, int list_len,
case 0x2D:
case 0x2E:
case 0x2F: { // Textured 4-pt poly
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ u32 simplified_count;
gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
+ if ((simplified_count = prim_try_simplify_quad_t(gpu_unai.PacketBuffer.U4,
+ gpu_unai.PacketBuffer.U4)))
+ {
+ for (i = 0;; ) {
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
+ if (++i >= simplified_count)
+ break;
+ memcpy(&gpu_unai.PacketBuffer.U4[0], &gpu_unai.PacketBuffer.U4[i * 4], 16);
+ }
+ break;
+ }
+ gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx =
//(gpu_unai.blit_mask?1024:0) |
case 0x3D:
case 0x3E:
case 0x3F: { // Gouraud-shaded, textured 4-pt poly
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+ u32 simplified_count;
+ gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+ if ((simplified_count = prim_try_simplify_quad_gt(gpu_unai.PacketBuffer.U4,
+ gpu_unai.PacketBuffer.U4)))
+ {
+ for (i = 0;; ) {
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
+ if (++i >= simplified_count)
+ break;
+ memcpy(&gpu_unai.PacketBuffer.U4[0], &gpu_unai.PacketBuffer.U4[i * 4], 16);
+ }
+ break;
+ }
+ gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
PP driver = gpuPolySpanDrivers[
//(gpu_unai.blit_mask?1024:0) |
Dithering |
case 0x64:
case 0x65:
case 0x66:
- case 0x67: { // Textured rectangle (variable size)
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
- s32 w = 0, h = 0;
-
- //senquack - Only color 808080h-878787h allows skipping lighting calculation:
- // This fixes Silent Hill running animation on loading screens:
- // (On PSX, color values 0x00-0x7F darken the source texture's color,
- // 0x81-FF lighten textures (ultimately clamped to 0x1F),
- // 0x80 leaves source texture color unchanged, HOWEVER,
- // gpu_unai uses a simple lighting LUT whereby only the upper
- // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
- // 0x80.
- //
- // NOTE: I've changed all textured sprite draw commands here and
- // elsewhere to use proper behavior, but left poly commands
- // alone, I don't want to slow rendering down too much. (TODO)
- //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
- // Strip lower 3 bits of each color and determine if lighting should be used:
- if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
- driver_idx |= Lighting;
- PS driver = gpuSpriteDrivers[driver_idx];
- gpuDrawS(packet, driver, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
- } break;
+ case 0x67: // Textured rectangle (variable size)
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
+ break;
case 0x68:
case 0x69:
case 0x76:
case 0x77: { // Textured rectangle (8x8)
gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
- s32 w = 0, h = 0;
-
- //senquack - Only color 808080h-878787h allows skipping lighting calculation:
- //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
- // Strip lower 3 bits of each color and determine if lighting should be used:
- if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
- driver_idx |= Lighting;
- PS driver = gpuSpriteDrivers[driver_idx];
- gpuDrawS(packet, driver, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
} break;
case 0x78:
case 0x7E:
case 0x7F: { // Textured rectangle (16x16)
gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
- s32 w = 0, h = 0;
- //senquack - Only color 808080h-878787h allows skipping lighting calculation:
- //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
- // Strip lower 3 bits of each color and determine if lighting should be used:
- if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
- driver_idx |= Lighting;
- PS driver = gpuSpriteDrivers[driver_idx];
- gpuDrawS(packet, driver, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
} break;
#ifdef TEST
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../../include/compiler_features.h"
+#include "gpu.h"
+
+// retain neon's ability to sample textures pixel-perfectly
+#ifdef GPU_NEON
+#define STRICT
+#endif
+
+struct vert_t // one textured vertex as two raw 32-bit command words (position, then texcoord+clut)
+{
+ union {
+ struct {
+ int16_t x, y; // raw halves of the position word; read through LE16TOH() by the code below
+ };
+ uint32_t xy; // the same position word as a whole, for copying both halves at once
+ };
+ union {
+ struct {
+ uint8_t u, v; // texture coordinates, one byte each
+ int16_t clut; // second half of the word; only ever copied through as-is in this file
+ };
+ uint32_t uvclut; // the same texcoord/clut word as a whole
+ };
+};
+
+// gt ~ gouraud textured: a per-vertex color word followed by a textured vertex
+struct vert_gt
+{
+ uint32_t rgb; // raw color word; for vertex 0 the code below also tests bit 24 of it
+ struct vert_t t; // position and texcoord/clut words
+};
+
+struct quad_t // flat textured quad packet (handled by prim_try_simplify_quad_t, "2c-2f")
+{
+ uint32_t rgb_c; // first packet word: color plus command bits
+ struct vert_t v[4]; // four vertices, two words each
+};
+
+struct quad_gt // gouraud textured quad packet (handled by prim_try_simplify_quad_gt, "3c-3f")
+{
+ struct vert_gt v[4]; // four vertices, three words each (color, position, texcoord+clut)
+};
+
+struct sprite // output of the simplifier: a four-word sprite packet
+{
+ uint32_t rgb_c; // word 0: color plus command bits (simplify_quad_t forces the 0x64 command pattern)
+ union {
+ struct {
+ int16_t x, y; // word 1 halves: start position, stored in raw packet byte order
+ };
+ uint32_t xy; // word 1 as a whole
+ };
+ union {
+ struct {
+ uint8_t u, v; // word 2: start texture coordinates
+ int16_t clut; // word 2: clut halfword, copied from the source quad
+ };
+ uint32_t uvclut; // word 2 as a whole
+ };
+ int16_t w, h; // word 3: width and height, stored through HTOLE16()
+};
+
+// debug
+#if 0
+// debug: print a flat textured quad (raw color word, then x,y u,v per
+// vertex) followed by the simplifier's return value
+static void log_quad_t(const struct quad_t *q, int ret)
+{
+#if 1
+  const struct vert_t *vt = q->v;
+  const struct vert_t *vt_end = vt + 4;
+
+  printf("quad_t %08x", q->rgb_c);
+  for (; vt < vt_end; vt++)
+    printf(" | %3d,%3d %3d,%3d", vt->x, vt->y, vt->u, vt->v);
+  printf(" -> %d\n", ret);
+#endif
+}
+
+// debug: print a gouraud textured quad (top byte of the first color word,
+// then x,y u,v and the 24-bit color per vertex) followed by the simplifier's
+// return value
+static void log_quad_gt(const struct vert_gt *v, int ret)
+{
+#if 1
+  const struct vert_gt *vg = v;
+  const struct vert_gt *vg_end = v + 4;
+
+  printf("quad_gt %02x", v[0].rgb >> 24);
+  for (; vg < vg_end; vg++)
+    printf(" | %3d,%3d %3d,%3d %06x",
+      vg->t.x, vg->t.y, vg->t.u, vg->t.v, vg->rgb & 0xffffff);
+  printf(" -> %d\n", ret);
+#endif
+}
+
+int prim_try_simplify_quad_t_(void *simplified, const void *prim_); // forward declaration of the real implementation (see the #define below)
+int prim_try_simplify_quad_t(void *simplified, const void *prim_) // debug-only logging wrapper around the real simplifier
+{
+ struct quad_t prim = *(struct quad_t *)prim_; // snapshot the input first; presumably because 'simplified' may be the same buffer - confirm
+ int ret = prim_try_simplify_quad_t_(simplified, prim_);
+ #define prim_try_simplify_quad_t prim_try_simplify_quad_t_ // from here on the public name expands to the '_' one, so the later definition supplies the function called above
+ ///if (!ret)
+ log_quad_t(&prim, ret); // logs every call; the disabled condition above would restrict it to failures
+ return ret;
+}
+
+int prim_try_simplify_quad_gt_(void *simplified, const void *prim_); // forward declaration of the real implementation (see the #define below)
+int prim_try_simplify_quad_gt(void *simplified, const void *prim_) // debug-only logging wrapper around the real simplifier
+{
+ struct quad_gt prim = *(struct quad_gt *)prim_; // snapshot the input first; presumably because 'simplified' may be the same buffer - confirm
+ int ret = prim_try_simplify_quad_gt_(simplified, prim_);
+ #define prim_try_simplify_quad_gt prim_try_simplify_quad_gt_ // from here on the public name expands to the '_' one, so the later definition supplies the function called above
+ ///if (!ret)
+ log_quad_gt(prim.v, ret); // logs every call; the disabled condition above would restrict it to failures
+ return ret;
+}
+#endif // debug
+/*
+ * Write the sprite replacement for an axis-aligned textured quad into
+ * 'simplified' (treated as an array of struct sprite) and return the number
+ * of sprites written: 1, or in non-STRICT builds 2 or 4 when the screen
+ * extent and the texture extent differ on one or both axes.
+ *
+ * v      - vertex that supplies the start position and texture coordinates
+ * xd, ud - x and u deltas; only their absolute values are used
+ * yd, vd - y and v deltas, used as given
+ * rgb_c  - raw color/command word; its top six bits are replaced so that
+ *          the command byte becomes 0x64..0x67
+ * clut   - raw clut halfword, copied through unchanged
+ *
+ * NOTE(review): one caller passes the source packet buffer itself as
+ * 'simplified', so the order of the reads through 'v' and the writes to
+ * s[0] below presumably matters - do not reorder without checking.
+ */
+static noinline int simplify_quad_t(void *simplified, const struct vert_t *v,
+ int xd, int ud, int yd, int vd, uint32_t rgb_c, uint16_t clut)
+{
+ struct sprite *s = simplified;
+ int ret = 1;
+ rgb_c &= HTOLE32(0x03ffffff); // keep the color and the two lowest command bits
+ rgb_c |= HTOLE32(0x64000000); // set the remaining command bits to the 0x64 pattern
+ xd = abs(xd);
+ ud = abs(ud);
+ s[0].rgb_c = rgb_c;
+ s[0].xy = v->xy;
+ s[0].u = v->u;
+ s[0].v = v->v;
+ s[0].clut = clut;
+ s[0].w = HTOLE16(xd);
+ s[0].h = HTOLE16(yd);
+#ifndef STRICT
+ if (xd != ud) { // widths differ: split into a left and a right sprite
+ int mid = xd / 2;
+ s[0].w = HTOLE16(mid);
+ s[1].rgb_c = rgb_c;
+ s[1].x = HTOLE16(LE16TOH(s[0].x) + mid);
+ s[1].y = s[0].y;
+ s[1].u = s[0].u + mid + ud - xd; // right half starts 'mid' texels in, shifted by the width difference
+ s[1].v = s[0].v;
+ s[1].clut = clut;
+ s[1].w = HTOLE16(xd - mid);
+ s[1].h = s[0].h;
+ ret = 2;
+ }
+ if (yd != vd) { // heights differ: duplicate the existing sprite(s) as a lower row
+ int i, mid = yd / 2, y = LE16TOH(s[0].y);
+ memcpy(s + ret, s, sizeof(s[0]) * ret);
+ for (i = 0; i < ret; i++) {
+ s[i].h = HTOLE16(mid);
+ s[ret+i].y = HTOLE16(y + mid);
+ s[ret+i].h = HTOLE16(yd - mid);
+ s[ret+i].v = s[0].v + mid + vd - yd; // lower row starts 'mid' texels down, shifted by the height difference
+ }
+ ret *= 2;
+ }
+#endif
+ return ret;
+}
+
+/*
+ * Second stage of the flat textured quad check; kept as a separate noinline
+ * function to reduce gcc register spilling.
+ *
+ * The caller has already verified that the four vertices form an
+ * axis-aligned rectangle in both screen and texture space. This stage
+ * computes the extents from v[0], v[1] (same row) and v[2] (same column)
+ * and accepts the quad only if:
+ *   - it is not upside down (yd >= 0), and
+ *   - the texture extent matches the screen extent exactly (STRICT) or
+ *     to within one pixel per axis (otherwise).
+ * A negative x extent is allowed; the sprite then starts at v[1].
+ *
+ * simplified - output buffer handed on to simplify_quad_t()
+ * v          - the quad's four vertices, in raw packet byte order
+ * rgb_c      - the quad's raw color/command word
+ *
+ * Returns the number of sprites written, or 0 if the quad must be drawn
+ * as a regular polygon.
+ */
+static noinline int prim_try_simplify_quad_t2(void *simplified,
+  const struct vert_t *v, uint32_t rgb_c)
+{
+  // cast back to int16_t so the sign survives even when the byte swap
+  // macro yields an unsigned value
+  int yd = (int16_t)LE16TOH(v[2].y) - (int16_t)LE16TOH(v[0].y);
+  int xd, ud, vd;
+
+  if (yd < 0)
+    return 0;
+  xd = (int16_t)LE16TOH(v[1].x) - (int16_t)LE16TOH(v[0].x);
+  // u and v are single bytes and have no byte order, so they must not go
+  // through the 16-bit swap
+  ud = v[1].u - v[0].u;
+  vd = v[2].v - v[0].v;
+#ifdef STRICT
+  if (xd != ud || yd != vd)
+#else
+  if (abs(xd - ud) > 1 || abs(yd - vd) > 1)
+#endif
+    return 0;
+  return simplify_quad_t(simplified, xd < 0 ? &v[1] : &v[0],
+    xd, ud, yd, vd, rgb_c, v[0].clut);
+}
+
+/*
+ * Second stage of the gouraud textured quad check (noinline, like its flat
+ * counterpart).
+ *
+ * The caller has already verified that the four vertices form an
+ * axis-aligned rectangle in both screen and texture space. This stage
+ * accepts the quad only if:
+ *   - it is not upside down (yd >= 0),
+ *   - the texture extent matches the screen extent exactly (STRICT) or
+ *     to within one pixel per axis (otherwise), and
+ *   - when bit 24 of the first color word is clear (modulation/"lighting"
+ *     in use), all four vertex colors agree in the upper five bits of
+ *     every channel, i.e. the shading is effectively flat.
+ * A negative x extent is allowed; the sprite then starts at v[1].
+ *
+ * simplified - output buffer handed on to simplify_quad_t()
+ * v          - the quad's four vertices, in raw packet byte order
+ *
+ * Returns the number of sprites written, or 0 if the quad must be drawn
+ * as a regular polygon.
+ */
+static noinline int prim_try_simplify_quad_gt2(void *simplified,
+  const struct vert_gt *v)
+{
+  // cast back to int16_t so the sign survives even when the byte swap
+  // macro yields an unsigned value
+  int yd = (int16_t)LE16TOH(v[2].t.y) - (int16_t)LE16TOH(v[0].t.y);
+  int xd, ud, vd;
+
+  if (yd < 0)
+    return 0;
+  xd = (int16_t)LE16TOH(v[1].t.x) - (int16_t)LE16TOH(v[0].t.x);
+  // u and v are single bytes and have no byte order, so they must not go
+  // through the 16-bit swap
+  ud = v[1].t.u - v[0].t.u;
+  vd = v[2].t.v - v[0].t.v;
+#ifdef STRICT
+  if (xd != ud || yd != vd)
+#else
+  if (abs(xd - ud) > 1 || abs(yd - vd) > 1)
+#endif
+    return 0;
+  if (!(v[0].rgb & HTOLE32(1 << 24))) { // modulation/"lighting"
+    uint32_t i, rgb_diff = 0, rgb0 = v[0].rgb;
+    for (i = 1; i < 4; i++)
+      rgb_diff |= rgb0 ^ v[i].rgb;
+    // differences confined to the low 3 bits of each channel are ignored
+    if (rgb_diff & HTOLE32(0xf8f8f8))
+      return 0;
+  }
+  return simplify_quad_t(simplified, xd < 0 ? &v[1].t : &v[0].t,
+    xd, ud, yd, vd, v[0].rgb, v[0].t.clut);
+}
+
+/*
+ * Commands 2c-2f: try to replace a flat textured quad with sprites.
+ *
+ * simplified - output buffer for the sprite packets
+ * prim_      - the raw quad packet, read as a struct quad_t
+ *
+ * Returns the number of sprites written, or 0 when the quad is not an
+ * axis-aligned rectangle in both screen and texture space (or fails the
+ * extent checks in prim_try_simplify_quad_t2).
+ */
+int prim_try_simplify_quad_t(void *simplified, const void *prim_)
+{
+  const struct quad_t *quad = prim_;
+  const struct vert_t *vt = quad->v;
+
+  // screen space: 0 and 1 share a row, 2 and 3 share a row,
+  // 0 and 2 share a column, 1 and 3 share a column
+  if (!(vt[0].y == vt[1].y && vt[0].x == vt[2].x
+        && vt[2].y == vt[3].y && vt[1].x == vt[3].x))
+    return 0;
+  // texture space: the same arrangement
+  if (!(vt[0].v == vt[1].v && vt[0].u == vt[2].u
+        && vt[2].v == vt[3].v && vt[1].u == vt[3].u))
+    return 0;
+
+  return prim_try_simplify_quad_t2(simplified, vt, quad->rgb_c);
+}
+
+/*
+ * Commands 3c-3f: try to replace a gouraud textured quad with sprites.
+ *
+ * simplified - output buffer for the sprite packets
+ * prim       - the raw quad packet, read as four struct vert_gt
+ *
+ * Returns the number of sprites written, or 0 when the quad is not an
+ * axis-aligned rectangle in both screen and texture space (or fails the
+ * extent/color checks in prim_try_simplify_quad_gt2).
+ */
+int prim_try_simplify_quad_gt(void *simplified, const void *prim)
+{
+  const struct vert_gt *vg = prim;
+
+  // screen space: 0 and 1 share a row, 2 and 3 share a row,
+  // 0 and 2 share a column, 1 and 3 share a column
+  if (!(vg[0].t.y == vg[1].t.y && vg[0].t.x == vg[2].t.x
+        && vg[2].t.y == vg[3].t.y && vg[1].t.x == vg[3].t.x))
+    return 0;
+  // texture space: the same arrangement
+  if (!(vg[0].t.v == vg[1].t.v && vg[0].t.u == vg[2].t.u
+        && vg[2].t.v == vg[3].t.v && vg[1].t.u == vg[3].t.u))
+    return 0;
+
+  return prim_try_simplify_quad_gt2(simplified, vg);
+}
+
+// vim:shiftwidth=2:expandtab