From: Paul Cercueil Date: Wed, 30 Aug 2023 14:36:56 +0000 (+0200) Subject: unai: Add support for big-endian X-Git-Tag: r24l~207^2 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4949d4fff530344f0f4a1676bc45d0203749f291;p=pcsx_rearmed.git unai: Add support for big-endian Fix VRAM and PacketBuffer accesses to always go through endianness conversion. This should fix most of Unai on big-endian systems. I think it is not 100% complete as RGB888 frames are still rendered in the host's endian - but in my case the emulator (WiiSX) expects a host endian format for RGB888 so it still works. Signed-off-by: Paul Cercueil --- diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai/gpu_command.h index c39c81b3..cf6b62b4 100644 --- a/plugins/gpu_unai/gpu_command.h +++ b/plugins/gpu_unai/gpu_command.h @@ -45,13 +45,13 @@ void gpuSetTexture(u16 tpage) gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3; gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one - gpu_unai.TBA = &((u16*)gpu_unai.vram)[FRAME_OFFSET(tx, ty)]; + gpu_unai.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)]; } /////////////////////////////////////////////////////////////////////////////// INLINE void gpuSetCLUT(u16 clut) { - gpu_unai.CBA = &((u16*)gpu_unai.vram)[(clut & 0x7FFF) << 4]; + gpu_unai.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4]; } #ifdef ENABLE_GPU_NULL_SUPPORT @@ -193,8 +193,8 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = (gpu_unai.blit_mask?1024:0) | @@ -241,8 +241,8 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_unai.PacketBuffer.U4[4] 
>> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = (gpu_unai.blit_mask?1024:0) | @@ -294,8 +294,8 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ (gpu_unai.blit_mask?1024:0) | Dithering | @@ -335,8 +335,8 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ (gpu_unai.blit_mask?1024:0) | Dithering | @@ -383,7 +383,7 @@ void gpuSendPacketFunction(const int PRIM) gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); } - if ((gpu_unai.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000) + if ((le32_raw(gpu_unai.PacketBuffer.U4[3]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000)) { gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[3]; @@ -430,7 +430,7 @@ void gpuSendPacketFunction(const int PRIM) gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); } - if ((gpu_unai.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000) + if ((le32_raw(gpu_unai.PacketBuffer.U4[4]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000)) { gpu_unai.PacketBuffer.U1[3 + (2 * 4)] = gpu_unai.PacketBuffer.U1[3 + (0 * 4)]; gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2]; @@ -462,7 +462,7 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] 
>> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); // This fixes Silent Hill running animation on loading screens: @@ -478,7 +478,7 @@ void gpuSendPacketFunction(const int PRIM) // alone, I don't want to slow rendering down too much. (TODO) //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -494,7 +494,7 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_unai.PacketBuffer.U4[2] = 0x00010001; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); gpu_unai.fb_dirty = true; @@ -509,7 +509,7 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_unai.PacketBuffer.U4[2] = 0x00080008; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); gpu_unai.fb_dirty = true; @@ -524,14 +524,14 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_unai.PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - 
Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -547,7 +547,7 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_unai.PacketBuffer.U4[2] = 0x00100010; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); gpu_unai.fb_dirty = true; @@ -561,7 +561,7 @@ void gpuSendPacketFunction(const int PRIM) /* Notaz 4bit sprites optimization */ if ((!gpu_unai.frameskip.skipGPU) && (!(gpu_unai.GPU_GP1&0x180)) && (!(gpu_unai.Masking|gpu_unai.PixelMSB))) { - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet); gpu_unai.fb_dirty = true; break; @@ -572,14 +572,14 @@ void gpuSendPacketFunction(const int PRIM) if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpu_unai.PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_unai.PacketBuffer.U4[0] & 
0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -609,7 +609,7 @@ void gpuSendPacketFunction(const int PRIM) DO_LOG(("gpuStoreImage(0x%x)\n",PRIM)); break; case 0xE1 ... 0xE6: { // Draw settings - gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]); + gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0])); } break; } } diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 4aab6042..eb209ef4 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -123,12 +123,8 @@ static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b) // rectangles) to use the same set of functions. Since tiles are always // monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded // gpuPixelSpanFn functions (TODO?). -// -// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here, -// so that pDst can be incremented directly by 'incr' parameter -// without having to shift it before use. template -static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len) { // Blend func can save an operation if it knows uSrc MSB is // unset. For untextured prims, this is always true. 
@@ -139,6 +135,9 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) u32 r, g, b; s32 r_incr, g_incr, b_incr; + // Caller counts in bytes, we count in pixels + incr /= 2; + if (CF_GOURAUD) { gcPtr = (GouraudColor*)data; r = gcPtr->r; r_incr = gcPtr->r_incr; @@ -152,15 +151,15 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (!CF_GOURAUD) { // NO GOURAUD if (!CF_MASKCHECK && !CF_BLEND) { - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } else if (CF_MASKCHECK && !CF_BLEND) { - if (!(*(u16*)pDst & 0x8000)) { - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (!(le16_raw(*pDst) & HTOLE16(0x8000))) { + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } } else { - uint_fast16_t uDst = *(u16*)pDst; + uint_fast16_t uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } uint_fast16_t uSrc = col; @@ -168,8 +167,8 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } - else { *(u16*)pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } } } else @@ -177,16 +176,16 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (!CF_MASKCHECK && !CF_BLEND) { col = gpuGouraudColor15bpp(r, g, b); - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } else if (CF_MASKCHECK && !CF_BLEND) { col = gpuGouraudColor15bpp(r, g, b); - if (!(*(u16*)pDst & 0x8000)) { - if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } - else { *(u16*)pDst = col; } + if (!(le16_raw(*pDst) & 
HTOLE16(0x8000))) { + if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); } + else { *pDst = u16_to_le16(col); } } } else { - uint_fast16_t uDst = *(u16*)pDst; + uint_fast16_t uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } col = gpuGouraudColor15bpp(r, g, b); @@ -199,8 +198,8 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } - else { *(u16*)pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } } } @@ -228,7 +227,7 @@ endpixel: return pDst; } -static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +static le16_t* PixelSpanNULL(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"PixelSpanNULL()\n"); @@ -238,7 +237,7 @@ static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) /////////////////////////////////////////////////////////////////////////////// // PixelSpan (lines) innerloops driver -typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len); +typedef le16_t* (*PSD)(le16_t* dst, uintptr_t data, ptrdiff_t incr, size_t len); const PSD gpuPixelSpanDrivers[64] = { @@ -282,14 +281,26 @@ const PSD gpuPixelSpanDrivers[64] = // GPU Tiles innerloops generator template -static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) +static void gpuTileSpanFn(le16_t *pDst, u32 count, u16 data) { + le16_t ldata; + if (!CF_MASKCHECK && !CF_BLEND) { - if (CF_MASKSET) { data = data | 0x8000; } - do { *pDst++ = data; } while (--count); + if (CF_MASKSET) + ldata = u16_to_le16(data | 0x8000); + else + ldata = u16_to_le16(data); + do { *pDst++ = ldata; } while (--count); } else if (CF_MASKCHECK && !CF_BLEND) { - if (CF_MASKSET) { data = data | 0x8000; } - do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); + if (CF_MASKSET) + ldata 
= u16_to_le16(data | 0x8000); + else + ldata = u16_to_le16(data); + do { + if (!(le16_raw(*pDst) & HTOLE16(0x8000))) + *pDst = ldata; + pDst++; + } while (--count); } else { // Blend func can save an operation if it knows uSrc MSB is @@ -299,16 +310,16 @@ static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) uint_fast16_t uSrc, uDst; do { - if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } - if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; } + if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } + if (CF_MASKCHECK) if (uDst&0x8000) { goto endtile; } uSrc = data; if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } //senquack - Did not apply "Silent Hill" mask-bit fix to here. // It is hard to tell from scarce documentation available and @@ -322,7 +333,7 @@ endtile: } } -static void TileNULL(u16 *pDst, u32 count, u16 data) +static void TileNULL(le16_t *pDst, u32 count, u16 data) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"TileNULL()\n"); @@ -331,7 +342,7 @@ static void TileNULL(u16 *pDst, u32 count, u16 data) /////////////////////////////////////////////////////////////////////////////// // Tiles innerloops driver -typedef void (*PT)(u16 *pDst, u32 count, u16 data); +typedef void (*PT)(le16_t *pDst, u32 count, u16 data); // Template instantiation helper macros #define TI(cf) gpuTileSpanFn<(cf)> @@ -355,7 +366,7 @@ const PT gpuTileSpanDrivers[32] = { // GPU Sprites innerloops generator template -static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) +static void gpuSpriteSpanFn(le16_t *pDst, u32 count, u8* pTxt, u32 u0) { // Blend func can save an operation if it knows uSrc MSB is unset. // Untextured prims can always skip (source color always comes with MSB=0). 
@@ -378,22 +389,22 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) u0_mask <<= 1; } - const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; + const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; do { - if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; } if (CF_TEXTMODE==1) { // 4bpp (CLUT) u8 rgb = pTxt[(u0 & u0_mask)>>1]; - uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf]; + uSrc = le16_to_u16(CBA_[(rgb>>((u0&1)<<2))&0xf]); } if (CF_TEXTMODE==2) { // 8bpp (CLUT) - uSrc = CBA_[pTxt[u0 & u0_mask]]; + uSrc = le16_to_u16(CBA_[pTxt[u0 & u0_mask]]); } if (CF_TEXTMODE==3) { // 16bpp - uSrc = *(u16*)(&pTxt[u0 & u0_mask]); + uSrc = le16_to_u16(*(le16_t*)(&pTxt[u0 & u0_mask])); } if (!uSrc) goto endsprite; @@ -410,9 +421,9 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) if (CF_BLEND && should_blend) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); } + else { *pDst = u16_to_le16(uSrc); } endsprite: u0 += (CF_TEXTMODE==3) ? 
2 : 1; @@ -421,7 +432,7 @@ endsprite: while (--count); } -static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0) +static void SpriteNULL(le16_t *pDst, u32 count, u8* pTxt, u32 u0) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"SpriteNULL()\n"); @@ -432,7 +443,7 @@ static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0) /////////////////////////////////////////////////////////////////////////////// // Sprite innerloops driver -typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0); +typedef void (*PS)(le16_t *pDst, u32 count, u8* pTxt, u32 u0); // Template instantiation helper macros #define TI(cf) gpuSpriteSpanFn<(cf)> @@ -485,7 +496,7 @@ const PS gpuSpriteSpanDrivers[256] = { // relevant blend/light headers. // (see README_senquack.txt) template -static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) +static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { // Blend func can save an operation if it knows uSrc MSB is unset. // Untextured prims can always skip this (src color MSB is always 0). @@ -509,7 +520,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) // gravestone text at end of Medieval intro sequence. 
-senquack //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } } - if (CF_BLEND || CF_MASKCHECK) uDst = *pDst; + if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } } uSrc = pix15; @@ -517,8 +528,8 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) if (CF_BLEND) uSrc = gpuBlending(uSrc, uDst); - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } endpolynotextnogou: pDst++; @@ -536,7 +547,7 @@ endpolynotextnogou: // See note in above loop regarding CF_BLITMASK //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; } - if (CF_BLEND || CF_MASKCHECK) uDst = *pDst; + if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst); if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; } if (CF_DITHER) { @@ -555,8 +566,8 @@ endpolynotextnogou: uSrc = gpuBlending(uSrc, uDst); } - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else { *pDst = u16_to_le16(uSrc); } endpolynotextgou: pDst++; @@ -578,8 +589,8 @@ endpolynotextgou: u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk; s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc; - const u16* TBA_ = gpu_unai.TBA; - const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; + const le16_t* TBA_ = gpu_unai.TBA; + const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; u8 r5, g5, b5; u8 r8, g8, b8; @@ -606,7 +617,7 @@ endpolynotextgou: do { if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; } - if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); } if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; } //senquack - adapted to work with new 22.10 fixed point routines: 
@@ -615,15 +626,15 @@ endpolynotextgou: u32 tu=(l_u>>10); u32 tv=(l_v<<1)&(0xff<<11); u8 rgb=((u8*)TBA_)[tv+(tu>>1)]; - uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf]; + uSrc=le16_to_u16(CBA_[(rgb>>((tu&1)<<2))&0xf]); if (!uSrc) goto endpolytext; } if (CF_TEXTMODE==2) { // 8bpp (CLUT) - uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]; + uSrc = le16_to_u16(CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]); if (!uSrc) goto endpolytext; } if (CF_TEXTMODE==3) { // 16bpp - uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))]; + uSrc = le16_to_u16(TBA_[(l_u>>10)+((l_v)&(0xff<<10))]); if (!uSrc) goto endpolytext; } @@ -661,9 +672,9 @@ endpolynotextgou: uSrc = gpuBlending(uSrc, uDst); } - if (CF_MASKSET) { *pDst = uSrc | 0x8000; } - else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; } - else { *pDst = uSrc; } + if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); } + else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); } + else { *pDst = u16_to_le16(uSrc); } endpolytext: pDst++; l_u = (l_u + l_u_inc) & l_u_msk; @@ -674,7 +685,7 @@ endpolytext: } } -static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) +static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"PolyNULL()\n"); @@ -683,7 +694,7 @@ static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) /////////////////////////////////////////////////////////////////////////////// // Polygon innerloops driver -typedef void (*PP)(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count); +typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); // Template instantiation helper macros #define TI(cf) gpuPolySpanFn<(cf)> diff --git a/plugins/gpu_unai/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h index 0e7e3e8a..4368ddb8 100644 --- a/plugins/gpu_unai/gpu_inner_quantization.h +++ b/plugins/gpu_unai/gpu_inner_quantization.h @@ -85,11 +85,11 @@ static void 
SetupDitheringConstants() // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// template -GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst) +GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const le16_t *pDst) { if (DITHER) { - u16 fbpos = (u32)(pDst - gpu_unai.vram); + u16 fbpos = (uintptr_t)pDst - (uintptr_t)gpu_unai.vram; u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7); //clean overflow flags and add diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h index 2d34b343..909ca390 100644 --- a/plugins/gpu_unai/gpu_raster_image.h +++ b/plugins/gpu_unai/gpu_raster_image.h @@ -26,10 +26,10 @@ void gpuLoadImage(PtrUnion packet) { u16 x0, y0, w0, h0; - x0 = packet.U2[2] & 1023; - y0 = packet.U2[3] & 511; - w0 = packet.U2[4]; - h0 = packet.U2[5]; + x0 = le16_to_u16(packet.U2[2]) & 1023; + y0 = le16_to_u16(packet.U2[3]) & 511; + w0 = le16_to_u16(packet.U2[4]); + h0 = le16_to_u16(packet.U2[5]); if ((y0 + h0) > FRAME_HEIGHT) { @@ -42,7 +42,7 @@ void gpuLoadImage(PtrUnion packet) gpu_unai.dma.py = 0; gpu_unai.dma.x_end = w0; gpu_unai.dma.y_end = h0; - gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)]; + gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)]; gpu_unai.GPU_GP1 |= 0x08000000; } @@ -53,10 +53,10 @@ void gpuLoadImage(PtrUnion packet) void gpuStoreImage(PtrUnion packet) { u16 x0, y0, w0, h0; - x0 = packet.U2[2] & 1023; - y0 = packet.U2[3] & 511; - w0 = packet.U2[4]; - h0 = packet.U2[5]; + x0 = le16_to_u16(packet.U2[2]) & 1023; + y0 = le16_to_u16(packet.U2[3]) & 511; + w0 = le16_to_u16(packet.U2[4]); + h0 = le16_to_u16(packet.U2[5]); if ((y0 + h0) > FRAME_HEIGHT) { @@ -68,7 +68,7 @@ void gpuStoreImage(PtrUnion packet) gpu_unai.dma.py = 0; gpu_unai.dma.x_end = w0; gpu_unai.dma.y_end = h0; - gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)]; + gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)]; 
gpu_unai.GPU_GP1 |= 0x08000000; } @@ -78,12 +78,12 @@ void gpuMoveImage(PtrUnion packet) { u32 x0, y0, x1, y1; s32 w0, h0; - x0 = packet.U2[2] & 1023; - y0 = packet.U2[3] & 511; - x1 = packet.U2[4] & 1023; - y1 = packet.U2[5] & 511; - w0 = packet.U2[6]; - h0 = packet.U2[7]; + x0 = le16_to_u16(packet.U2[2]) & 1023; + y0 = le16_to_u16(packet.U2[3]) & 511; + x1 = le16_to_u16(packet.U2[4]) & 1023; + y1 = le16_to_u16(packet.U2[5]) & 511; + w0 = le16_to_u16(packet.U2[6]); + h0 = le16_to_u16(packet.U2[7]); if( (x0==x1) && (y0==y1) ) return; if ((w0<=0) || (h0<=0)) return; @@ -94,7 +94,7 @@ void gpuMoveImage(PtrUnion packet) if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024)) { - u16 *psxVuw=gpu_unai.vram; + le16_t *psxVuw=gpu_unai.vram; s32 i,j; for(j=0;j>1); lpDst += ((FRAME_OFFSET(x1, y1))>>1); if (w0&1) @@ -127,7 +127,7 @@ void gpuMoveImage(PtrUnion packet) w0>>=1; if (!w0) { do { - *((u16*)lpDst) = *((u16*)lpSrc); + *((le16_t*)lpDst) = *((le16_t*)lpSrc); lpDst += x1; lpSrc += x1; } while (--h0); @@ -135,7 +135,7 @@ void gpuMoveImage(PtrUnion packet) do { x0=w0; do { *lpDst++ = *lpSrc++; } while (--x0); - *((u16*)lpDst) = *((u16*)lpSrc); + *((le16_t*)lpDst) = *((le16_t*)lpSrc); lpDst += x1; lpSrc += x1; } while (--h0); @@ -157,11 +157,11 @@ void gpuMoveImage(PtrUnion packet) void gpuClearImage(PtrUnion packet) { s32 x0, y0, w0, h0; - x0 = packet.S2[2]; - y0 = packet.S2[3]; - w0 = packet.S2[4] & 0x3ff; - h0 = packet.S2[5] & 0x3ff; - + x0 = le16_to_s16(packet.U2[2]); + y0 = le16_to_s16(packet.U2[3]); + w0 = le16_to_s16(packet.U2[4]) & 0x3ff; + h0 = le16_to_s16(packet.U2[5]) & 0x3ff; + w0 += x0; if (x0 < 0) x0 = 0; if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH; @@ -176,11 +176,11 @@ void gpuClearImage(PtrUnion packet) #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0); #endif - + if (x0&1) { - u16* pixel = (u16*)gpu_unai.vram + FRAME_OFFSET(x0, y0); - u16 rgb = GPU_RGB16(packet.U4[0]); + le16_t* pixel = 
gpu_unai.vram + FRAME_OFFSET(x0, y0); + le16_t rgb = u16_to_le16(GPU_RGB16(le32_to_u32(packet.U4[0]))); y0 = FRAME_WIDTH - w0; do { x0=w0; @@ -190,9 +190,9 @@ void gpuClearImage(PtrUnion packet) } else { - u32* pixel = (u32*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1); - u32 rgb = GPU_RGB16(packet.U4[0]); - rgb |= (rgb<<16); + le32_t* pixel = (le32_t*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1); + u32 _rgb = GPU_RGB16(le32_to_u32(packet.U4[0])); + le32_t rgb = u32_to_le32(_rgb | (_rgb << 16)); if (w0&1) { y0 = (FRAME_WIDTH - w0 +1)>>1; @@ -200,7 +200,7 @@ void gpuClearImage(PtrUnion packet) do { x0=w0; do { *pixel++ = rgb; } while (--x0); - *((u16*)pixel) = (u16)rgb; + *((u16*)pixel) = (u16)le32_raw(rgb); pixel += y0; } while (--h0); } diff --git a/plugins/gpu_unai/gpu_raster_line.h b/plugins/gpu_unai/gpu_raster_line.h index 2a7b4227..a338f974 100644 --- a/plugins/gpu_unai/gpu_raster_line.h +++ b/plugins/gpu_unai/gpu_raster_line.h @@ -74,10 +74,10 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) const int xmax = gpu_unai.DrawingArea[2] - 1; const int ymax = gpu_unai.DrawingArea[3] - 1; - x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0]; - y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1]; - x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0]; - y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1]; + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0]; + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1]; + x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[4])) + gpu_unai.DrawingOffset[0]; + y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[5])) + gpu_unai.DrawingOffset[1]; // Always draw top to bottom, so ensure y0 <= y1 if (y0 > y1) { @@ -177,12 +177,9 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) err_adjdown; // Subract this from err_term after drawing longer run // Color to draw with (16 bits, highest of which is unset mask bit) - uintptr_t col16 = 
GPU_RGB16(packet.U4[0]); + uintptr_t col16 = GPU_RGB16(le32_to_u32(packet.U4[0])); - // We use u8 pointers even though PS1 has u16 framebuffer. - // This allows pixel-drawing functions to increment dst pointer - // directly by the passed 'incr' value, not having to shift it first. - u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth; + le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL; // SPECIAL CASE: Vertical line if (dx == 0) { @@ -278,7 +275,7 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) // First run of pixels dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); - dst += incr_minor; + dst += incr_minor / 2; // Middle runs of pixels while (--minor > 0) { @@ -292,7 +289,7 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) } dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); - dst += incr_minor; + dst += incr_minor / 2; } // Final run of pixels @@ -321,13 +318,13 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) const int xmax = gpu_unai.DrawingArea[2] - 1; const int ymax = gpu_unai.DrawingArea[3] - 1; - x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0]; - y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1]; - x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0]; - y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1]; + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0]; + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1]; + x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[6])) + gpu_unai.DrawingOffset[0]; + y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[7])) + gpu_unai.DrawingOffset[1]; - u32 col0 = packet.U4[0]; - u32 col1 = packet.U4[2]; + u32 col0 = le32_to_u32(packet.U4[0]); + u32 col1 = le32_to_u32(packet.U4[2]); // Always draw top to bottom, so ensure y0 <= y1 if (y0 > y1) { @@ -519,10 +516,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD 
gpuPixelSpanDriver) gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; - // We use u8 pointers even though PS1 has u16 framebuffer. - // This allows pixel-drawing functions to increment dst pointer - // directly by the passed 'incr' value, not having to shift it first. - u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth; + le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL; // SPECIAL CASE: Vertical line if (dx == 0) { @@ -547,7 +541,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) if (db) gcol.b_incr /= dy; } #endif - + gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); return; } @@ -696,7 +690,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) // First run of pixels dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); - dst += incr_minor; + dst += incr_minor / 2; // Middle runs of pixels while (--minor > 0) { @@ -710,7 +704,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) } dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); - dst += incr_minor; + dst += incr_minor / 2; } // Final run of pixels diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index b30286db..ff6dc00d 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -31,11 +31,19 @@ struct PolyVertex { s32 x, y; // Sign-extended 11-bit X,Y coords union { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + struct { u8 pad[2], v, u; } tex; // Texture coords (if used) +#else struct { u8 u, v, pad[2]; } tex; // Texture coords (if used) +#endif u32 tex_word; }; union { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used) +#else struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used) +#endif u32 col_word; }; }; @@ -68,30 +76,30 @@ static void polyInitVertexBuffer(PolyVertex *vbuf, 
const PtrUnion packet, PolyTy vert_stride++; int num_verts = (is_quad) ? 4 : 3; - u32 *ptr; + le32_t *ptr; // X,Y coords, adjusted by draw offsets s32 x_off = gpu_unai.DrawingOffset[0]; s32 y_off = gpu_unai.DrawingOffset[1]; ptr = &packet.U4[1]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) { - s16* coord_ptr = (s16*)ptr; - vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off; - vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off; + u32 coords = le32_to_u32(*ptr); + vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off; + vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off; } // U,V texture coords (if applicable) if (texturing) { ptr = &packet.U4[2]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) - vbuf[i].tex_word = *ptr; + vbuf[i].tex_word = le32_to_u32(*ptr); } // Colors (if applicable) if (gouraud) { ptr = &packet.U4[0]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) - vbuf[i].col_word = *ptr; + vbuf[i].col_word = le32_to_u32(*ptr); } } @@ -218,7 +226,7 @@ gpuDrawPolyF - Flat-shaded, untextured poly void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) { // Set up bgr555 color to be used across calls in inner driver - gpu_unai.PixelData = GPU_RGB16(packet.U4[0]); + gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0])); PolyVertex vbuf[4]; polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad); @@ -342,7 +350,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; int li=gpu_unai.ilace_mask; int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); @@ -652,7 +660,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + 
le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; int li=gpu_unai.ilace_mask; int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); @@ -997,7 +1005,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; int li=gpu_unai.ilace_mask; int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); @@ -1392,7 +1400,7 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua if (loop1 <= 0) continue; - u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; int li=gpu_unai.ilace_mask; int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index 91f7bc0e..ea4e82f2 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -32,11 +32,11 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y, // or sprites in 1st level of SkullMonkeys disappear when walking right. 
// This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: - x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]); - y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]); + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]); - u32 w = packet.U2[6] & 0x3ff; // Max width is 1023 - u32 h = packet.U2[7] & 0x1ff; // Max height is 511 + u32 w = le16_to_u16(packet.U2[6]) & 0x3ff; // Max width is 1023 + u32 h = le16_to_u16(packet.U2[7]) & 0x1ff; // Max height is 511 x1 = x0 + w; y1 = y0 + h; @@ -63,7 +63,7 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) gpu_unai.g5 = packet.U1[1] >> 3; gpu_unai.b5 = packet.U1[2] >> 3; - u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)]; + le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; const int li=gpu_unai.ilace_mask; const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); @@ -98,8 +98,8 @@ void gpuDrawS16(PtrUnion packet) //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y, // or sprites in 1st level of SkullMonkeys disappear when walking right. 
// This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: - x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]); - y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]); + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]); xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; @@ -109,7 +109,7 @@ void gpuDrawS16(PtrUnion packet) if (x0 > xmax - 16 || x0 < xmin || ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) { // send corner cases to general handler - packet.U4[3] = 0x00100010; + packet.U4[3] = u32_to_le32(0x00100010); gpuDrawS(packet, gpuSpriteSpanFn<0x20>); return; } @@ -133,11 +133,11 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) s32 x0, x1, y0, y1; // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon: - x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]); - y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]); + x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]); + y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]); - u32 w = packet.U2[4] & 0x3ff; // Max width is 1023 - u32 h = packet.U2[5] & 0x1ff; // Max height is 511 + u32 w = le16_to_u16(packet.U2[4]) & 0x3ff; // Max width is 1023 + u32 h = le16_to_u16(packet.U2[5]) & 0x1ff; // Max height is 511 x1 = x0 + w; y1 = y0 + h; @@ -154,8 +154,8 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) x1 -= x0; if (x1 <= 0) return; - const u16 Data = GPU_RGB16(packet.U4[0]); - u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)]; + const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0])); + le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; const int li=gpu_unai.ilace_mask; const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); const int 
pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h index 6886eb86..7aa06679 100644 --- a/plugins/gpu_unai/gpu_unai.h +++ b/plugins/gpu_unai/gpu_unai.h @@ -54,25 +54,67 @@ #define s32 int32_t #define s64 int64_t +typedef struct { + u32 v; +} le32_t; + +typedef struct { + u16 v; +} le16_t; + +static inline u32 le32_to_u32(le32_t le) +{ + return LE32TOH(le.v); +} + +static inline s32 le32_to_s32(le32_t le) +{ + return (int32_t) LE32TOH(le.v); +} + +static inline u32 le32_raw(le32_t le) +{ + return le.v; +} + +static inline le32_t u32_to_le32(u32 u) +{ + return (le32_t){ .v = HTOLE32(u) }; +} + +static inline u16 le16_to_u16(le16_t le) +{ + return LE16TOH(le.v); +} + +static inline s16 le16_to_s16(le16_t le) +{ + return (int16_t) LE16TOH(le.v); +} + +static inline u16 le16_raw(le16_t le) +{ + return le.v; +} + +static inline le16_t u16_to_le16(u16 u) +{ + return (le16_t){ .v = HTOLE16(u) }; +} + union PtrUnion { - u32 *U4; - s32 *S4; - u16 *U2; - s16 *S2; + le32_t *U4; + le16_t *U2; u8 *U1; - s8 *S1; void *ptr; }; union GPUPacket { - u32 U4[16]; - s32 S4[16]; - u16 U2[32]; - s16 S2[32]; + le32_t U4[16]; + le16_t U2[32]; u8 U1[64]; - s8 S1[64]; }; template static inline void SwapValues(T &x, T &y) @@ -136,7 +178,7 @@ static inline s32 GPU_DIV(s32 rs, s32 rt) struct gpu_unai_t { u32 GPU_GP1; GPUPacket PacketBuffer; - u16 *vram; + le16_t *vram; #ifdef USE_GPULIB u16 *downscale_vram; @@ -164,7 +206,7 @@ struct gpu_unai_t { struct { s32 px,py; s32 x_end,y_end; - u16* pvram; + le16_t* pvram; u32 *last_dma; // Last dma pointer bool FrameToRead; // Load image in progress bool FrameToWrite; // Store image in progress @@ -197,8 +239,8 @@ struct gpu_unai_t { s16 DrawingOffset[2]; // [0] : Drawing offset X (signed) // [1] : Drawing offset Y (signed) - u16* TBA; // Ptr to current texture in VRAM - u16* CBA; // Ptr to current CLUT in VRAM + le16_t* TBA; // Ptr to current 
texture in VRAM + le16_t* CBA; // Ptr to current CLUT in VRAM //////////////////////////////////////////////////////////////////////////// // Inner Loop parameters diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 710a7137..453cc7a6 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -70,7 +70,7 @@ #define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096) -INLINE void scale_640_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) { +INLINE void scale_640_to_320(uint16_t *dest, const le16_t *src, bool isRGB24) { size_t uCount = 320; if(isRGB24) { @@ -84,17 +84,17 @@ INLINE void scale_640_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) src8 += 4; } while(--uCount); } else { - const uint16_t* src16 = src; + const le16_t *src16 = src; uint16_t* dst16 = dest; do { - *dst16++ = *src16; + *dst16++ = le16_to_u16(*src16); src16 += 2; } while(--uCount); } } -INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) { +INLINE void scale_512_to_320(uint16_t *dest, const le16_t *src, bool isRGB24) { size_t uCount = 64; if(isRGB24) { @@ -122,17 +122,17 @@ INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) src8 += 4; } while(--uCount); } else { - const uint16_t* src16 = src; + const le16_t* src16 = src; uint16_t* dst16 = dest; do { - *dst16++ = *src16++; - *dst16++ = *src16; + *dst16++ = le16_to_u16(*src16++); + *dst16++ = le16_to_u16(*src16); src16 += 2; - *dst16++ = *src16++; - *dst16++ = *src16; + *dst16++ = le16_to_u16(*src16++); + *dst16++ = le16_to_u16(*src16); src16 += 2; - *dst16++ = *src16; + *dst16++ = le16_to_u16(*src16); src16 += 2; } while(--uCount); } @@ -141,7 +141,7 @@ INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h) { uint16_t *dest = gpu_unai.downscale_vram; - const uint16_t *src = gpu_unai.vram; + const le16_t *src = 
gpu_unai.vram; bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); int stride = 1024, dstride = 1024, lines = *h, orig_w = *w; @@ -184,7 +184,12 @@ static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_ size_t size = isRGB24 ? *w * 3 : *w * 2; do { - memcpy(dest + fb_offset_dest, src + fb_offset_src, size); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + for (unsigned int i = 0; i < size / 2; i++) + dest[fb_offset_dest + i] = le16_to_u16(src[fb_offset_src + i]); +#else + memcpy(dest + fb_offset_dest, (u16 *)src + fb_offset_src, size); +#endif fb_offset_src = (fb_offset_src + stride) & fb_mask; fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; } while(--lines); @@ -223,7 +228,7 @@ static void unmap_downscale_buffer(void) int renderer_init(void) { memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); - gpu_unai.vram = (u16*)gpu.vram; + gpu_unai.vram = (le16_t *)gpu.vram; // Original standalone gpu_unai initialized TextureWindow[]. I added the // same behavior here, since it seems unsafe to leave [2],[3] unset when @@ -392,11 +397,12 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) extern const unsigned char cmd_lengths[256]; -int do_cmd_list(u32 *list, int list_len, int *last_cmd) +int do_cmd_list(u32 *_list, int list_len, int *last_cmd) { u32 cmd = 0, len, i; - u32 *list_start = list; - u32 *list_end = list + list_len; + le32_t *list = (le32_t *)_list; + le32_t *list_start = list; + le32_t *list_end = list + list_len; //TODO: set ilace_mask when resolution changes instead of every time, // eliminate #ifdef below.
@@ -411,7 +417,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) for (; list < list_end; list += 1 + len) { - cmd = *list >> 24; + cmd = le32_to_u32(*list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { cmd = -1; @@ -447,8 +453,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x25: case 0x26: case 0x27: { // Textured 3-pt poly - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = (gpu_unai.blit_mask?1024:0) | @@ -483,8 +489,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x2D: case 0x2E: case 0x2F: { // Textured 4-pt poly - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16); u32 driver_idx = (gpu_unai.blit_mask?1024:0) | @@ -524,8 +530,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x35: case 0x36: case 0x37: { // Gouraud-shaded, textured 3-pt poly - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ (gpu_unai.blit_mask?1024:0) | Dithering | @@ -553,8 +559,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x3D: case 0x3E: case 0x3F: { // Gouraud-shaded, textured 4-pt poly - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); - gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); + gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16); PP driver = gpuPolySpanDrivers[ (gpu_unai.blit_mask?1024:0) | Dithering | @@ -576,7 +582,7 @@ int 
do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x48 ... 0x4F: { // Monochrome line strip u32 num_vertexes = 1; - u32 *list_position = &(list[2]); + le32_t *list_position = &list[2]; // Shift index right by one, as untextured prims don't use lighting u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; @@ -594,7 +600,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } @@ -615,7 +621,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x58 ... 0x5F: { // Gouraud-shaded line strip u32 num_vertexes = 1; - u32 *list_position = &(list[2]); + le32_t *list_position = &list[2]; // Shift index right by one, as untextured prims don't use lighting u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; @@ -637,7 +643,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000)) break; } @@ -656,7 +662,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x65: case 0x66: case 0x67: { // Textured rectangle (variable size) - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: @@ -673,7 +679,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) // alone, I don't want to slow rendering down too much. 
(TODO) //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -683,7 +689,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x69: case 0x6A: case 0x6B: { // Monochrome rectangle (1x1 dot) - gpu_unai.PacketBuffer.U4[2] = 0x00010001; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; @@ -692,7 +698,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x71: case 0x72: case 0x73: { // Monochrome rectangle (8x8) - gpu_unai.PacketBuffer.U4[2] = 0x00080008; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; @@ -701,14 +707,14 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x75: case 0x76: case 0x77: { // Textured rectangle (8x8) - gpu_unai.PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_unai.PacketBuffer.U4[0] & 
0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -718,7 +724,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) case 0x79: case 0x7A: case 0x7B: { // Monochrome rectangle (16x16) - gpu_unai.PacketBuffer.U4[2] = 0x00100010; + gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; gpuDrawT(packet, driver); } break; @@ -728,7 +734,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) #ifdef __arm__ if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0) { - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); gpuDrawS16(packet); break; } @@ -736,13 +742,13 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) #endif case 0x7E: case 0x7F: { // Textured rectangle (16x16) - gpu_unai.PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010); + gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16); u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); //senquack - Only color 808080h-878787h allows skipping lighting calculation: //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) // Strip lower 3 bits of each color and determine if lighting should be used: - if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080)) driver_idx |= Lighting; PS driver = gpuSpriteSpanDrivers[driver_idx]; gpuDrawS(packet, driver); @@ -771,7 +777,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) goto breakloop; #endif case 
0xE1 ... 0xE6: { // Draw settings - gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]); + gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0])); } break; } } @@ -784,7 +790,7 @@ breakloop: return list - list_start; } -void renderer_sync_ecmds(uint32_t *ecmds) +void renderer_sync_ecmds(u32 *ecmds) { int dummy; do_cmd_list(&ecmds[1], 6, &dummy); @@ -806,7 +812,7 @@ void renderer_set_interlace(int enable, int is_odd) // Handle any gpulib settings applicable to gpu_unai: void renderer_set_config(const struct rearmed_cbs *cbs) { - gpu_unai.vram = (u16*)gpu.vram; + gpu_unai.vram = (le16_t *)gpu.vram; gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force; gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip; gpu_unai.config.lighting = cbs->gpu_unai.lighting;