unai: Add support for big-endian

author    Paul Cercueil <paul@crapouillou.net>
          Wed, 30 Aug 2023 14:36:56 +0000 (16:36 +0200)
committer Paul Cercueil <paul@crapouillou.net>
          Wed, 30 Aug 2023 16:57:14 +0000 (18:57 +0200)

diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai/gpu_command.h
index c39c81b..cf6b62b 100644
--- a/plugins/gpu_unai/gpu_command.h
+++ b/plugins/gpu_unai/gpu_command.h
@@ -45,13 +45,13 @@ void gpuSetTexture(u16 tpage)
         
         gpu_unai.BLEND_MODE  = ((tpage>>5) & 3) << 3;
         gpu_unai.TEXT_MODE   = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one
-       gpu_unai.TBA = &((u16*)gpu_unai.vram)[FRAME_OFFSET(tx, ty)];
+       gpu_unai.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)];
  }
  
  ///////////////////////////////////////////////////////////////////////////////
  INLINE void gpuSetCLUT(u16 clut)
  {
-       gpu_unai.CBA = &((u16*)gpu_unai.vram)[(clut & 0x7FFF) << 4];
+       gpu_unai.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4];
  }
  
  #ifdef  ENABLE_GPU_NULL_SUPPORT
@@ -193,8 +193,8 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
-                               gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+                               gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
  
                                 u32 driver_idx =
                                         (gpu_unai.blit_mask?1024:0) |
@@ -241,8 +241,8 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
-                               gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+                               gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
  
                                 u32 driver_idx =
                                         (gpu_unai.blit_mask?1024:0) |
@@ -294,8 +294,8 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
-                               gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+                               gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
                                 PP driver = gpuPolySpanDrivers[
                                         (gpu_unai.blit_mask?1024:0) |
                                         Dithering |
@@ -335,8 +335,8 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
-                               gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+                               gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
                                 PP driver = gpuPolySpanDrivers[
                                         (gpu_unai.blit_mask?1024:0) |
                                         Dithering |
@@ -383,7 +383,7 @@ void gpuSendPacketFunction(const int PRIM)
                                 gpu_unai.fb_dirty = true;
                                 DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM));
                         }
-                       if ((gpu_unai.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000)
+                       if ((le32_raw(gpu_unai.PacketBuffer.U4[3]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000))
                         {
                                 gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
                                 gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[3];
@@ -430,7 +430,7 @@ void gpuSendPacketFunction(const int PRIM)
                                 gpu_unai.fb_dirty = true;
                                 DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM));
                         }
-                       if ((gpu_unai.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000)
+                       if ((le32_raw(gpu_unai.PacketBuffer.U4[4]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000))
                         {
                                 gpu_unai.PacketBuffer.U1[3 + (2 * 4)] = gpu_unai.PacketBuffer.U1[3 + (0 * 4)];
                                 gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2];
@@ -462,7 +462,7 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
                                 u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
  
                                 // This fixes Silent Hill running animation on loading screens:
@@ -478,7 +478,7 @@ void gpuSendPacketFunction(const int PRIM)
                                 //  alone, I don't want to slow rendering down too much. (TODO)
                                 //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
                                 // Strip lower 3 bits of each color and determine if lighting should be used:
-                               if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+                               if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
                                         driver_idx |= Lighting;
                                 PS driver = gpuSpriteSpanDrivers[driver_idx];
                                 gpuDrawS(packet, driver);
@@ -494,7 +494,7 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpu_unai.PacketBuffer.U4[2] = 0x00010001;
+                               gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
                                 PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
                                 gpuDrawT(packet, driver);
                                 gpu_unai.fb_dirty = true;
@@ -509,7 +509,7 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpu_unai.PacketBuffer.U4[2] = 0x00080008;
+                               gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
                                 PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
                                 gpuDrawT(packet, driver);
                                 gpu_unai.fb_dirty = true;
@@ -524,14 +524,14 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpu_unai.PacketBuffer.U4[3] = 0x00080008;
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+                               gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
                                 u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
  
                                 //senquack - Only color 808080h-878787h allows skipping lighting calculation:
                                 //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
                                 // Strip lower 3 bits of each color and determine if lighting should be used:
-                               if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+                               if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
                                         driver_idx |= Lighting;
                                 PS driver = gpuSpriteSpanDrivers[driver_idx];
                                 gpuDrawS(packet, driver);
@@ -547,7 +547,7 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpu_unai.PacketBuffer.U4[2] = 0x00100010;
+                               gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
                                 PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
                                 gpuDrawT(packet, driver);
                                 gpu_unai.fb_dirty = true;
@@ -561,7 +561,7 @@ void gpuSendPacketFunction(const int PRIM)
                         /* Notaz 4bit sprites optimization */
                         if ((!gpu_unai.frameskip.skipGPU) && (!(gpu_unai.GPU_GP1&0x180)) && (!(gpu_unai.Masking|gpu_unai.PixelMSB)))
                         {
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
                                 gpuDrawS16(packet);
                                 gpu_unai.fb_dirty = true;
                                 break;
@@ -572,14 +572,14 @@ void gpuSendPacketFunction(const int PRIM)
                         if (!gpu_unai.frameskip.skipGPU)
                         {
                                 NULL_GPU();
-                               gpu_unai.PacketBuffer.U4[3] = 0x00100010;
-                               gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+                               gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
+                               gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
                                 u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
  
                                 //senquack - Only color 808080h-878787h allows skipping lighting calculation:
                                 //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
                                 // Strip lower 3 bits of each color and determine if lighting should be used:
-                               if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+                               if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
                                         driver_idx |= Lighting;
                                 PS driver = gpuSpriteSpanDrivers[driver_idx];
                                 gpuDrawS(packet, driver);
@@ -609,7 +609,7 @@ void gpuSendPacketFunction(const int PRIM)
                         DO_LOG(("gpuStoreImage(0x%x)\n",PRIM));
                         break;
                 case 0xE1 ... 0xE6: { // Draw settings
-                       gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]);
+                       gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0]));
                 } break;
         }
  }
diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h
index 4aab604..eb209ef 100644
--- a/plugins/gpu_unai/gpu_inner.h
+++ b/plugins/gpu_unai/gpu_inner.h
@@ -123,12 +123,8 @@ static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b)
  //  rectangles) to use the same set of functions. Since tiles are always
  //  monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded
  //  gpuPixelSpanFn functions (TODO?).
-//
-// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here,
-//       so that pDst can be incremented directly by 'incr' parameter
-//       without having to shift it before use.
  template<int CF>
-static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
+static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
  {
         // Blend func can save an operation if it knows uSrc MSB is
         //  unset. For untextured prims, this is always true.
@@ -139,6 +135,9 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
         u32 r, g, b;
         s32 r_incr, g_incr, b_incr;
  
+       // 'incr' is passed in bytes by the caller, but pDst is a le16_t pointer: convert to pixel units
+       incr /= 2;
+
         if (CF_GOURAUD) {
                 gcPtr = (GouraudColor*)data;
                 r = gcPtr->r;  r_incr = gcPtr->r_incr;
@@ -152,15 +151,15 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
                 if (!CF_GOURAUD)
                 {   // NO GOURAUD
                         if (!CF_MASKCHECK && !CF_BLEND) {
-                               if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
-                               else            { *(u16*)pDst = col;          }
+                               if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+                               else            { *pDst = u16_to_le16(col);          }
                         } else if (CF_MASKCHECK && !CF_BLEND) {
-                               if (!(*(u16*)pDst & 0x8000)) {
-                                       if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
-                                       else            { *(u16*)pDst = col;          }
+                               if (!(le16_raw(*pDst) & HTOLE16(0x8000))) {
+                                       if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+                                       else            { *pDst = u16_to_le16(col);          }
                                 }
                         } else {
-                               uint_fast16_t uDst = *(u16*)pDst;
+                               uint_fast16_t uDst = le16_to_u16(*pDst);
                                 if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
  
                                 uint_fast16_t uSrc = col;
@@ -168,8 +167,8 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
                                 if (CF_BLEND)
                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
  
-                               if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
-                               else            { *(u16*)pDst = uSrc;          }
+                               if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+                               else            { *pDst = u16_to_le16(uSrc);          }
                         }
  
                 } else
@@ -177,16 +176,16 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
  
                         if (!CF_MASKCHECK && !CF_BLEND) {
                                 col = gpuGouraudColor15bpp(r, g, b);
-                               if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
-                               else            { *(u16*)pDst = col;          }
+                               if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+                               else            { *pDst = u16_to_le16(col);          }
                         } else if (CF_MASKCHECK && !CF_BLEND) {
                                 col = gpuGouraudColor15bpp(r, g, b);
-                               if (!(*(u16*)pDst & 0x8000)) {
-                                       if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
-                                       else            { *(u16*)pDst = col;          }
+                               if (!(le16_raw(*pDst) & HTOLE16(0x8000))) {
+                                       if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+                                       else            { *pDst = u16_to_le16(col);          }
                                 }
                         } else {
-                               uint_fast16_t uDst = *(u16*)pDst;
+                               uint_fast16_t uDst = le16_to_u16(*pDst);
                                 if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
                                 col = gpuGouraudColor15bpp(r, g, b);
  
@@ -199,8 +198,8 @@ static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
                                 if (CF_BLEND)
                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
  
-                               if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
-                               else            { *(u16*)pDst = uSrc;          }
+                               if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+                               else            { *pDst = u16_to_le16(uSrc);          }
                         }
                 }
  
@@ -228,7 +227,7 @@ endpixel:
         return pDst;
  }
  
-static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
+static le16_t* PixelSpanNULL(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
  {
         #ifdef ENABLE_GPU_LOG_SUPPORT
                 fprintf(stdout,"PixelSpanNULL()\n");
@@ -238,7 +237,7 @@ static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
  
  ///////////////////////////////////////////////////////////////////////////////
  //  PixelSpan (lines) innerloops driver
-typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len);
+typedef le16_t* (*PSD)(le16_t* dst, uintptr_t data, ptrdiff_t incr, size_t len);
  
  const PSD gpuPixelSpanDrivers[64] =
  { 
@@ -282,14 +281,26 @@ const PSD gpuPixelSpanDrivers[64] =
  //  GPU Tiles innerloops generator
  
  template<int CF>
-static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data)
+static void gpuTileSpanFn(le16_t *pDst, u32 count, u16 data)
  {
+       le16_t ldata;
+
         if (!CF_MASKCHECK && !CF_BLEND) {
-               if (CF_MASKSET) { data = data | 0x8000; }
-               do { *pDst++ = data; } while (--count);
+               if (CF_MASKSET)
+                       ldata = u16_to_le16(data | 0x8000);
+               else
+                       ldata = u16_to_le16(data);
+               do { *pDst++ = ldata; } while (--count);
         } else if (CF_MASKCHECK && !CF_BLEND) {
-               if (CF_MASKSET) { data = data | 0x8000; }
-               do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
+               if (CF_MASKSET)
+                       ldata = u16_to_le16(data | 0x8000);
+               else
+                       ldata = u16_to_le16(data);
+               do {
+                       if (!(le16_raw(*pDst) & HTOLE16(0x8000)))
+                               *pDst = ldata;
+                       pDst++;
+               } while (--count);
         } else
         {
                 // Blend func can save an operation if it knows uSrc MSB is
@@ -299,16 +310,16 @@ static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data)
                 uint_fast16_t uSrc, uDst;
                 do
                 {
-                       if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
-                       if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; }
+                       if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
+                       if (CF_MASKCHECK) if (uDst&0x8000) { goto endtile; }
  
                         uSrc = data;
  
                         if (CF_BLEND)
                                 uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
  
-                       if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
-                       else            { *pDst = uSrc;          }
+                       if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+                       else            { *pDst = u16_to_le16(uSrc);          }
  
                         //senquack - Did not apply "Silent Hill" mask-bit fix to here.
                         // It is hard to tell from scarce documentation available and
@@ -322,7 +333,7 @@ endtile:
         }
  }
  
-static void TileNULL(u16 *pDst, u32 count, u16 data)
+static void TileNULL(le16_t *pDst, u32 count, u16 data)
  {
         #ifdef ENABLE_GPU_LOG_SUPPORT
                 fprintf(stdout,"TileNULL()\n");
@@ -331,7 +342,7 @@ static void TileNULL(u16 *pDst, u32 count, u16 data)
  
  ///////////////////////////////////////////////////////////////////////////////
  //  Tiles innerloops driver
-typedef void (*PT)(u16 *pDst, u32 count, u16 data);
+typedef void (*PT)(le16_t *pDst, u32 count, u16 data);
  
  // Template instantiation helper macros
  #define TI(cf) gpuTileSpanFn<(cf)>
@@ -355,7 +366,7 @@ const PT gpuTileSpanDrivers[32] = {
  //  GPU Sprites innerloops generator
  
  template<int CF>
-static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0)
+static void gpuSpriteSpanFn(le16_t *pDst, u32 count, u8* pTxt, u32 u0)
  {
         // Blend func can save an operation if it knows uSrc MSB is unset.
         //  Untextured prims can always skip (source color always comes with MSB=0).
@@ -378,22 +389,22 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0)
                 u0_mask <<= 1;
         }
  
-       const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
+       const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
  
         do
         {
-               if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+               if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
                 if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; }
  
                 if (CF_TEXTMODE==1) {  //  4bpp (CLUT)
                         u8 rgb = pTxt[(u0 & u0_mask)>>1];
-                       uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf];
+                       uSrc = le16_to_u16(CBA_[(rgb>>((u0&1)<<2))&0xf]);
                 }
                 if (CF_TEXTMODE==2) {  //  8bpp (CLUT)
-                       uSrc = CBA_[pTxt[u0 & u0_mask]];
+                       uSrc = le16_to_u16(CBA_[pTxt[u0 & u0_mask]]);
                 }
                 if (CF_TEXTMODE==3) {  // 16bpp
-                       uSrc = *(u16*)(&pTxt[u0 & u0_mask]);
+                       uSrc = le16_to_u16(*(le16_t*)(&pTxt[u0 & u0_mask]));
                 }
  
                 if (!uSrc) goto endsprite;
@@ -410,9 +421,9 @@ static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0)
                 if (CF_BLEND && should_blend)
                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
  
-               if (CF_MASKSET)                                    { *pDst = uSrc | 0x8000; }
-               else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; }
-               else                                               { *pDst = uSrc;          }
+               if (CF_MASKSET)                                    { *pDst = u16_to_le16(uSrc | 0x8000); }
+               else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); }
+               else                                               { *pDst = u16_to_le16(uSrc);          }
  
  endsprite:
                 u0 += (CF_TEXTMODE==3) ? 2 : 1;
@@ -421,7 +432,7 @@ endsprite:
         while (--count);
  }
  
-static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0)
+static void SpriteNULL(le16_t *pDst, u32 count, u8* pTxt, u32 u0)
  {
         #ifdef ENABLE_GPU_LOG_SUPPORT
                 fprintf(stdout,"SpriteNULL()\n");
@@ -432,7 +443,7 @@ static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0)
  
  ///////////////////////////////////////////////////////////////////////////////
  //  Sprite innerloops driver
-typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0);
+typedef void (*PS)(le16_t *pDst, u32 count, u8* pTxt, u32 u0);
  
  // Template instantiation helper macros
  #define TI(cf) gpuSpriteSpanFn<(cf)>
@@ -485,7 +496,7 @@ const PS gpuSpriteSpanDrivers[256] = {
  //             relevant blend/light headers.
  // (see README_senquack.txt)
  template<int CF>
-static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
+static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
  {
         // Blend func can save an operation if it knows uSrc MSB is unset.
         //  Untextured prims can always skip this (src color MSB is always 0).
@@ -509,7 +520,7 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
                                 //  gravestone text at end of Medieval intro sequence. -senquack
                                 //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } }
  
-                               if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+                               if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst);
                                 if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } }
  
                                 uSrc = pix15;
@@ -517,8 +528,8 @@ static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
                                 if (CF_BLEND)
                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
  
-                               if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
-                               else            { *pDst = uSrc;          }
+                               if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+                               else            { *pDst = u16_to_le16(uSrc);          }
  
  endpolynotextnogou:
                                 pDst++;
@@ -536,7 +547,7 @@ endpolynotextnogou:
                                 // See note in above loop regarding CF_BLITMASK
                                 //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; }
  
-                               if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+                               if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst);
                                 if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; }
  
                                 if (CF_DITHER) {
@@ -555,8 +566,8 @@ endpolynotextnogou:
                                                 uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
                                 }
  
-                               if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
-                               else            { *pDst = uSrc;          }
+                               if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+                               else            { *pDst = u16_to_le16(uSrc);          }
  
  endpolynotextgou:
                                 pDst++;
@@ -578,8 +589,8 @@ endpolynotextgou:
                 u32 l_u = gpu_unai.u & l_u_msk;   u32 l_v = gpu_unai.v & l_v_msk;
                 s32 l_u_inc = gpu_unai.u_inc;     s32 l_v_inc = gpu_unai.v_inc;
  
-               const u16* TBA_ = gpu_unai.TBA;
-               const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
+               const le16_t* TBA_ = gpu_unai.TBA;
+               const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
  
                 u8 r5, g5, b5;
                 u8 r8, g8, b8;
@@ -606,7 +617,7 @@ endpolynotextgou:
                 do
                 {
                         if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; }
-                       if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+                       if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
                         if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; }
  
                         //senquack - adapted to work with new 22.10 fixed point routines:
@@ -615,15 +626,15 @@ endpolynotextgou:
                                 u32 tu=(l_u>>10);
                                 u32 tv=(l_v<<1)&(0xff<<11);
                                 u8 rgb=((u8*)TBA_)[tv+(tu>>1)];
-                               uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf];
+                               uSrc=le16_to_u16(CBA_[(rgb>>((tu&1)<<2))&0xf]);
                                 if (!uSrc) goto endpolytext;
                         }
                         if (CF_TEXTMODE==2) {  //  8bpp (CLUT)
-                               uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])];
+                               uSrc = le16_to_u16(CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]);
                                 if (!uSrc) goto endpolytext;
                         }
                         if (CF_TEXTMODE==3) {  // 16bpp
-                               uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))];
+                               uSrc = le16_to_u16(TBA_[(l_u>>10)+((l_v)&(0xff<<10))]);
                                 if (!uSrc) goto endpolytext;
                         }
  
@@ -661,9 +672,9 @@ endpolynotextgou:
                                         uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
                         }
  
-                       if (CF_MASKSET)                                    { *pDst = uSrc | 0x8000; }
-                       else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; }
-                       else                                               { *pDst = uSrc;          }
+                       if (CF_MASKSET)                                    { *pDst = u16_to_le16(uSrc | 0x8000); }
+                       else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); }
+                       else                                               { *pDst = u16_to_le16(uSrc);          }
  endpolytext:
                         pDst++;
                         l_u = (l_u + l_u_inc) & l_u_msk;
@@ -674,7 +685,7 @@ endpolytext:
         }
  }
  
-static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
+static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
  {
         #ifdef ENABLE_GPU_LOG_SUPPORT
                 fprintf(stdout,"PolyNULL()\n");
@@ -683,7 +694,7 @@ static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
  
  ///////////////////////////////////////////////////////////////////////////////
  //  Polygon innerloops driver
-typedef void (*PP)(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count);
+typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count);
  
  // Template instantiation helper macros
  #define TI(cf) gpuPolySpanFn<(cf)>
diff --git a/plugins/gpu_unai/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h

index 0e7e3e8..4368ddb 100644 (file)
--- a/plugins/gpu_unai/gpu_inner_quantization.h
+++ b/plugins/gpu_unai/gpu_inner_quantization.h
@@ -85,11 +85,11 @@ static void SetupDitheringConstants()
  // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
  ////////////////////////////////////////////////////////////////////////////////
  template <int DITHER>
-GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst)
+GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const le16_t *pDst)
  {
         if (DITHER)
         {
-               u16 fbpos  = (u32)(pDst - gpu_unai.vram);
+               u16 fbpos  = (u32)(pDst - gpu_unai.vram); // pixel index (not byte offset): the masks below assume 1024-pixel rows
                 u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7);
  
                 //clean overflow flags and add
diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h

index 2d34b34..909ca39 100644 (file)
--- a/plugins/gpu_unai/gpu_raster_image.h
+++ b/plugins/gpu_unai/gpu_raster_image.h
@@ -26,10 +26,10 @@
  void gpuLoadImage(PtrUnion packet)
  {
         u16 x0, y0, w0, h0;
-       x0 = packet.U2[2] & 1023;
-       y0 = packet.U2[3] & 511;
-       w0 = packet.U2[4];
-       h0 = packet.U2[5];
+       x0 = le16_to_u16(packet.U2[2]) & 1023;
+       y0 = le16_to_u16(packet.U2[3]) & 511;
+       w0 = le16_to_u16(packet.U2[4]);
+       h0 = le16_to_u16(packet.U2[5]);
  
         if ((y0 + h0) > FRAME_HEIGHT)
         {
@@ -42,7 +42,7 @@ void gpuLoadImage(PtrUnion packet)
         gpu_unai.dma.py = 0;
         gpu_unai.dma.x_end = w0;
         gpu_unai.dma.y_end = h0;
-       gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)];
+       gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)];
  
         gpu_unai.GPU_GP1 |= 0x08000000;
  }
@@ -53,10 +53,10 @@ void gpuLoadImage(PtrUnion packet)
  void gpuStoreImage(PtrUnion packet)
  {
         u16 x0, y0, w0, h0;
-       x0 = packet.U2[2] & 1023;
-       y0 = packet.U2[3] & 511;
-       w0 = packet.U2[4];
-       h0 = packet.U2[5];
+       x0 = le16_to_u16(packet.U2[2]) & 1023;
+       y0 = le16_to_u16(packet.U2[3]) & 511;
+       w0 = le16_to_u16(packet.U2[4]);
+       h0 = le16_to_u16(packet.U2[5]);
  
         if ((y0 + h0) > FRAME_HEIGHT)
         {
@@ -68,7 +68,7 @@ void gpuStoreImage(PtrUnion packet)
         gpu_unai.dma.py = 0;
         gpu_unai.dma.x_end = w0;
         gpu_unai.dma.y_end = h0;
-       gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)];
+       gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)];
         
         gpu_unai.GPU_GP1 |= 0x08000000;
  }
@@ -78,12 +78,12 @@ void gpuMoveImage(PtrUnion packet)
  {
         u32 x0, y0, x1, y1;
         s32 w0, h0;
-       x0 = packet.U2[2] & 1023;
-       y0 = packet.U2[3] & 511;
-       x1 = packet.U2[4] & 1023;
-       y1 = packet.U2[5] & 511;
-       w0 = packet.U2[6];
-       h0 = packet.U2[7];
+       x0 = le16_to_u16(packet.U2[2]) & 1023;
+       y0 = le16_to_u16(packet.U2[3]) & 511;
+       x1 = le16_to_u16(packet.U2[4]) & 1023;
+       y1 = le16_to_u16(packet.U2[5]) & 511;
+       w0 = le16_to_u16(packet.U2[6]);
+       h0 = le16_to_u16(packet.U2[7]);
  
         if( (x0==x1) && (y0==y1) ) return;
         if ((w0<=0) || (h0<=0)) return;
@@ -94,7 +94,7 @@ void gpuMoveImage(PtrUnion packet)
         
         if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024))
         {
-               u16 *psxVuw=gpu_unai.vram;
+               le16_t *psxVuw=gpu_unai.vram;
                 s32 i,j;
             for(j=0;j<h0;j++)
                  for(i=0;i<w0;i++)
@@ -103,8 +103,8 @@ void gpuMoveImage(PtrUnion packet)
         }
         else if ((x0&1)||(x1&1))
         {
-               u16 *lpDst, *lpSrc;
-               lpDst = lpSrc = (u16*)gpu_unai.vram;
+               le16_t *lpDst, *lpSrc;
+               lpDst = lpSrc = gpu_unai.vram;
                 lpSrc += FRAME_OFFSET(x0, y0);
                 lpDst += FRAME_OFFSET(x1, y1);
                 x1 = FRAME_WIDTH - w0;
@@ -117,8 +117,8 @@ void gpuMoveImage(PtrUnion packet)
         }
         else
         {
-               u32 *lpDst, *lpSrc;
-               lpDst = lpSrc = (u32*)(void*)gpu_unai.vram;
+               le32_t *lpDst, *lpSrc;
+               lpDst = lpSrc = (le32_t *)gpu_unai.vram;
                 lpSrc += ((FRAME_OFFSET(x0, y0))>>1);
                 lpDst += ((FRAME_OFFSET(x1, y1))>>1);
                 if (w0&1)
@@ -127,7 +127,7 @@ void gpuMoveImage(PtrUnion packet)
                         w0>>=1;
                         if (!w0) {
                                 do {
-                                       *((u16*)lpDst) = *((u16*)lpSrc);
+                                       *((le16_t*)lpDst) = *((le16_t*)lpSrc);
                                         lpDst += x1;
                                         lpSrc += x1;
                                 } while (--h0);
@@ -135,7 +135,7 @@ void gpuMoveImage(PtrUnion packet)
                         do {
                                 x0=w0;
                                 do { *lpDst++ = *lpSrc++; } while (--x0);
-                               *((u16*)lpDst) = *((u16*)lpSrc);
+                               *((le16_t*)lpDst) = *((le16_t*)lpSrc);
                                 lpDst += x1;
                                 lpSrc += x1;
                         } while (--h0);
@@ -157,11 +157,11 @@ void gpuMoveImage(PtrUnion packet)
  void gpuClearImage(PtrUnion packet)
  {
         s32   x0, y0, w0, h0;
-       x0 = packet.S2[2];
-       y0 = packet.S2[3];
-       w0 = packet.S2[4] & 0x3ff;
-       h0 = packet.S2[5] & 0x3ff;
-        
+       x0 = le16_to_s16(packet.U2[2]);
+       y0 = le16_to_s16(packet.U2[3]);
+       w0 = le16_to_s16(packet.U2[4]) & 0x3ff;
+       h0 = le16_to_s16(packet.U2[5]) & 0x3ff;
+
         w0 += x0;
         if (x0 < 0) x0 = 0;
         if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH;
@@ -176,11 +176,11 @@ void gpuClearImage(PtrUnion packet)
         #ifdef ENABLE_GPU_LOG_SUPPORT
                 fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0);
         #endif
-       
+
         if (x0&1)
         {
-               u16* pixel = (u16*)gpu_unai.vram + FRAME_OFFSET(x0, y0);
-               u16 rgb = GPU_RGB16(packet.U4[0]);
+               le16_t* pixel = gpu_unai.vram + FRAME_OFFSET(x0, y0);
+               le16_t rgb = u16_to_le16(GPU_RGB16(le32_to_u32(packet.U4[0])));
                 y0 = FRAME_WIDTH - w0;
                 do {
                         x0=w0;
@@ -190,9 +190,9 @@ void gpuClearImage(PtrUnion packet)
         }
         else
         {
-               u32* pixel = (u32*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1);
-               u32 rgb = GPU_RGB16(packet.U4[0]);
-               rgb |= (rgb<<16);
+               le32_t* pixel = (le32_t*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1);
+               u32 _rgb = GPU_RGB16(le32_to_u32(packet.U4[0]));
+               le32_t rgb = u32_to_le32(_rgb | (_rgb << 16));
                 if (w0&1)
                 {
                         y0 = (FRAME_WIDTH - w0 +1)>>1;
@@ -200,7 +200,7 @@ void gpuClearImage(PtrUnion packet)
                         do {
                                 x0=w0;
                                 do { *pixel++ = rgb; } while (--x0);
-                               *((u16*)pixel) = (u16)rgb;
+                               *((u16*)pixel) = (u16)le32_raw(rgb);
                                 pixel += y0;
                         } while (--h0);
                 }
diff --git a/plugins/gpu_unai/gpu_raster_line.h b/plugins/gpu_unai/gpu_raster_line.h

index 2a7b422..a338f97 100644 (file)
--- a/plugins/gpu_unai/gpu_raster_line.h
+++ b/plugins/gpu_unai/gpu_raster_line.h
@@ -74,10 +74,10 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
         const int xmax = gpu_unai.DrawingArea[2] - 1;
         const int ymax = gpu_unai.DrawingArea[3] - 1;
  
-       x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
-       y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
-       x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0];
-       y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1];
+       x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0];
+       y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1];
+       x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[4])) + gpu_unai.DrawingOffset[0];
+       y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[5])) + gpu_unai.DrawingOffset[1];
  
         // Always draw top to bottom, so ensure y0 <= y1
         if (y0 > y1) {
@@ -177,12 +177,9 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
             err_adjdown;   // Subract this from err_term after drawing longer run
  
         // Color to draw with (16 bits, highest of which is unset mask bit)
-       uintptr_t col16 = GPU_RGB16(packet.U4[0]);
+       uintptr_t col16 = GPU_RGB16(le32_to_u32(packet.U4[0]));
  
-       // We use u8 pointers even though PS1 has u16 framebuffer.
-       //  This allows pixel-drawing functions to increment dst pointer
-       //  directly by the passed 'incr' value, not having to shift it first.
-       u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
+       le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL;
  
         // SPECIAL CASE: Vertical line
         if (dx == 0) {
@@ -278,7 +275,7 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
  
         // First run of pixels
         dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length);
-       dst += incr_minor;
+       dst += incr_minor / 2;
  
         // Middle runs of pixels
         while (--minor > 0) {
@@ -292,7 +289,7 @@ void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver)
                 }
  
                 dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length);
-               dst += incr_minor;
+               dst += incr_minor / 2;
         }
  
         // Final run of pixels
@@ -321,13 +318,13 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
         const int xmax = gpu_unai.DrawingArea[2] - 1;
         const int ymax = gpu_unai.DrawingArea[3] - 1;
  
-       x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
-       y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
-       x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0];
-       y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1];
+       x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0];
+       y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1];
+       x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[6])) + gpu_unai.DrawingOffset[0];
+       y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[7])) + gpu_unai.DrawingOffset[1];
  
-       u32 col0 = packet.U4[0];
-       u32 col1 = packet.U4[2];
+       u32 col0 = le32_to_u32(packet.U4[0]);
+       u32 col1 = le32_to_u32(packet.U4[2]);
  
         // Always draw top to bottom, so ensure y0 <= y1
         if (y0 > y1) {
@@ -519,10 +516,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
         gcol.g = g0 << GPU_GOURAUD_FIXED_BITS;
         gcol.b = b0 << GPU_GOURAUD_FIXED_BITS;
  
-       // We use u8 pointers even though PS1 has u16 framebuffer.
-       //  This allows pixel-drawing functions to increment dst pointer
-       //  directly by the passed 'incr' value, not having to shift it first.
-       u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
+       le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL;
  
         // SPECIAL CASE: Vertical line
         if (dx == 0) {
@@ -547,7 +541,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
                         if (db) gcol.b_incr /= dy;
                 }
  #endif
-               
+
                 gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1);
                 return;
         }
@@ -696,7 +690,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
  
         // First run of pixels
         dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length);
-       dst += incr_minor;
+       dst += incr_minor / 2;
  
         // Middle runs of pixels
         while (--minor > 0) {
@@ -710,7 +704,7 @@ void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver)
                 }
  
                 dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length);
-               dst += incr_minor;
+               dst += incr_minor / 2;
         }
  
         // Final run of pixels
diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h

index b30286d..ff6dc00 100644 (file)
--- a/plugins/gpu_unai/gpu_raster_polygon.h
+++ b/plugins/gpu_unai/gpu_raster_polygon.h
@@ -31,11 +31,19 @@
  struct PolyVertex { // One polygon vertex as unpacked from a GPU packet by polyInitVertexBuffer()
         s32 x, y; // Sign-extended 11-bit X,Y coords
         union { // Texture coords: per-byte view (tex) over the whole word (tex_word)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // big-endian host: declare bytes in reverse so u,v still overlay the low 16 bits of tex_word
+               struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
+#else // little-endian host: u is the lowest-addressed (least significant) byte
                 struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
+#endif
                 u32 tex_word; // host-order word; polyInitVertexBuffer() stores le32_to_u32() of the packet word here
         };
         union { // Vertex color: per-byte view (col) over the whole word (col_word)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // big-endian host: reversed so r still overlays the least significant byte of col_word
+               struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
+#else // little-endian host: r is the lowest-addressed (least significant) byte
                 struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
+#endif
                 u32 col_word; // host-order word; polyInitVertexBuffer() stores le32_to_u32() of the packet word here
         };
  };
@@ -68,30 +76,30 @@ static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyTy
                 vert_stride++;
  
         int num_verts = (is_quad) ? 4 : 3;
-       u32 *ptr;
+       le32_t *ptr;
  
         // X,Y coords, adjusted by draw offsets
         s32 x_off = gpu_unai.DrawingOffset[0];
         s32 y_off = gpu_unai.DrawingOffset[1];
         ptr = &packet.U4[1];
         for (int i=0;  i < num_verts; ++i, ptr += vert_stride) {
-               s16* coord_ptr = (s16*)ptr;
-               vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
-               vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
+               u32 coords = le32_to_u32(*ptr);
+               vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off;
+               vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off;
         }
  
         // U,V texture coords (if applicable)
         if (texturing) {
                 ptr = &packet.U4[2];
                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
-                       vbuf[i].tex_word = *ptr;
+                       vbuf[i].tex_word = le32_to_u32(*ptr);
         }
  
         // Colors (if applicable)
         if (gouraud) {
                 ptr = &packet.U4[0];
                 for (int i=0;  i < num_verts; ++i, ptr += vert_stride)
-                       vbuf[i].col_word = *ptr;
+                       vbuf[i].col_word = le32_to_u32(*ptr);
         }
  }
  
@@ -218,7 +226,7 @@ gpuDrawPolyF - Flat-shaded, untextured poly
  void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
  {
         // Set up bgr555 color to be used across calls in inner driver
-       gpu_unai.PixelData = GPU_RGB16(packet.U4[0]);
+       gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
  
         PolyVertex vbuf[4];
         polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
@@ -342,7 +350,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
                         if (loop1 <= 0)
                                 continue;
  
-                       u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+                       le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
                         int li=gpu_unai.ilace_mask;
                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
@@ -652,7 +660,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua
                         if (loop1 <= 0)
                                 continue;
  
-                       u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+                       le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
                         int li=gpu_unai.ilace_mask;
                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
@@ -997,7 +1005,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
                         if (loop1 <= 0)
                                 continue;
  
-                       u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+                       le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
                         int li=gpu_unai.ilace_mask;
                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
@@ -1392,7 +1400,7 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua
                         if (loop1 <= 0)
                                 continue;
  
-                       u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+                       le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
                         int li=gpu_unai.ilace_mask;
                         int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
                         int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h

index 91f7bc0..ea4e82f 100644 (file)
--- a/plugins/gpu_unai/gpu_raster_sprite.h
+++ b/plugins/gpu_unai/gpu_raster_sprite.h
@@ -32,11 +32,11 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver)
         //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
         // or sprites in 1st level of SkullMonkeys disappear when walking right.
         // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
-       x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
-       y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+       x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
+       y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
  
-       u32 w = packet.U2[6] & 0x3ff; // Max width is 1023
-       u32 h = packet.U2[7] & 0x1ff; // Max height is 511
+       u32 w = le16_to_u16(packet.U2[6]) & 0x3ff; // Max width is 1023
+       u32 h = le16_to_u16(packet.U2[7]) & 0x1ff; // Max height is 511
         x1 = x0 + w;
         y1 = y0 + h;
  
@@ -63,7 +63,7 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver)
         gpu_unai.g5 = packet.U1[1] >> 3;
         gpu_unai.b5 = packet.U1[2] >> 3;
  
-       u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)];
+       le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)];
         const int li=gpu_unai.ilace_mask;
         const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
         const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
@@ -98,8 +98,8 @@ void gpuDrawS16(PtrUnion packet)
         //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
         // or sprites in 1st level of SkullMonkeys disappear when walking right.
         // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
-       x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
-       y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+       x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
+       y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
  
         xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
         ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
@@ -109,7 +109,7 @@ void gpuDrawS16(PtrUnion packet)
         if (x0 > xmax - 16 || x0 < xmin ||
             ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) {
                 // send corner cases to general handler
-               packet.U4[3] = 0x00100010;
+               packet.U4[3] = u32_to_le32(0x00100010);
                 gpuDrawS(packet, gpuSpriteSpanFn<0x20>);
                 return;
         }
@@ -133,11 +133,11 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver)
         s32 x0, x1, y0, y1;
  
         // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
-       x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
-       y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+       x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
+       y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
  
-       u32 w = packet.U2[4] & 0x3ff; // Max width is 1023
-       u32 h = packet.U2[5] & 0x1ff; // Max height is 511
+       u32 w = le16_to_u16(packet.U2[4]) & 0x3ff; // Max width is 1023
+       u32 h = le16_to_u16(packet.U2[5]) & 0x1ff; // Max height is 511
         x1 = x0 + w;
         y1 = y0 + h;
  
@@ -154,8 +154,8 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver)
         x1 -= x0;
         if (x1 <= 0) return;
  
-       const u16 Data = GPU_RGB16(packet.U4[0]);
-       u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)];
+       const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0]));
+       le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)];
         const int li=gpu_unai.ilace_mask;
         const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
         const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h

index 6886eb8..7aa0667 100644 (file)
--- a/plugins/gpu_unai/gpu_unai.h
+++ b/plugins/gpu_unai/gpu_unai.h
@@ -54,25 +54,67 @@
  #define s32 int32_t
  #define s64 int64_t
  
+typedef struct { // 32-bit value kept in little-endian storage order; a distinct struct type, so it cannot be used as a plain integer without a helper
+        u32 v; // raw stored bits; read through le32_to_u32()/le32_to_s32(), or le32_raw() when no conversion is wanted
+} le32_t;
+
+typedef struct { // 16-bit value kept in little-endian storage order; a distinct struct type, so it cannot be used as a plain integer without a helper
+        u16 v; // raw stored bits; read through le16_to_u16()/le16_to_s16(), or le16_raw() when no conversion is wanted
+} le16_t;
+
+static inline u32 le32_to_u32(le32_t le) // Unwrap a le32_t into an unsigned 32-bit value for host-side arithmetic
+{
+        return LE32TOH(le.v); // LE32TOH is defined elsewhere; presumably little-endian -> host byte order (confirm)
+}
+
+static inline s32 le32_to_s32(le32_t le) // Same as le32_to_u32(), but the result is reinterpreted as signed
+{
+        return (int32_t) LE32TOH(le.v); // convert first, then cast to signed
+}
+
+static inline u32 le32_raw(le32_t le) // Return the stored bits untouched (no LE32TOH applied)
+{
+       return le.v; // callers compare or mask this against HTOLE32() constants rather than host-order values
+}
+
+static inline le32_t u32_to_le32(u32 u) // Wrap a host 32-bit value into a le32_t for storing
+{
+       return (le32_t){ .v = HTOLE32(u) }; // HTOLE32 is defined elsewhere; presumably host -> little-endian byte order (confirm)
+}
+
+static inline u16 le16_to_u16(le16_t le) // Unwrap a le16_t into an unsigned 16-bit value for host-side arithmetic
+{
+        return LE16TOH(le.v); // LE16TOH is defined elsewhere; presumably little-endian -> host byte order (confirm)
+}
+
+static inline s16 le16_to_s16(le16_t le) // Same as le16_to_u16(), but the result is reinterpreted as signed (used for packet X/Y coordinates)
+{
+        return (int16_t) LE16TOH(le.v); // convert first, then cast to signed
+}
+
+static inline u16 le16_raw(le16_t le) // Return the stored bits untouched (no LE16TOH applied)
+{
+       return le.v; // raw storage-order value; only meaningful against other storage-order values
+}
+
+static inline le16_t u16_to_le16(u16 u) // Wrap a host 16-bit value into a le16_t for storing (e.g. a pixel written to VRAM)
+{
+       return (le16_t){ .v = HTOLE16(u) }; // HTOLE16 is defined elsewhere; presumably host -> little-endian byte order (confirm)
+}
+
  union PtrUnion // One pointer viewed at several element widths; all members alias the same address
  {
-       u32  *U4;
-       s32  *S4;
-       u16  *U2;
-       s16  *S2;
+       le32_t  *U4; // 32-bit words; read with le32_to_u32(), write with u32_to_le32()
+       le16_t  *U2; // 16-bit halfwords; read with le16_to_u16() or le16_to_s16() for signed fields
         u8   *U1; // individual bytes; no conversion helper is involved
-       s8   *S1;
         void *ptr; // untyped view of the same pointer
  };
  
  union GPUPacket // 64-byte packet buffer, addressable as words, halfwords or bytes
  {
-       u32 U4[16];
-       s32 S4[16];
-       u16 U2[32];
-       s16 S2[32];
+       le32_t U4[16]; // 16 x 32-bit words; read with le32_to_u32()
+       le16_t U2[32]; // 32 x 16-bit halfwords; read with le16_to_u16()/le16_to_s16()
         u8  U1[64]; // 64 raw bytes
-       s8  S1[64];
  };
  
  template<class T> static inline void SwapValues(T &x, T &y)
@@ -136,7 +178,7 @@ static inline s32 GPU_DIV(s32 rs, s32 rt)
  struct gpu_unai_t {
         u32 GPU_GP1;
         GPUPacket PacketBuffer;
-       u16 *vram;
+       le16_t *vram;
  
  #ifdef USE_GPULIB
         u16 *downscale_vram;
@@ -164,7 +206,7 @@ struct gpu_unai_t {
         struct {
                 s32  px,py;
                 s32  x_end,y_end;
-               u16* pvram;
+               le16_t* pvram;
                 u32 *last_dma;     // Last dma pointer
                 bool FrameToRead;  // Load image in progress
                 bool FrameToWrite; // Store image in progress
@@ -197,8 +239,8 @@ struct gpu_unai_t {
         s16 DrawingOffset[2];  // [0] : Drawing offset X (signed)
                                // [1] : Drawing offset Y (signed)
  
-       u16* TBA;              // Ptr to current texture in VRAM
-       u16* CBA;              // Ptr to current CLUT in VRAM
+       le16_t* TBA;              // Ptr to current texture in VRAM
+       le16_t* CBA;              // Ptr to current CLUT in VRAM
  
         ////////////////////////////////////////////////////////////////////////////
         //  Inner Loop parameters
diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp

index 710a713..453cc7a 100644 (file)
--- a/plugins/gpu_unai/gpulib_if.cpp
+++ b/plugins/gpu_unai/gpulib_if.cpp
@@ -70,7 +70,7 @@
  
  #define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096)
  
-INLINE void scale_640_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) {
+INLINE void scale_640_to_320(uint16_t *dest, const le16_t *src, bool isRGB24) {
    size_t uCount = 320;
  
    if(isRGB24) {
@@ -84,17 +84,17 @@ INLINE void scale_640_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24)
        src8 += 4;
      } while(--uCount);
    } else {
-    const uint16_t* src16 = src;
+    const le16_t *src16 = src;
      uint16_t* dst16 = dest;
  
      do {
-      *dst16++ = *src16;
+      *dst16++ = le16_to_u16(*src16);
        src16 += 2;
      } while(--uCount);
    }
  }
  
-INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) {
+INLINE void scale_512_to_320(uint16_t *dest, const le16_t *src, bool isRGB24) {
    size_t uCount = 64;
  
    if(isRGB24) {
@@ -122,17 +122,17 @@ INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24)
        src8 += 4;
      } while(--uCount);
    } else {
-    const uint16_t* src16 = src;
+    const le16_t* src16 = src;
      uint16_t* dst16 = dest;
  
      do {
-      *dst16++ = *src16++;
-      *dst16++ = *src16;
+      *dst16++ = le16_to_u16(*src16++);
+      *dst16++ = le16_to_u16(*src16);
        src16 += 2;
-      *dst16++ = *src16++;
-      *dst16++ = *src16;
+      *dst16++ = le16_to_u16(*src16++);
+      *dst16++ = le16_to_u16(*src16);
        src16 += 2;
-      *dst16++ = *src16;
+      *dst16++ = le16_to_u16(*src16);
        src16 += 2;
      } while(--uCount);
    }
@@ -141,7 +141,7 @@ INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24)
  static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h)
  {
    uint16_t *dest = gpu_unai.downscale_vram;
-  const uint16_t *src = gpu_unai.vram;
+  const le16_t *src = gpu_unai.vram;
    bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false);
    int stride = 1024, dstride = 1024, lines = *h, orig_w = *w;
  
@@ -184,7 +184,12 @@ static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_
      size_t size = isRGB24 ? *w * 3 : *w * 2;
  
      do {
-      memcpy(dest + fb_offset_dest, src + fb_offset_src, size);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+      for (unsigned int i; i < size; i += 2)
+        dest[fb_offset_dest + i] = le16_to_u16(src[fb_offset_src + i]);
+#else
+      memcpy(dest + fb_offset_dest, (u16 *)src + fb_offset_src, size);
+#endif
        fb_offset_src = (fb_offset_src + stride) & fb_mask;
        fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
      } while(--lines);
@@ -223,7 +228,7 @@ static void unmap_downscale_buffer(void)
  int renderer_init(void)
  {
    memset((void*)&gpu_unai, 0, sizeof(gpu_unai));
-  gpu_unai.vram = (u16*)gpu.vram;
+  gpu_unai.vram = (le16_t *)gpu.vram;
  
    // Original standalone gpu_unai initialized TextureWindow[]. I added the
    //  same behavior here, since it seems unsafe to leave [2],[3] unset when
@@ -392,11 +397,12 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word)
  
  extern const unsigned char cmd_lengths[256];
  
-int do_cmd_list(u32 *list, int list_len, int *last_cmd)
+int do_cmd_list(u32 *_list, int list_len, int *last_cmd)
  {
    u32 cmd = 0, len, i;
-  u32 *list_start = list;
-  u32 *list_end = list + list_len;
+  le32_t *list = (le32_t *)_list;
+  le32_t *list_start = list;
+  le32_t *list_end = list + list_len;
  
    //TODO: set ilace_mask when resolution changes instead of every time,
    // eliminate #ifdef below.
@@ -411,7 +417,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
  
    for (; list < list_end; list += 1 + len)
    {
-    cmd = *list >> 24;
+    cmd = le32_to_u32(*list) >> 24;
      len = cmd_lengths[cmd];
      if (list + 1 + len > list_end) {
        cmd = -1;
@@ -447,8 +453,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x25:
        case 0x26:
        case 0x27: {          // Textured 3-pt poly
-        gpuSetCLUT   (gpu_unai.PacketBuffer.U4[2] >> 16);
-        gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16);
+        gpuSetCLUT   (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
  
          u32 driver_idx =
            (gpu_unai.blit_mask?1024:0) |
@@ -483,8 +489,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x2D:
        case 0x2E:
        case 0x2F: {          // Textured 4-pt poly
-        gpuSetCLUT   (gpu_unai.PacketBuffer.U4[2] >> 16);
-        gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16);
+        gpuSetCLUT   (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
  
          u32 driver_idx =
            (gpu_unai.blit_mask?1024:0) |
@@ -524,8 +530,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x35:
        case 0x36:
        case 0x37: {          // Gouraud-shaded, textured 3-pt poly
-        gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
          PP driver = gpuPolySpanDrivers[
            (gpu_unai.blit_mask?1024:0) |
            Dithering |
@@ -553,8 +559,8 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x3D:
        case 0x3E:
        case 0x3F: {          // Gouraud-shaded, textured 4-pt poly
-        gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
-        gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+        gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
          PP driver = gpuPolySpanDrivers[
            (gpu_unai.blit_mask?1024:0) |
            Dithering |
@@ -576,7 +582,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
  
        case 0x48 ... 0x4F: { // Monochrome line strip
          u32 num_vertexes = 1;
-        u32 *list_position = &(list[2]);
+        le32_t *list_position = &list[2];
  
          // Shift index right by one, as untextured prims don't use lighting
          u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
@@ -594,7 +600,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
              cmd = -1;
              goto breakloop;
            }
-          if((*list_position & 0xf000f000) == 0x50005000)
+          if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
              break;
          }
  
@@ -615,7 +621,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
  
        case 0x58 ... 0x5F: { // Gouraud-shaded line strip
          u32 num_vertexes = 1;
-        u32 *list_position = &(list[2]);
+        le32_t *list_position = &list[2];
  
          // Shift index right by one, as untextured prims don't use lighting
          u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
@@ -637,7 +643,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
              cmd = -1;
              goto breakloop;
            }
-          if((*list_position & 0xf000f000) == 0x50005000)
+          if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
              break;
          }
  
@@ -656,7 +662,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x65:
        case 0x66:
        case 0x67: {          // Textured rectangle (variable size)
-        gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
          u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
  
          //senquack - Only color 808080h-878787h allows skipping lighting calculation:
@@ -673,7 +679,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
          //  alone, I don't want to slow rendering down too much. (TODO)
          //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
          // Strip lower 3 bits of each color and determine if lighting should be used:
-        if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+        if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
            driver_idx |= Lighting;
          PS driver = gpuSpriteSpanDrivers[driver_idx];
          gpuDrawS(packet, driver);
@@ -683,7 +689,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x69:
        case 0x6A:
        case 0x6B: {          // Monochrome rectangle (1x1 dot)
-        gpu_unai.PacketBuffer.U4[2] = 0x00010001;
+        gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
          PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
          gpuDrawT(packet, driver);
        } break;
@@ -692,7 +698,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x71:
        case 0x72:
        case 0x73: {          // Monochrome rectangle (8x8)
-        gpu_unai.PacketBuffer.U4[2] = 0x00080008;
+        gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
          PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
          gpuDrawT(packet, driver);
        } break;
@@ -701,14 +707,14 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x75:
        case 0x76:
        case 0x77: {          // Textured rectangle (8x8)
-        gpu_unai.PacketBuffer.U4[3] = 0x00080008;
-        gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+        gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
          u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
  
          //senquack - Only color 808080h-878787h allows skipping lighting calculation:
          //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
          // Strip lower 3 bits of each color and determine if lighting should be used:
-        if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+        if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
            driver_idx |= Lighting;
          PS driver = gpuSpriteSpanDrivers[driver_idx];
          gpuDrawS(packet, driver);
@@ -718,7 +724,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
        case 0x79:
        case 0x7A:
        case 0x7B: {          // Monochrome rectangle (16x16)
-        gpu_unai.PacketBuffer.U4[2] = 0x00100010;
+        gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
          PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
          gpuDrawT(packet, driver);
        } break;
@@ -728,7 +734,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
  #ifdef __arm__
          if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0)
          {
-          gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+          gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
            gpuDrawS16(packet);
            break;
          }
@@ -736,13 +742,13 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
  #endif
        case 0x7E:
        case 0x7F: {          // Textured rectangle (16x16)
-        gpu_unai.PacketBuffer.U4[3] = 0x00100010;
-        gpuSetCLUT    (gpu_unai.PacketBuffer.U4[2] >> 16);
+        gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
+        gpuSetCLUT    (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
          u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
          //senquack - Only color 808080h-878787h allows skipping lighting calculation:
          //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
          // Strip lower 3 bits of each color and determine if lighting should be used:
-        if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+        if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
            driver_idx |= Lighting;
          PS driver = gpuSpriteSpanDrivers[driver_idx];
          gpuDrawS(packet, driver);
@@ -771,7 +777,7 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd)
          goto breakloop;
  #endif
        case 0xE1 ... 0xE6: { // Draw settings
-        gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]);
+        gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0]));
        } break;
      }
    }
@@ -784,7 +790,7 @@ breakloop:
    return list - list_start;
  }
  
-void renderer_sync_ecmds(uint32_t *ecmds)
+void renderer_sync_ecmds(u32 *ecmds)
  {
    int dummy;
    do_cmd_list(&ecmds[1], 6, &dummy);
@@ -806,7 +812,7 @@ void renderer_set_interlace(int enable, int is_odd)
  // Handle any gpulib settings applicable to gpu_unai:
  void renderer_set_config(const struct rearmed_cbs *cbs)
  {
-  gpu_unai.vram = (u16*)gpu.vram;
+  gpu_unai.vram = (le16_t *)gpu.vram;
    gpu_unai.config.ilace_force   = cbs->gpu_unai.ilace_force;
    gpu_unai.config.pixel_skip    = cbs->gpu_unai.pixel_skip;
    gpu_unai.config.lighting      = cbs->gpu_unai.lighting;
author	Paul Cercueil <paul@crapouillou.net>
	Wed, 30 Aug 2023 14:36:56 +0000 (16:36 +0200)
committer	Paul Cercueil <paul@crapouillou.net>
	Wed, 30 Aug 2023 16:57:14 +0000 (18:57 +0200)
plugins/gpu_unai/gpu_command.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_inner.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_inner_quantization.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_raster_image.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_raster_line.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_raster_polygon.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_raster_sprite.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_unai.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpulib_if.cpp		patch \| blob \| blame \| history