gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3;
gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one
- gpu_unai.TBA = &((u16*)gpu_unai.vram)[FRAME_OFFSET(tx, ty)];
+ gpu_unai.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)];
}
///////////////////////////////////////////////////////////////////////////////
INLINE void gpuSetCLUT(u16 clut)
{
- gpu_unai.CBA = &((u16*)gpu_unai.vram)[(clut & 0x7FFF) << 4];
+ gpu_unai.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4]; // vram is now a typed le16_t* so no cast is needed; index is the low 15 bits of clut times 16 elements
}
#ifdef ENABLE_GPU_NULL_SUPPORT
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
u32 driver_idx =
(gpu_unai.blit_mask?1024:0) |
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
u32 driver_idx =
(gpu_unai.blit_mask?1024:0) |
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
PP driver = gpuPolySpanDrivers[
(gpu_unai.blit_mask?1024:0) |
Dithering |
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
PP driver = gpuPolySpanDrivers[
(gpu_unai.blit_mask?1024:0) |
Dithering |
gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM));
}
- if ((gpu_unai.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[3]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000))
{
gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2];
gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[3];
gpu_unai.fb_dirty = true;
DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM));
}
- if ((gpu_unai.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[4]) & HTOLE32(0xF000F000)) != HTOLE32(0x50005000))
{
gpu_unai.PacketBuffer.U1[3 + (2 * 4)] = gpu_unai.PacketBuffer.U1[3 + (0 * 4)];
gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2];
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
// This fixes Silent Hill running animation on loading screens:
// alone, I don't want to slow rendering down too much. (TODO)
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
// Strip lower 3 bits of each color and determine if lighting should be used:
- if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpu_unai.PacketBuffer.U4[2] = 0x00010001;
+ gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
gpu_unai.fb_dirty = true;
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpu_unai.PacketBuffer.U4[2] = 0x00080008;
+ gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
gpu_unai.fb_dirty = true;
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpu_unai.PacketBuffer.U4[3] = 0x00080008;
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
//senquack - Only color 808080h-878787h allows skipping lighting calculation:
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
// Strip lower 3 bits of each color and determine if lighting should be used:
- if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpu_unai.PacketBuffer.U4[2] = 0x00100010;
+ gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
gpu_unai.fb_dirty = true;
/* Notaz 4bit sprites optimization */
if ((!gpu_unai.frameskip.skipGPU) && (!(gpu_unai.GPU_GP1&0x180)) && (!(gpu_unai.Masking|gpu_unai.PixelMSB)))
{
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
gpuDrawS16(packet);
gpu_unai.fb_dirty = true;
break;
if (!gpu_unai.frameskip.skipGPU)
{
NULL_GPU();
- gpu_unai.PacketBuffer.U4[3] = 0x00100010;
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
//senquack - Only color 808080h-878787h allows skipping lighting calculation:
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
// Strip lower 3 bits of each color and determine if lighting should be used:
- if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
DO_LOG(("gpuStoreImage(0x%x)\n",PRIM));
break;
case 0xE1 ... 0xE6: { // Draw settings
- gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]);
+ gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0]));
} break;
}
}
// rectangles) to use the same set of functions. Since tiles are always
// monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded
// gpuPixelSpanFn functions (TODO?).
-//
-// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here,
-// so that pDst can be incremented directly by 'incr' parameter
-// without having to shift it before use.
template<int CF>
-static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
+static le16_t* gpuPixelSpanFn(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
{
// Blend func can save an operation if it knows uSrc MSB is
// unset. For untextured prims, this is always true.
u32 r, g, b;
s32 r_incr, g_incr, b_incr;
+ // Caller counts in bytes, we count in pixels
+ incr /= 2;
+
if (CF_GOURAUD) {
gcPtr = (GouraudColor*)data;
r = gcPtr->r; r_incr = gcPtr->r_incr;
if (!CF_GOURAUD)
{ // NO GOURAUD
if (!CF_MASKCHECK && !CF_BLEND) {
- if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
- else { *(u16*)pDst = col; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+ else { *pDst = u16_to_le16(col); }
} else if (CF_MASKCHECK && !CF_BLEND) {
- if (!(*(u16*)pDst & 0x8000)) {
- if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
- else { *(u16*)pDst = col; }
+ if (!(le16_raw(*pDst) & HTOLE16(0x8000))) {
+ if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+ else { *pDst = u16_to_le16(col); }
}
} else {
- uint_fast16_t uDst = *(u16*)pDst;
+ uint_fast16_t uDst = le16_to_u16(*pDst);
if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
uint_fast16_t uSrc = col;
if (CF_BLEND)
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
- if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
- else { *(u16*)pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else { *pDst = u16_to_le16(uSrc); }
}
} else
if (!CF_MASKCHECK && !CF_BLEND) {
col = gpuGouraudColor15bpp(r, g, b);
- if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
- else { *(u16*)pDst = col; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+ else { *pDst = u16_to_le16(col); }
} else if (CF_MASKCHECK && !CF_BLEND) {
col = gpuGouraudColor15bpp(r, g, b);
- if (!(*(u16*)pDst & 0x8000)) {
- if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; }
- else { *(u16*)pDst = col; }
+ if (!(le16_raw(*pDst) & HTOLE16(0x8000))) {
+ if (CF_MASKSET) { *pDst = u16_to_le16(col | 0x8000); }
+ else { *pDst = u16_to_le16(col); }
}
} else {
- uint_fast16_t uDst = *(u16*)pDst;
+ uint_fast16_t uDst = le16_to_u16(*pDst);
if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; }
col = gpuGouraudColor15bpp(r, g, b);
if (CF_BLEND)
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
- if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; }
- else { *(u16*)pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else { *pDst = u16_to_le16(uSrc); }
}
}
return pDst;
}
-static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
+static le16_t* PixelSpanNULL(le16_t* pDst, uintptr_t data, ptrdiff_t incr, size_t len)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"PixelSpanNULL()\n");
///////////////////////////////////////////////////////////////////////////////
// PixelSpan (lines) innerloops driver
-typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len);
+typedef le16_t* (*PSD)(le16_t* dst, uintptr_t data, ptrdiff_t incr, size_t len);
const PSD gpuPixelSpanDrivers[64] =
{
// GPU Tiles innerloops generator
template<int CF>
-static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data)
+static void gpuTileSpanFn(le16_t *pDst, u32 count, u16 data)
{
+ le16_t ldata;
+
if (!CF_MASKCHECK && !CF_BLEND) {
- if (CF_MASKSET) { data = data | 0x8000; }
- do { *pDst++ = data; } while (--count);
+ if (CF_MASKSET)
+ ldata = u16_to_le16(data | 0x8000);
+ else
+ ldata = u16_to_le16(data);
+ do { *pDst++ = ldata; } while (--count);
} else if (CF_MASKCHECK && !CF_BLEND) {
- if (CF_MASKSET) { data = data | 0x8000; }
- do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count);
+ if (CF_MASKSET)
+ ldata = u16_to_le16(data | 0x8000);
+ else
+ ldata = u16_to_le16(data);
+ do {
+ if (!(le16_raw(*pDst) & HTOLE16(0x8000)))
+ *pDst = ldata;
+ pDst++;
+ } while (--count);
} else
{
// Blend func can save an operation if it knows uSrc MSB is
uint_fast16_t uSrc, uDst;
do
{
- if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
- if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; }
+ if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
+ if (CF_MASKCHECK) if (uDst&0x8000) { goto endtile; }
uSrc = data;
if (CF_BLEND)
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
- if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else { *pDst = u16_to_le16(uSrc); }
//senquack - Did not apply "Silent Hill" mask-bit fix to here.
// It is hard to tell from scarce documentation available and
}
}
-static void TileNULL(u16 *pDst, u32 count, u16 data)
+static void TileNULL(le16_t *pDst, u32 count, u16 data)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"TileNULL()\n");
///////////////////////////////////////////////////////////////////////////////
// Tiles innerloops driver
-typedef void (*PT)(u16 *pDst, u32 count, u16 data);
+typedef void (*PT)(le16_t *pDst, u32 count, u16 data);
// Template instantiation helper macros
#define TI(cf) gpuTileSpanFn<(cf)>
// GPU Sprites innerloops generator
template<int CF>
-static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0)
+static void gpuSpriteSpanFn(le16_t *pDst, u32 count, u8* pTxt, u32 u0)
{
// Blend func can save an operation if it knows uSrc MSB is unset.
// Untextured prims can always skip (source color always comes with MSB=0).
u0_mask <<= 1;
}
- const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
+ const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
do
{
- if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+ if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; }
if (CF_TEXTMODE==1) { // 4bpp (CLUT)
u8 rgb = pTxt[(u0 & u0_mask)>>1];
- uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf];
+ uSrc = le16_to_u16(CBA_[(rgb>>((u0&1)<<2))&0xf]);
}
if (CF_TEXTMODE==2) { // 8bpp (CLUT)
- uSrc = CBA_[pTxt[u0 & u0_mask]];
+ uSrc = le16_to_u16(CBA_[pTxt[u0 & u0_mask]]);
}
if (CF_TEXTMODE==3) { // 16bpp
- uSrc = *(u16*)(&pTxt[u0 & u0_mask]);
+ uSrc = le16_to_u16(*(le16_t*)(&pTxt[u0 & u0_mask]));
}
if (!uSrc) goto endsprite;
if (CF_BLEND && should_blend)
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
- if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
- else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; }
- else { *pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); }
+ else { *pDst = u16_to_le16(uSrc); }
endsprite:
u0 += (CF_TEXTMODE==3) ? 2 : 1;
while (--count);
}
-static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0)
+static void SpriteNULL(le16_t *pDst, u32 count, u8* pTxt, u32 u0)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"SpriteNULL()\n");
///////////////////////////////////////////////////////////////////////////////
// Sprite innerloops driver
-typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0);
+typedef void (*PS)(le16_t *pDst, u32 count, u8* pTxt, u32 u0);
// Template instantiation helper macros
#define TI(cf) gpuSpriteSpanFn<(cf)>
// relevant blend/light headers.
// (see README_senquack.txt)
template<int CF>
-static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
+static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
{
// Blend func can save an operation if it knows uSrc MSB is unset.
// Untextured prims can always skip this (src color MSB is always 0).
// gravestone text at end of Medieval intro sequence. -senquack
//if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } }
- if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+ if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst);
if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } }
uSrc = pix15;
if (CF_BLEND)
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
- if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else { *pDst = u16_to_le16(uSrc); }
endpolynotextnogou:
pDst++;
// See note in above loop regarding CF_BLITMASK
//if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; }
- if (CF_BLEND || CF_MASKCHECK) uDst = *pDst;
+ if (CF_BLEND || CF_MASKCHECK) uDst = le16_to_u16(*pDst);
if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; }
if (CF_DITHER) {
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
}
- if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
- else { *pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else { *pDst = u16_to_le16(uSrc); }
endpolynotextgou:
pDst++;
u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk;
s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc;
- const u16* TBA_ = gpu_unai.TBA;
- const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
+ const le16_t* TBA_ = gpu_unai.TBA;
+ const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
u8 r5, g5, b5;
u8 r8, g8, b8;
do
{
if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; }
- if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; }
+ if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; }
//senquack - adapted to work with new 22.10 fixed point routines:
u32 tu=(l_u>>10);
u32 tv=(l_v<<1)&(0xff<<11);
u8 rgb=((u8*)TBA_)[tv+(tu>>1)];
- uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf];
+ uSrc=le16_to_u16(CBA_[(rgb>>((tu&1)<<2))&0xf]);
if (!uSrc) goto endpolytext;
}
if (CF_TEXTMODE==2) { // 8bpp (CLUT)
- uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])];
+ uSrc = le16_to_u16(CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]);
if (!uSrc) goto endpolytext;
}
if (CF_TEXTMODE==3) { // 16bpp
- uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))];
+ uSrc = le16_to_u16(TBA_[(l_u>>10)+((l_v)&(0xff<<10))]);
if (!uSrc) goto endpolytext;
}
uSrc = gpuBlending<CF_BLENDMODE, skip_uSrc_mask>(uSrc, uDst);
}
- if (CF_MASKSET) { *pDst = uSrc | 0x8000; }
- else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = uSrc | srcMSB; }
- else { *pDst = uSrc; }
+ if (CF_MASKSET) { *pDst = u16_to_le16(uSrc | 0x8000); }
+ else if (!MSB_PRESERVED && (CF_BLEND || CF_LIGHT)) { *pDst = u16_to_le16(uSrc | srcMSB); }
+ else { *pDst = u16_to_le16(uSrc); }
endpolytext:
pDst++;
l_u = (l_u + l_u_inc) & l_u_msk;
}
}
-static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count)
+static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"PolyNULL()\n");
///////////////////////////////////////////////////////////////////////////////
// Polygon innerloops driver
-typedef void (*PP)(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count);
+typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count);
// Template instantiation helper macros
#define TI(cf) gpuPolySpanFn<(cf)>
// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
////////////////////////////////////////////////////////////////////////////////
template <int DITHER>
-GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst)
+GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const le16_t *pDst)
{
if (DITHER)
{
- u16 fbpos = (u32)(pDst - gpu_unai.vram);
+ // fbpos must remain a pixel index (element difference), because the offset
+ // computation below picks x from bits 0-2 and y from bits 10-12. Subtracting
+ // the pointers as uintptr_t would yield a byte offset (twice the index).
+ u16 fbpos = (u32)(pDst - gpu_unai.vram);
u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7);
//clean overflow flags and add
void gpuLoadImage(PtrUnion packet)
{
u16 x0, y0, w0, h0;
- x0 = packet.U2[2] & 1023;
- y0 = packet.U2[3] & 511;
- w0 = packet.U2[4];
- h0 = packet.U2[5];
+ x0 = le16_to_u16(packet.U2[2]) & 1023;
+ y0 = le16_to_u16(packet.U2[3]) & 511;
+ w0 = le16_to_u16(packet.U2[4]);
+ h0 = le16_to_u16(packet.U2[5]);
if ((y0 + h0) > FRAME_HEIGHT)
{
gpu_unai.dma.py = 0;
gpu_unai.dma.x_end = w0;
gpu_unai.dma.y_end = h0;
- gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)];
+ gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)];
gpu_unai.GPU_GP1 |= 0x08000000;
}
void gpuStoreImage(PtrUnion packet)
{
u16 x0, y0, w0, h0;
- x0 = packet.U2[2] & 1023;
- y0 = packet.U2[3] & 511;
- w0 = packet.U2[4];
- h0 = packet.U2[5];
+ x0 = le16_to_u16(packet.U2[2]) & 1023;
+ y0 = le16_to_u16(packet.U2[3]) & 511;
+ w0 = le16_to_u16(packet.U2[4]);
+ h0 = le16_to_u16(packet.U2[5]);
if ((y0 + h0) > FRAME_HEIGHT)
{
gpu_unai.dma.py = 0;
gpu_unai.dma.x_end = w0;
gpu_unai.dma.y_end = h0;
- gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)];
+ gpu_unai.dma.pvram = &gpu_unai.vram[x0+(y0*1024)];
gpu_unai.GPU_GP1 |= 0x08000000;
}
{
u32 x0, y0, x1, y1;
s32 w0, h0;
- x0 = packet.U2[2] & 1023;
- y0 = packet.U2[3] & 511;
- x1 = packet.U2[4] & 1023;
- y1 = packet.U2[5] & 511;
- w0 = packet.U2[6];
- h0 = packet.U2[7];
+ x0 = le16_to_u16(packet.U2[2]) & 1023;
+ y0 = le16_to_u16(packet.U2[3]) & 511;
+ x1 = le16_to_u16(packet.U2[4]) & 1023;
+ y1 = le16_to_u16(packet.U2[5]) & 511;
+ w0 = le16_to_u16(packet.U2[6]);
+ h0 = le16_to_u16(packet.U2[7]);
if( (x0==x1) && (y0==y1) ) return;
if ((w0<=0) || (h0<=0)) return;
if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024))
{
- u16 *psxVuw=gpu_unai.vram;
+ le16_t *psxVuw=gpu_unai.vram;
s32 i,j;
for(j=0;j<h0;j++)
for(i=0;i<w0;i++)
}
else if ((x0&1)||(x1&1))
{
- u16 *lpDst, *lpSrc;
- lpDst = lpSrc = (u16*)gpu_unai.vram;
+ le16_t *lpDst, *lpSrc;
+ lpDst = lpSrc = gpu_unai.vram;
lpSrc += FRAME_OFFSET(x0, y0);
lpDst += FRAME_OFFSET(x1, y1);
x1 = FRAME_WIDTH - w0;
}
else
{
- u32 *lpDst, *lpSrc;
- lpDst = lpSrc = (u32*)(void*)gpu_unai.vram;
+ le32_t *lpDst, *lpSrc;
+ lpDst = lpSrc = (le32_t *)gpu_unai.vram;
lpSrc += ((FRAME_OFFSET(x0, y0))>>1);
lpDst += ((FRAME_OFFSET(x1, y1))>>1);
if (w0&1)
w0>>=1;
if (!w0) {
do {
- *((u16*)lpDst) = *((u16*)lpSrc);
+ *((le16_t*)lpDst) = *((le16_t*)lpSrc);
lpDst += x1;
lpSrc += x1;
} while (--h0);
do {
x0=w0;
do { *lpDst++ = *lpSrc++; } while (--x0);
- *((u16*)lpDst) = *((u16*)lpSrc);
+ *((le16_t*)lpDst) = *((le16_t*)lpSrc);
lpDst += x1;
lpSrc += x1;
} while (--h0);
void gpuClearImage(PtrUnion packet)
{
s32 x0, y0, w0, h0;
- x0 = packet.S2[2];
- y0 = packet.S2[3];
- w0 = packet.S2[4] & 0x3ff;
- h0 = packet.S2[5] & 0x3ff;
-
+ x0 = le16_to_s16(packet.U2[2]);
+ y0 = le16_to_s16(packet.U2[3]);
+ w0 = le16_to_s16(packet.U2[4]) & 0x3ff;
+ h0 = le16_to_s16(packet.U2[5]) & 0x3ff;
+
w0 += x0;
if (x0 < 0) x0 = 0;
if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH;
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0);
#endif
-
+
if (x0&1)
{
- u16* pixel = (u16*)gpu_unai.vram + FRAME_OFFSET(x0, y0);
- u16 rgb = GPU_RGB16(packet.U4[0]);
+ le16_t* pixel = gpu_unai.vram + FRAME_OFFSET(x0, y0);
+ le16_t rgb = u16_to_le16(GPU_RGB16(le32_to_u32(packet.U4[0])));
y0 = FRAME_WIDTH - w0;
do {
x0=w0;
}
else
{
- u32* pixel = (u32*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1);
- u32 rgb = GPU_RGB16(packet.U4[0]);
- rgb |= (rgb<<16);
+ le32_t* pixel = (le32_t*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1);
+ u32 _rgb = GPU_RGB16(le32_to_u32(packet.U4[0]));
+ le32_t rgb = u32_to_le32(_rgb | (_rgb << 16));
if (w0&1)
{
y0 = (FRAME_WIDTH - w0 +1)>>1;
do {
x0=w0;
do { *pixel++ = rgb; } while (--x0);
- *((u16*)pixel) = (u16)rgb;
+ *((u16*)pixel) = (u16)le32_raw(rgb);
pixel += y0;
} while (--h0);
}
const int xmax = gpu_unai.DrawingArea[2] - 1;
const int ymax = gpu_unai.DrawingArea[3] - 1;
- x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
- y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
- x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0];
- y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1];
+ x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0];
+ y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1];
+ x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[4])) + gpu_unai.DrawingOffset[0];
+ y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[5])) + gpu_unai.DrawingOffset[1];
// Always draw top to bottom, so ensure y0 <= y1
if (y0 > y1) {
err_adjdown; // Subract this from err_term after drawing longer run
// Color to draw with (16 bits, highest of which is unset mask bit)
- uintptr_t col16 = GPU_RGB16(packet.U4[0]);
+ uintptr_t col16 = GPU_RGB16(le32_to_u32(packet.U4[0]));
- // We use u8 pointers even though PS1 has u16 framebuffer.
- // This allows pixel-drawing functions to increment dst pointer
- // directly by the passed 'incr' value, not having to shift it first.
- u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
+ le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL;
// SPECIAL CASE: Vertical line
if (dx == 0) {
// First run of pixels
dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length);
- dst += incr_minor;
+ dst += incr_minor / 2;
// Middle runs of pixels
while (--minor > 0) {
}
dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length);
- dst += incr_minor;
+ dst += incr_minor / 2;
}
// Final run of pixels
const int xmax = gpu_unai.DrawingArea[2] - 1;
const int ymax = gpu_unai.DrawingArea[3] - 1;
- x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0];
- y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1];
- x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0];
- y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1];
+ x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2])) + gpu_unai.DrawingOffset[0];
+ y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3])) + gpu_unai.DrawingOffset[1];
+ x1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[6])) + gpu_unai.DrawingOffset[0];
+ y1 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[7])) + gpu_unai.DrawingOffset[1];
- u32 col0 = packet.U4[0];
- u32 col1 = packet.U4[2];
+ u32 col0 = le32_to_u32(packet.U4[0]);
+ u32 col1 = le32_to_u32(packet.U4[2]);
// Always draw top to bottom, so ensure y0 <= y1
if (y0 > y1) {
gcol.g = g0 << GPU_GOURAUD_FIXED_BITS;
gcol.b = b0 << GPU_GOURAUD_FIXED_BITS;
- // We use u8 pointers even though PS1 has u16 framebuffer.
- // This allows pixel-drawing functions to increment dst pointer
- // directly by the passed 'incr' value, not having to shift it first.
- u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth;
+ le16_t *dst = gpu_unai.vram + (y0 * dst_stride + x0 * dst_depth) / FRAME_BYTES_PER_PIXEL;
// SPECIAL CASE: Vertical line
if (dx == 0) {
if (db) gcol.b_incr /= dy;
}
#endif
-
+
gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1);
return;
}
// First run of pixels
dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length);
- dst += incr_minor;
+ dst += incr_minor / 2;
// Middle runs of pixels
while (--minor > 0) {
}
dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length);
- dst += incr_minor;
+ dst += incr_minor / 2;
}
// Final run of pixels
struct PolyVertex {
s32 x, y; // Sign-extended 11-bit X,Y coords
+// tex/col overlay tex_word/col_word, which are filled with host-order
+// values. On big-endian hosts the byte fields are declared in reverse so
+// each named field still aliases the same bits of the word.
union {
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) // undefined macros must not select this layout
+ struct { u8 pad[2], v, u; } tex; // Texture coords (if used)
+#else
struct { u8 u, v, pad[2]; } tex; // Texture coords (if used)
+#endif
u32 tex_word;
};
union {
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) // undefined macros must not select this layout
+ struct { u8 pad, b, g, r; } col; // 24-bit RGB color (if used)
+#else
struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used)
+#endif
u32 col_word;
};
};
vert_stride++;
int num_verts = (is_quad) ? 4 : 3;
- u32 *ptr;
+ le32_t *ptr;
// X,Y coords, adjusted by draw offsets
s32 x_off = gpu_unai.DrawingOffset[0];
s32 y_off = gpu_unai.DrawingOffset[1];
ptr = &packet.U4[1];
for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
- s16* coord_ptr = (s16*)ptr;
- vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off;
- vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off;
+ u32 coords = le32_to_u32(*ptr);
+ vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off;
+ vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off;
}
// U,V texture coords (if applicable)
if (texturing) {
ptr = &packet.U4[2];
for (int i=0; i < num_verts; ++i, ptr += vert_stride)
- vbuf[i].tex_word = *ptr;
+ vbuf[i].tex_word = le32_to_u32(*ptr);
}
// Colors (if applicable)
if (gouraud) {
ptr = &packet.U4[0];
for (int i=0; i < num_verts; ++i, ptr += vert_stride)
- vbuf[i].col_word = *ptr;
+ vbuf[i].col_word = le32_to_u32(*ptr);
}
}
void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
{
// Set up bgr555 color to be used across calls in inner driver
- gpu_unai.PixelData = GPU_RGB16(packet.U4[0]);
+ gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
PolyVertex vbuf[4];
polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
if (loop1 <= 0)
continue;
- u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
int li=gpu_unai.ilace_mask;
int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
if (loop1 <= 0)
continue;
- u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
int li=gpu_unai.ilace_mask;
int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
if (loop1 <= 0)
continue;
- u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
int li=gpu_unai.ilace_mask;
int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
if (loop1 <= 0)
continue;
- u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)];
+ le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
int li=gpu_unai.ilace_mask;
int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
//NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
// or sprites in 1st level of SkullMonkeys disappear when walking right.
// This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
- x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
- y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+ x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
+ y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
- u32 w = packet.U2[6] & 0x3ff; // Max width is 1023
- u32 h = packet.U2[7] & 0x1ff; // Max height is 511
+ u32 w = le16_to_u16(packet.U2[6]) & 0x3ff; // Max width is 1023
+ u32 h = le16_to_u16(packet.U2[7]) & 0x1ff; // Max height is 511
x1 = x0 + w;
y1 = y0 + h;
gpu_unai.g5 = packet.U1[1] >> 3;
gpu_unai.b5 = packet.U1[2] >> 3;
- u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)];
+ le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)];
const int li=gpu_unai.ilace_mask;
const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
//NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
// or sprites in 1st level of SkullMonkeys disappear when walking right.
// This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
- x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
- y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+ x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
+ y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
if (x0 > xmax - 16 || x0 < xmin ||
((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) {
// send corner cases to general handler
- packet.U4[3] = 0x00100010;
+ packet.U4[3] = u32_to_le32(0x00100010);
gpuDrawS(packet, gpuSpriteSpanFn<0x20>);
return;
}
s32 x0, x1, y0, y1;
// This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
- x0 = GPU_EXPANDSIGN(packet.S2[2] + gpu_unai.DrawingOffset[0]);
- y0 = GPU_EXPANDSIGN(packet.S2[3] + gpu_unai.DrawingOffset[1]);
+ x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
+ y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
- u32 w = packet.U2[4] & 0x3ff; // Max width is 1023
- u32 h = packet.U2[5] & 0x1ff; // Max height is 511
+ u32 w = le16_to_u16(packet.U2[4]) & 0x3ff; // Max width is 1023
+ u32 h = le16_to_u16(packet.U2[5]) & 0x1ff; // Max height is 511
x1 = x0 + w;
y1 = y0 + h;
x1 -= x0;
if (x1 <= 0) return;
- const u16 Data = GPU_RGB16(packet.U4[0]);
- u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)];
+ const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0]));
+ le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)];
const int li=gpu_unai.ilace_mask;
const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
#define s32 int32_t
#define s64 int64_t
+typedef struct { // Distinct wrapper around a 32-bit word; presumably holds little-endian data, since it is only produced/consumed via the HTOLE32/LE32TOH helpers
+ u32 v; // stored bits, exactly as kept in memory (no conversion applied)
+} le32_t;
+
+typedef struct { // Distinct wrapper around a 16-bit word; presumably holds little-endian data, since it is only produced/consumed via the HTOLE16/LE16TOH helpers
+ u16 v; // stored bits, exactly as kept in memory (no conversion applied)
+} le16_t;
+
+static inline u32 le32_to_u32(le32_t le) // Unwraps 'le' and returns its value passed through LE32TOH (presumably little-endian -> host order) as an unsigned 32-bit integer
+{
+ return LE32TOH(le.v);
+}
+
+static inline s32 le32_to_s32(le32_t le) // Same conversion as le32_to_u32(), but the result is reinterpreted as a signed 32-bit integer
+{
+ return (int32_t) LE32TOH(le.v); // cast happens after the LE32TOH conversion
+}
+
+static inline u32 le32_raw(le32_t le) // Returns the stored bits with no conversion; intended for mask/compare against constants that were themselves passed through HTOLE32()
+{
+ return le.v;
+}
+
+static inline le32_t u32_to_le32(u32 u) // Wraps 'u' into an le32_t after passing it through HTOLE32 (presumably host -> little-endian order)
+{
+ return (le32_t){ .v = HTOLE32(u) };
+}
+
+static inline u16 le16_to_u16(le16_t le) // Unwraps 'le' and returns its value passed through LE16TOH (presumably little-endian -> host order) as an unsigned 16-bit integer
+{
+ return LE16TOH(le.v);
+}
+
+static inline s16 le16_to_s16(le16_t le) // Same conversion as le16_to_u16(), but the result is reinterpreted as a signed 16-bit integer
+{
+ return (int16_t) LE16TOH(le.v); // cast happens after the LE16TOH conversion
+}
+
+static inline u16 le16_raw(le16_t le) // Returns the stored bits with no conversion; the 16-bit counterpart of le32_raw()
+{
+ return le.v;
+}
+
+static inline le16_t u16_to_le16(u16 u) // Wraps 'u' into an le16_t after passing it through HTOLE16 (presumably host -> little-endian order)
+{
+ return (le16_t){ .v = HTOLE16(u) };
+}
+
union PtrUnion
{
- u32 *U4;
- s32 *S4;
- u16 *U2;
- s16 *S2;
+ le32_t *U4; // 32-bit view; elements are le32_t, so read via le32_to_u32()/le32_to_s32()/le32_raw() and write via u32_to_le32()
+ le16_t *U2; // 16-bit view; elements are le16_t, so read via le16_to_u16()/le16_to_s16()/le16_raw() and write via u16_to_le16()
u8 *U1;
- s8 *S1;
void *ptr;
};
union GPUPacket
{
- u32 U4[16];
- s32 S4[16];
- u16 U2[32];
- s16 S2[32];
+ le32_t U4[16]; // packet as 16 32-bit words; the signed view (S4) is dropped, use le32_to_s32() on U4 instead
+ le16_t U2[32]; // packet as 32 16-bit halfwords; the signed view (S2) is dropped, use le16_to_s16() on U2 instead
u8 U1[64];
- s8 S1[64];
};
template<class T> static inline void SwapValues(T &x, T &y)
struct gpu_unai_t {
u32 GPU_GP1;
GPUPacket PacketBuffer;
- u16 *vram;
+ le16_t *vram;
#ifdef USE_GPULIB
u16 *downscale_vram;
struct {
s32 px,py;
s32 x_end,y_end;
- u16* pvram;
+ le16_t* pvram;
u32 *last_dma; // Last dma pointer
bool FrameToRead; // Load image in progress
bool FrameToWrite; // Store image in progress
s16 DrawingOffset[2]; // [0] : Drawing offset X (signed)
// [1] : Drawing offset Y (signed)
- u16* TBA; // Ptr to current texture in VRAM
- u16* CBA; // Ptr to current CLUT in VRAM
+ le16_t* TBA; // Ptr to current texture in VRAM
+ le16_t* CBA; // Ptr to current CLUT in VRAM
////////////////////////////////////////////////////////////////////////////
// Inner Loop parameters
#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096)
-INLINE void scale_640_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) {
+INLINE void scale_640_to_320(uint16_t *dest, const le16_t *src, bool isRGB24) {
size_t uCount = 320;
if(isRGB24) {
src8 += 4;
} while(--uCount);
} else {
- const uint16_t* src16 = src;
+ const le16_t *src16 = src;
uint16_t* dst16 = dest;
do {
- *dst16++ = *src16;
+ *dst16++ = le16_to_u16(*src16);
src16 += 2;
} while(--uCount);
}
}
-INLINE void scale_512_to_320(uint16_t *dest, const uint16_t *src, bool isRGB24) {
+INLINE void scale_512_to_320(uint16_t *dest, const le16_t *src, bool isRGB24) {
size_t uCount = 64;
if(isRGB24) {
src8 += 4;
} while(--uCount);
} else {
- const uint16_t* src16 = src;
+ const le16_t* src16 = src;
uint16_t* dst16 = dest;
do {
- *dst16++ = *src16++;
- *dst16++ = *src16;
+ *dst16++ = le16_to_u16(*src16++);
+ *dst16++ = le16_to_u16(*src16);
src16 += 2;
- *dst16++ = *src16++;
- *dst16++ = *src16;
+ *dst16++ = le16_to_u16(*src16++);
+ *dst16++ = le16_to_u16(*src16);
src16 += 2;
- *dst16++ = *src16;
+ *dst16++ = le16_to_u16(*src16);
src16 += 2;
} while(--uCount);
}
static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h)
{
uint16_t *dest = gpu_unai.downscale_vram;
- const uint16_t *src = gpu_unai.vram;
+ const le16_t *src = gpu_unai.vram;
bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false);
int stride = 1024, dstride = 1024, lines = *h, orig_w = *w;
size_t size = isRGB24 ? *w * 3 : *w * 2;
do {
- memcpy(dest + fb_offset_dest, src + fb_offset_src, size);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ // Big-endian host: each 16-bit VRAM word is converted with le16_to_u16()
+ // while copying. 'size' is a byte count (it is what memcpy() receives in
+ // the other branch), whereas dest/src are indexed in 16-bit elements, so
+ // the element count is size / 2 and the index advances by one per word.
+ // NOTE(review): when isRGB24 yields an odd 'size' the final byte is not
+ // copied, and per-halfword swapping of 24-bit data may be unwanted - confirm.
+ for (unsigned int i = 0; i < size / 2; i++)
+   dest[fb_offset_dest + i] = le16_to_u16(src[fb_offset_src + i]);
+#else
+ memcpy(dest + fb_offset_dest, (u16 *)src + fb_offset_src, size);
+#endif
fb_offset_src = (fb_offset_src + stride) & fb_mask;
fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
} while(--lines);
int renderer_init(void)
{
memset((void*)&gpu_unai, 0, sizeof(gpu_unai));
- gpu_unai.vram = (u16*)gpu.vram;
+ gpu_unai.vram = (le16_t *)gpu.vram;
// Original standalone gpu_unai initialized TextureWindow[]. I added the
// same behavior here, since it seems unsafe to leave [2],[3] unset when
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(u32 *list, int list_len, int *last_cmd)
+int do_cmd_list(u32 *_list, int list_len, int *last_cmd)
{
u32 cmd = 0, len, i;
- u32 *list_start = list;
- u32 *list_end = list + list_len;
+ le32_t *list = (le32_t *)_list;
+ le32_t *list_start = list;
+ le32_t *list_end = list + list_len;
//TODO: set ilace_mask when resolution changes instead of every time,
// eliminate #ifdef below.
for (; list < list_end; list += 1 + len)
{
- cmd = *list >> 24;
+ cmd = le32_to_u32(*list) >> 24;
len = cmd_lengths[cmd];
if (list + 1 + len > list_end) {
cmd = -1;
case 0x25:
case 0x26:
case 0x27: { // Textured 3-pt poly
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
u32 driver_idx =
(gpu_unai.blit_mask?1024:0) |
case 0x2D:
case 0x2E:
case 0x2F: { // Textured 4-pt poly
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
u32 driver_idx =
(gpu_unai.blit_mask?1024:0) |
case 0x35:
case 0x36:
case 0x37: { // Gouraud-shaded, textured 3-pt poly
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
PP driver = gpuPolySpanDrivers[
(gpu_unai.blit_mask?1024:0) |
Dithering |
case 0x3D:
case 0x3E:
case 0x3F: { // Gouraud-shaded, textured 4-pt poly
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
- gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
PP driver = gpuPolySpanDrivers[
(gpu_unai.blit_mask?1024:0) |
Dithering |
case 0x48 ... 0x4F: { // Monochrome line strip
u32 num_vertexes = 1;
- u32 *list_position = &(list[2]);
+ le32_t *list_position = &list[2];
// Shift index right by one, as untextured prims don't use lighting
u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
cmd = -1;
goto breakloop;
}
- if((*list_position & 0xf000f000) == 0x50005000)
+ if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
break;
}
case 0x58 ... 0x5F: { // Gouraud-shaded line strip
u32 num_vertexes = 1;
- u32 *list_position = &(list[2]);
+ le32_t *list_position = &list[2];
// Shift index right by one, as untextured prims don't use lighting
u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1;
cmd = -1;
goto breakloop;
}
- if((*list_position & 0xf000f000) == 0x50005000)
+ if((le32_raw(*list_position) & HTOLE32(0xf000f000)) == HTOLE32(0x50005000))
break;
}
case 0x65:
case 0x66:
case 0x67: { // Textured rectangle (variable size)
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
//senquack - Only color 808080h-878787h allows skipping lighting calculation:
// alone, I don't want to slow rendering down too much. (TODO)
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
// Strip lower 3 bits of each color and determine if lighting should be used:
- if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
case 0x69:
case 0x6A:
case 0x6B: { // Monochrome rectangle (1x1 dot)
- gpu_unai.PacketBuffer.U4[2] = 0x00010001;
+ gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
} break;
case 0x71:
case 0x72:
case 0x73: { // Monochrome rectangle (8x8)
- gpu_unai.PacketBuffer.U4[2] = 0x00080008;
+ gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
} break;
case 0x75:
case 0x76:
case 0x77: { // Textured rectangle (8x8)
- gpu_unai.PacketBuffer.U4[3] = 0x00080008;
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
//senquack - Only color 808080h-878787h allows skipping lighting calculation:
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
// Strip lower 3 bits of each color and determine if lighting should be used:
- if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
case 0x79:
case 0x7A:
case 0x7B: { // Monochrome rectangle (16x16)
- gpu_unai.PacketBuffer.U4[2] = 0x00100010;
+ gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
gpuDrawT(packet, driver);
} break;
#ifdef __arm__
if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0)
{
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
gpuDrawS16(packet);
break;
}
#endif
case 0x7E:
case 0x7F: { // Textured rectangle (16x16)
- gpu_unai.PacketBuffer.U4[3] = 0x00100010;
- gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16);
+ gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
+ gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
//senquack - Only color 808080h-878787h allows skipping lighting calculation:
//if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
// Strip lower 3 bits of each color and determine if lighting should be used:
- if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080)
+ if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
driver_idx |= Lighting;
PS driver = gpuSpriteSpanDrivers[driver_idx];
gpuDrawS(packet, driver);
goto breakloop;
#endif
case 0xE1 ... 0xE6: { // Draw settings
- gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]);
+ gpuGP0Cmd_0xEx(gpu_unai, le32_to_u32(gpu_unai.PacketBuffer.U4[0]));
} break;
}
}
return list - list_start;
}
-void renderer_sync_ecmds(uint32_t *ecmds)
+void renderer_sync_ecmds(u32 *ecmds)
{
int dummy;
do_cmd_list(&ecmds[1], 6, &dummy);
// Handle any gpulib settings applicable to gpu_unai:
void renderer_set_config(const struct rearmed_cbs *cbs)
{
- gpu_unai.vram = (u16*)gpu.vram;
+ gpu_unai.vram = (le16_t *)gpu.vram;
gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force;
gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip;
gpu_unai.config.lighting = cbs->gpu_unai.lighting;