gpus: return DMA word count to avoid 1 list walk
[pcsx_rearmed.git] / plugins / gpu_unai / gpu.cpp
index ba32d18..9b0a0dc 100644 (file)
@@ -18,8 +18,8 @@
 *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
 ***************************************************************************/
 
-#include "gpu.h"
 #include "port.h"
+#include "gpu.h"
 #include "profiler.h"
 #include "debug.h"
 
@@ -40,6 +40,8 @@ bool frameLimit = false; /* frames to wait */
 bool light = true; /* lighting */
 bool blend = true; /* blending */
 
+bool fb_dirty = false; /* set by GPU data writes and display-register (GP1 0x05/0x07/0x08) writes; cleared by the REARMED GPU_updateLace after it blits */
+
 bool enableAbbeyHack = false; /* Abe's Odyssey hack */
 u8 BLEND_MODE;
 u8 TEXT_MODE;
@@ -92,7 +94,8 @@ u32   lInc;
 u32   tInc, tMsk;
 
 GPUPacket PacketBuffer;
-u16   GPU_FrameBuffer[FRAME_BUFFER_SIZE/2];    // FRAME_BUFFER_SIZE is defined in bytes
+// FRAME_BUFFER_SIZE is defined in bytes; 512K is guard memory for out of range reads
+u16   GPU_FrameBuffer[(FRAME_BUFFER_SIZE+512*1024)/2] __attribute__((aligned(16)));
 u32   GPU_GP1;
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -180,6 +183,14 @@ long  GPU_freeze(unsigned int bWrite, GPUFreeze_t* p2)
        if (bWrite)
        {
                p2->GPU_gp1 = GPU_GP1;
+               memset(p2->Control, 0, sizeof(p2->Control));
+               // save resolution and registers for P.E.Op.S. compatibility
+               p2->Control[3] = (3 << 24) | ((GPU_GP1 >> 23) & 1);
+               p2->Control[4] = (4 << 24) | ((GPU_GP1 >> 29) & 3);
+               p2->Control[5] = (5 << 24) | (DisplayArea[0] | (DisplayArea[1] << 10));
+               p2->Control[6] = (6 << 24) | (2560 << 12);
+               p2->Control[7] = (7 << 24) | (DisplayArea[4] | (DisplayArea[5] << 10));
+               p2->Control[8] = (8 << 24) | ((GPU_GP1 >> 17) & 0x3f) | ((GPU_GP1 >> 10) & 0x40);
                memcpy(p2->FrameBuffer, (u16*)GPU_FrameBuffer, FRAME_BUFFER_SIZE);
                return (1);
        }
@@ -187,6 +198,10 @@ long  GPU_freeze(unsigned int bWrite, GPUFreeze_t* p2)
        {
                GPU_GP1 = p2->GPU_gp1;
                memcpy((u16*)GPU_FrameBuffer, p2->FrameBuffer, FRAME_BUFFER_SIZE);
+               GPU_writeStatus((5 << 24) | p2->Control[5]);
+               GPU_writeStatus((7 << 24) | p2->Control[7]);
+               GPU_writeStatus((8 << 24) | p2->Control[8]);
+               gpuSetTexture(GPU_GP1);
                return (1);
        }
        return (0);
@@ -298,6 +313,7 @@ void  GPU_writeDataMem(u32* dmaAddress, s32 dmaCount)
        }
 
        GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000;
+       fb_dirty = true;
        pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
        pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
 }
@@ -315,7 +331,7 @@ INLINE int CheckForEndlessLoop(u32 *laddr)
 }
 
 ///////////////////////////////////////////////////////////////////////////////
-void  GPU_dmaChain(u32* baseAddr, u32 dmaVAddr)
+long GPU_dmaChain(u32* baseAddr, u32 dmaVAddr)
 {
 #ifdef DEBUG_ANALYSIS
        dbg_anacnt_GPU_dmaChain++;
@@ -323,6 +339,7 @@ void  GPU_dmaChain(u32* baseAddr, u32 dmaVAddr)
        pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
        u32 data, *address, count, offset;
        unsigned int DMACommandCounter = 0;
+       long dma_words = 0;
 
        GPU_GP1 &= ~0x14000000;
        lUsedAddr[0]=lUsedAddr[1]=lUsedAddr[2]=(u32*)0x1fffff;
@@ -339,9 +356,12 @@ void  GPU_dmaChain(u32* baseAddr, u32 dmaVAddr)
                else dmaVAddr = 0x1FFFFF;
 
                if(count>0) GPU_writeDataMem(address,count);
+               dma_words += 1 + count;
        }
        GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000;
        pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
+
+       return dma_words;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -390,6 +410,7 @@ void  GPU_writeData(u32 data)
                gpuCheckPacket(data);
        }
        GPU_GP1 |= 0x14000000;
+       fb_dirty = true;
        pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE);
        pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
 
@@ -529,10 +550,12 @@ void  GPU_writeStatus(u32 data)
        case 0x05:
                DisplayArea[0] = (data & 0x000003FF); //(short)(data & 0x3ff);
                DisplayArea[1] = ((data & 0x0007FC00)>>10); //(data & 0x000FFC00) >> 10; //(short)((data>>10)&0x1ff);
+               fb_dirty = true;
                break;
        case 0x07:
                DisplayArea[4] = data & 0x000003FF; //(short)(data & 0x3ff);
                DisplayArea[5] = (data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff);
+               fb_dirty = true;
                break;
        case 0x08:
                {
@@ -543,6 +566,7 @@ void  GPU_writeStatus(u32 data)
                        DisplayArea[3] = VerticalResolution[(GPU_GP1 >> 19) & 3];
                        isPAL = (data & 0x08) ? true : false; // if 1 - PAL mode, else NTSC
                }
+               fb_dirty = true;
                break;
        case 0x10:
                switch (data & 0xffff) {
@@ -570,6 +594,8 @@ void  GPU_writeStatus(u32 data)
        pcsx4all_prof_resume(PCSX4ALL_PROF_CPU);
 }
 
+#ifndef REARMED
+
 // Blitting functions
 #include "gpu_blit.h"
 
@@ -838,3 +864,117 @@ void  GPU_updateLace(void)
 
        pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_COUNTERS);
 }
+
+#else
+
+#include "../../frontend/plugin_lib.h"
+#include "../../frontend/arm_utils.h"
+
+extern "C" {
+
+static const struct rearmed_cbs *cbs; // frontend callback table; installed by GPUrearmedCallbacks()
+static void *screen_buf; // output buffer last returned by pl_fbdev_flip()/pl_fbdev_set_mode(); blit() writes into it
+
+static void blit(void) // Convert the display area of GPU_FrameBuffer into screen_buf row by row, then flip
+{
+       static s16 old_res_horz, old_res_vert, old_rgb24; // width/height/24-bit flag last passed to pl_fbdev_set_mode
+       s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0; // GP1 bit 21 selects the 24-bit conversion path below
+       s16 h0, x0, y0, w0, h1;
+       u16 *srcs;
+       u8  *dest;
+
+       x0 = DisplayArea[0] & ~1; // alignment needed by blitter
+       y0 = DisplayArea[1];
+       srcs = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0,y0)]; // first source pixel of the display area
+
+       w0 = DisplayArea[2];
+       h0 = DisplayArea[3];  // video mode
+
+       h1 = DisplayArea[5] - DisplayArea[4]; // display needed
+       if (h0 == 480) h1 = Min2(h1*2,480); // 480-line mode: double the line count, capped at 480
+
+       if (h1 <= 0) // empty or inverted vertical range: nothing to draw, no flip
+               return;
+
+       if (w0 != old_res_horz || h1 != old_res_vert || isRGB24 != old_rgb24) // re-set the output mode only when it changed
+       {
+               old_res_horz = w0;
+               old_res_vert = h1;
+               old_rgb24 = (s16)isRGB24;
+               screen_buf = cbs->pl_fbdev_set_mode(w0, h1, isRGB24 ? 24 : 16); // third argument is presumably bits per pixel - confirm in plugin_lib.h
+       }
+       dest = (u8 *)screen_buf;
+
+       if (isRGB24)
+       {
+#ifndef MAEMO
+               for (; h1-- > 0; dest += w0 * 3, srcs += 1024) // dest advances w0*3 bytes per row; source advances 1024 u16 (presumably one framebuffer row)
+               {
+                       bgr888_to_rgb888(dest, srcs, w0 * 3);
+               }
+#else
+               for (; h1-- > 0; dest += w0 * 2, srcs += 1024) // MAEMO: dest advances only w0*2 bytes per row (16-bit output, going by the helper name)
+               {
+                       bgr888_to_rgb565(dest, srcs, w0 * 3);
+               }
+#endif
+       }
+       else
+       {
+               for (; h1-- > 0; dest += w0 * 2, srcs += 1024) // 16-bit path: w0*2 bytes per output row
+               {
+                       bgr555_to_rgb565(dest, srcs, w0 * 2);
+               }
+       }
+
+       screen_buf = cbs->pl_fbdev_flip(); // flip, and keep the returned pointer as the target for the next blit
+}
+
+void GPU_updateLace(void) // REARMED build: toggle GP1 bit 31, then blit if the frame is dirty and not being skipped
+{
+       // Interlace bit toggle
+       GPU_GP1 ^= 0x80000000;
+
+       if (!fb_dirty || (GPU_GP1&0x08800000)) // nothing changed since the last blit, or GP1 bit 23 or bit 27 is set
+               return;
+
+       if (!isSkip) {
+               blit();
+
+               fb_dirty = false;
+               if (cbs->fskip_advice) // frontend advises frame skipping: the next dirty frame will not be blitted
+                       isSkip = true;
+       }
+       else
+               isSkip = false; // this frame was skipped; fb_dirty stays set, so the next call blits
+}
+
+long GPUopen(unsigned long *, char *, char *) // opens the frontend output; all three arguments are ignored; always returns 0
+{
+       cbs->pl_fbdev_open(); // no NULL check: cbs must already have been set via GPUrearmedCallbacks()
+       screen_buf = cbs->pl_fbdev_flip(); // obtain the first buffer that blit() will draw into
+       return 0;
+}
+
+long GPUclose(void) // closes the frontend output via the callback table; always returns 0
+{
+       cbs->pl_fbdev_close(); // no NULL check on cbs; screen_buf is left as-is
+       return 0;
+}
+
+long GPUfreeze(unsigned int ulGetFreezeData, GPUFreeze_t* p2) // savestate entry point; thin wrapper around GPU_freeze()
+{
+       if (ulGetFreezeData > 1) // only modes 0 and 1 are forwarded; any other mode returns 0
+               return 0;
+
+       return GPU_freeze(ulGetFreezeData, p2); // passed as GPU_freeze's bWrite: nonzero fills p2 from GPU state, zero appears to restore from p2
+}
+
+void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_) // installs the frontend callback table used by blit/GPUopen/GPUclose/GPU_updateLace
+{
+       cbs = cbs_; // pointer is stored, not copied; the table must outlive every later GPU* call
+}
+
+} /* extern "C" */
+
+#endif