psp memhandlers, vsync, stuff
[picodrive.git] / platform / psp / emu.c
index 338ffa4..3f645ed 100644 (file)
@@ -3,6 +3,10 @@
 #include <sys/syslimits.h> // PATH_MAX
 
 #include <pspthreadman.h>
+#include <pspdisplay.h>
+#include <psputils.h>
+#include <pspgu.h>
+#include <pspaudio.h>
 
 #include "psp.h"
 #include "menu.h"
 #include "../common/lprintf.h"
 #include "../../Pico/PicoInt.h"
 
-#ifdef BENCHMARK
-#define OSD_FPS_X 220
-#else
-#define OSD_FPS_X 260
-#endif
+#define OSD_FPS_X 424
+
+// additional pspaudio imports, credits to crazyc
+// (declared by hand here; the trailing notes are guesses at what each call does)
+int sceAudio_38553111(unsigned short samples, unsigned short freq, char unknown);  // play with conversion?
+int sceAudio_5C37C0AE(void);                           // end play?
+int sceAudio_E0727056(int volume, void *buffer);       // blocking output
+int sceAudioOutput2GetRestSample(void);                // called with no arguments throughout this file
 
-// vram usage map:
-// 000000-044000 fb0
-// 044000-088000 fb1
-// 088000-0ae000 texture0
-// 0ae000-0d4000 texture1
 
 char romFileName[PATH_MAX];
-static unsigned char picoD2FB[(8+320)*(8+240+8)];
-unsigned char *PicoDraw2FB = picoD2FB;  // temporary buffer for alt renderer ( (8+320)*(8+240+8) )
+unsigned char *PicoDraw2FB = (unsigned char *)VRAM_CACHED_STUFF + 8; // +8 to be able to skip border with 1 quadword..
 int engineState;
 
 static int combo_keys = 0, combo_acts = 0; // keys and actions which need button combos
@@ -33,7 +33,9 @@ static unsigned int noticeMsgTime = 0;
 int reset_timing = 0; // do we need this?
 
 
-static void blit(const char *fps, const char *notice);
+static void sound_init(void);
+static void sound_deinit(void);
+static void blit2(const char *fps, const char *notice, int lagging_behind);
 static void clearArea(int full);
 
 void emu_noticeMsgUpdated(void)
@@ -46,24 +48,31 @@ void emu_getMainDir(char *dst, int len)
        if (len > 0) *dst = 0;
 }
 
-static void emu_msg_cb(const char *msg)
+/*
+ * Print 'text' on the OSD strip (rows 264..271) at pixel column x, after
+ * blanking the area the text will occupy.
+ * is_active != 0: draw into the buffer returned by psp_video_get_active_fb();
+ * is_active == 0: draw into psp_screen.
+ */
+static void osd_text(int x, const char *text, int is_active)
 {
-       void *fb = psp_video_get_active_fb();
+       unsigned short *screen = is_active ? psp_video_get_active_fb() : psp_screen;
+       // ints to clear per row: strlen*8 16bit pixels, two pixels per int
+       int len = strlen(text) * 8 / 2;
+       int *p, h;
+       void *tmp;
+       for (h = 0; h < 8; h++) {
+               // rows are 512 pixels apart
+               p = (int *) (screen+x+512*(264+h));
+               p = (int *) ((int)p & ~3); // align
+               memset32(p, 0, len);
+       }
+       // emu_textOut16() takes no target buffer; psp_screen is swapped for the
+       // duration of the call, presumably because that is where it draws
+       if (is_active) { tmp = psp_screen; psp_screen = screen; } // nasty pointer tricks
+       emu_textOut16(x, 264, text);
+       if (is_active) psp_screen = tmp;
+}
 
-       memset32((int *)((char *)fb + 512*264*2), 0, 512*8*2/4);
-       emu_textOut16(4, 264, msg);
+// Message callback: shows msg on the OSD strip of the active framebuffer.
+void emu_msg_cb(const char *msg)
+{
+       osd_text(4, msg, 1);
+       // the notice timestamp is set 2000000 ticks into the past (2s, going by
+       // the 1000000 "second passed" check in emu_Loop)
        noticeMsgTime = sceKernelGetSystemTimeLow() - 2000000;
 
        /* assumption: emu_msg_cb gets called only when something slow is about to happen */
        reset_timing = 1;
 }
 
-void emu_stateCb(const char *str)
-{
-       clearArea(0);
-       blit("", str);
-}
-
 static void emu_msg_tray_open(void)
 {
        strcpy(noticeMsg, "CD tray opened");
@@ -79,6 +88,8 @@ void emu_Init(void)
        mkdir("brm", 0777);
        mkdir("cfg", 0777);
 
+       sound_init();
+
        PicoInit();
        PicoMessage = emu_msg_cb;
        PicoMCDopenTray = emu_msg_tray_open;
@@ -108,6 +119,7 @@ void emu_Deinit(void)
        }
 
        PicoExit();
+       sound_deinit();
 }
 
 void emu_setDefaultConfig(void)
@@ -115,12 +127,13 @@ void emu_setDefaultConfig(void)
        memset(&currentConfig, 0, sizeof(currentConfig));
        currentConfig.lastRomFile[0] = 0;
        currentConfig.EmuOpt  = 0x1f | 0x680; // | confirm_save, cd_leds, 16bit rend
-       currentConfig.PicoOpt = 0x07 | 0xc00; // | cd_pcm, cd_cdda
+       currentConfig.PicoOpt = 0x0f | 0xc00; // | cd_pcm, cd_cdda
        currentConfig.PsndRate = 22050;
        currentConfig.PicoRegion = 0; // auto
        currentConfig.PicoAutoRgnOrder = 0x184; // US, EU, JP
        currentConfig.Frameskip = -1; // auto
        currentConfig.volume = 50;
+       currentConfig.CPUclock = 333;
        currentConfig.KeyBinds[ 4] = 1<<0; // SACB RLDU
        currentConfig.KeyBinds[ 6] = 1<<1;
        currentConfig.KeyBinds[ 7] = 1<<2;
@@ -129,42 +142,212 @@ void emu_setDefaultConfig(void)
        currentConfig.KeyBinds[13] = 1<<5;
        currentConfig.KeyBinds[15] = 1<<6;
        currentConfig.KeyBinds[ 3] = 1<<7;
-       currentConfig.KeyBinds[23] = 1<<26; // switch rend
+       currentConfig.KeyBinds[12] = 1<<26; // switch rnd
        currentConfig.KeyBinds[ 8] = 1<<27; // save state
        currentConfig.KeyBinds[ 9] = 1<<28; // load state
        currentConfig.PicoCDBuffers = 0;
-       currentConfig.scaling = 0;
+       currentConfig.scaling = 1; // bilinear filtering for psp
+       currentConfig.scale = currentConfig.hscale32 = currentConfig.hscale40 = 1.0;
 }
 
 
-static int EmuScan16(unsigned int num, void *sdata)
+extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);
+
+// Vertex layout used for the sprite draws in this file: 16bit texture
+// coordinates followed by a 16bit position (the draws pass
+// GU_TEXTURE_16BIT|GU_VERTEX_16BIT).
+struct Vertex
 {
-       if (!(Pico.video.reg[1]&8)) num += 8;
-       DrawLineDest = (unsigned short *) psp_screen + 512*(num+1);
+       short u,v;      // texture coordinates
+       short x,y,z;    // position
+};
 
-       return 0;
+// the two corners of the sprite that carries the emulated frame;
+// filled in by set_scaling_params()
+static struct Vertex __attribute__((aligned(4))) g_vertices[2];
+// 16bit palette built by do_pal_update() and handed to sceGuClutLoad()/amips_clut()
+static unsigned short __attribute__((aligned(16))) localPal[0x100];
+// dynamic_palette: set once the palette changed mid-frame and lines are converted to 16bit
+// need_pal_upload: localPal changed and has not been given to sceGuClutLoad() yet
+// blit_16bit_mode: the texture state currently set up is the 16bit one
+static int dynamic_palette = 0, need_pal_upload = 0, blit_16bit_mode = 0;
+// byte offset of the image's top-left corner inside a framebuffer
+static int fbimg_offs = 0;
+
+/*
+ * Recompute the blit geometry (g_vertices and fbimg_offs) from the scale
+ * settings in currentConfig and the VDP's current horizontal mode.
+ */
+static void set_scaling_params(void)
+{
+       int tex_w, out_w, out_h, crop;
+       int left = 0, top = 0;
+
+       g_vertices[0].x = 0;
+       g_vertices[0].y = 0;
+       g_vertices[0].z = 0;
+       g_vertices[1].z = 0;
+
+       // size of the scaled image, rounded to nearest
+       out_h = (int)(240.0 * currentConfig.scale + 0.5);
+       if (Pico.video.reg[12] & 1) {
+               tex_w = 320;
+               out_w = (int)(320.0 * currentConfig.scale * currentConfig.hscale40 + 0.5);
+       } else {
+               tex_w = 256;
+               out_w = (int)(256.0 * currentConfig.scale * currentConfig.hscale32 + 0.5);
+       }
+
+       // horizontal: center within 480 pixels, or trim both sides when too wide
+       if (out_w < 480) {
+               g_vertices[0].u = 0;
+               g_vertices[1].u = tex_w;
+               left = 240 - out_w/2;
+       } else {
+               // NOTE(review): the excess is measured in output pixels but is
+               // subtracted from source texel coordinates - confirm this is intended
+               crop = (out_w-480)/2;
+               g_vertices[0].u = crop;
+               g_vertices[1].u = tex_w - crop;
+               out_w = 480;
+       }
+
+       // vertical: same treatment against 272 lines
+       if (out_h < 272) {
+               g_vertices[0].v = 0;
+               g_vertices[1].v = 240;
+               top = 136 - out_h/2;
+       } else {
+               crop = (out_h-272)/2;
+               g_vertices[0].v = crop;
+               g_vertices[1].v = 240 - crop;
+               out_h = 272;
+       }
+
+       g_vertices[1].x = out_w;
+       g_vertices[1].y = out_h;
+       if (left < 0) left = 0;
+       if (top  < 0) top  = 0;
+       fbimg_offs = (top*512 + left) * 2; // dst is always 16bit
+
+       /*
+       lprintf("set_scaling_params:\n");
+       lprintf("offs: %i, %i\n", left, top);
+       lprintf("xy0, xy1: %i, %i; %i, %i\n", g_vertices[0].x, g_vertices[0].y, g_vertices[1].x, g_vertices[1].y);
+       lprintf("uv0, uv1: %i, %i; %i, %i\n", g_vertices[0].u, g_vertices[0].v, g_vertices[1].u, g_vertices[1].v);
+       */
 }
 
-static int EmuScan8(unsigned int num, void *sdata)
+/*
+ * Rebuild localPal from Pico.cram. With allow_sh set and the VDP's
+ * shadow/hilight bit on, the shadowed and hilighted variants are generated too.
+ * Clears Pico.m.dirtyPal and flags the palette for upload.
+ */
+static void do_pal_update(int allow_sh)
 {
-       // draw like the fast renderer
-       // TODO?
-       //if (!(Pico.video.reg[1]&8)) num += 8;
-       //HighCol = gfx_buffer + 328*(num+1);
+       unsigned int *spal=(void *)Pico.cram;
+       unsigned int *dpal=(void *)localPal;
+       int i;
+
+       // 0x40 colors, converted two 16bit entries per 32bit word:
+       // each 4bit component is moved up into its field of the 16bit pixel
+       for (i = 0x3f/2; i >= 0; i--)
+               dpal[i] = ((spal[i]&0x000f000f)<< 1)|((spal[i]&0x00f000f0)<<3)|((spal[i]&0x0f000f00)<<4);
+
+       if (allow_sh && (Pico.video.reg[0xC]&8)) // shadow/hilight?
+       {
+               // shadowed pixels
+               // (words 0x20.. and 0x60.. are localPal entries 0x40-0x7f and 0xc0-0xff;
+               // shift by one and mask off the bits that crossed into the next field)
+               for (i = 0x3f/2; i >= 0; i--)
+                       dpal[0x20|i] = dpal[0x60|i] = (dpal[i]>>1)&0x738e738e;
+               // hilighted pixels
+               // (entries 0x80-0xbf: add a constant to every field and, where a
+               // field carried out, fill it with ones instead)
+               for (i = 0x3f; i >= 0; i--) {
+                       int t=localPal[i]&0xe71c;t+=0x4208;
+                       if (t&0x20) t|=0x1c;
+                       if (t&0x800) t|=0x700;
+                       if (t&0x10000) t|=0xe000;
+                       t&=0xe71c;
+                       localPal[0x80|i]=(unsigned short)t;
+               }
+               // index 0xe0 is kept black (it is the fill value used for borders)
+               localPal[0xe0] = 0;
+       }
+       Pico.m.dirtyPal = 0;
+       need_pal_upload = 1;
+}
+
+/*
+ * Convert the 8bit lines rendered so far, up to (not including) line_to,
+ * into the 16bit image using the current localPal.
+ */
+static void do_slowmode_lines(int line_to)
+{
+       int first = (Pico.video.reg[1]&8) ? 0 : 8; // start 8 lines down unless reg[1] bit 3 is set
+       int width = (Pico.video.reg[12]&1) ? 320 : 256;
+       unsigned short *out = (unsigned short *)VRAM_STUFF + 512*240/2 + 512*first;
+       unsigned char  *in  = (unsigned char  *)VRAM_CACHED_STUFF + 16 + 512*first;
+       int y;
+
+       for (y = first; y < line_to; y++) {
+               amips_clut(out, in, localPal, width);
+               out += 512;
+               in  += 512;
+       }
+}
+
+/*
+ * Per-frame setup for the line renderer: point HighCol at the first line,
+ * start the frame in 8bit (non-dynamic palette) mode and refresh the palette
+ * if it is dirty.
+ */
+static void EmuScanPrepare(void)
+{
+       unsigned char *first_line = (unsigned char *)VRAM_CACHED_STUFF + 8;
+
+       if (!(Pico.video.reg[1]&8))
+               first_line += 8*512; // start 8 lines down unless reg[1] bit 3 is set
+       HighCol = first_line;
+
+       dynamic_palette = 0;
+       if (Pico.m.dirtyPal) {
+               do_pal_update(1);
+       }
+}
+
+/*
+ * Scanline callback (installed as PicoScan). While the palette stays the same
+ * it only moves HighCol to the next line of the 8bit image; after the first
+ * mid-frame palette change every line is converted to 16bit right away.
+ */
+static int EmuScanSlow(unsigned int num, void *sdata)
+{
+       if (!(Pico.video.reg[1]&8)) num += 8;
+
+       if (Pico.m.dirtyPal) {
+               // first change this frame: convert the lines already rendered
+               // before the palette is replaced
+               if (!dynamic_palette) {
+                       do_slowmode_lines(num);
+                       dynamic_palette = 1;
+               }
+               do_pal_update(1);
+       }
+
+       if (!dynamic_palette) {
+               HighCol = (unsigned char *)VRAM_CACHED_STUFF + (num+1)*512 + 8;
+               return 0;
+       }
+
+       {
+               int width = (Pico.video.reg[12]&1) ? 320 : 256;
+               void *out = (char *)VRAM_STUFF + 512*240 + 512*2*num;
+               amips_clut(out, HighCol + 8, localPal, width);
+       }
 
        return 0;
 }
 
-static void osd_text(int x, const char *text)
+/*
+ * Queue the GU commands that draw the emulated frame into the buffer
+ * psp_screen points at, either from the 8bit image through the CLUT or from
+ * the 16bit image when the palette changed mid-frame. Does not wait for the
+ * GU to finish; callers sync later.
+ */
+static void blitscreen_clut(void)
 {
-       int len = strlen(text) * 8 / 2;
-       int *p, h;
-       for (h = 0; h < 8; h++) {
-               p = (int *) ((unsigned short *) psp_screen+x+512*(264+h));
-               p = (int *) ((int)p & ~3); // align
-               memset32(p, 0, len);
+       // draw buffer base: image offset plus the offset of the target framebuffer
+       int offs = fbimg_offs;
+       offs += (psp_screen == VRAM_FB0) ? VRAMOFFS_FB0 : VRAMOFFS_FB1;
+
+       sceGuSync(0,0); // sync with prev
+       sceGuStart(GU_DIRECT, guCmdList);
+       sceGuDrawBuffer(GU_PSM_5650, (void *)offs, 512); // point to back buffer
+
+       // texture state is only switched when the mode differs from the last blit
+       if (dynamic_palette)
+       {
+               if (!blit_16bit_mode) {
+                       sceGuTexMode(GU_PSM_5650, 0, 0, 0);
+                       sceGuTexImage(0,512,512,512,(char *)VRAM_STUFF + 512*240);
+
+                       blit_16bit_mode = 1;
+               }
        }
-       emu_textOut16(x, 264, text);
+       else
+       {
+               if (blit_16bit_mode) {
+                       sceGuClutMode(GU_PSM_5650,0,0xff,0);
+                       sceGuTexMode(GU_PSM_T8,0,0,0); // 8-bit image
+                       sceGuTexImage(0,512,512,512,(char *)VRAM_STUFF + 16);
+                       blit_16bit_mode = 0;
+               }
+
+               // with PicoOpt bit 0x10 set the palette is refreshed here,
+               // without shadow/hilight entries
+               if ((PicoOpt&0x10) && Pico.m.dirtyPal)
+                       do_pal_update(0);
+
+               // presumably so the GU sees what was written through the cache
+               // (frame data, localPal) - TODO confirm
+               sceKernelDcacheWritebackAll();
+
+               if (need_pal_upload) {
+                       need_pal_upload = 0;
+                       sceGuClutLoad((256/8), localPal); // upload 32*8 entries (256)
+               }
+       }
+
+#if 1
+       // no horizontal crop or scale: draw in SLICE_WIDTH wide vertical strips
+       // instead of one sprite (presumably for texture cache efficiency)
+       if (g_vertices[0].u == 0 && g_vertices[1].u == g_vertices[1].x)
+       {
+               struct Vertex* vertices;
+               int x;
+
+               #define SLICE_WIDTH 32
+               for (x = 0; x < g_vertices[1].x; x += SLICE_WIDTH)
+               {
+                       // render sprite
+                       vertices = (struct Vertex*)sceGuGetMemory(2 * sizeof(struct Vertex));
+                       memcpy(vertices, g_vertices, 2 * sizeof(struct Vertex));
+                       vertices[0].u = vertices[0].x = x;
+                       vertices[1].u = vertices[1].x = x + SLICE_WIDTH;
+                       sceGuDrawArray(GU_SPRITES,GU_TEXTURE_16BIT|GU_VERTEX_16BIT|GU_TRANSFORM_2D,2,0,vertices);
+               }
+               // lprintf("listlen: %iB\n", sceGuCheckList()); // ~480 only
+       }
+       else
+#endif
+               sceGuDrawArray(GU_SPRITES,GU_TEXTURE_16BIT|GU_VERTEX_16BIT|GU_TRANSFORM_2D,2,0,g_vertices);
+
+       sceGuFinish();
 }
 
 
@@ -178,72 +361,66 @@ static void cd_leds(void)
 
        p = (unsigned int *)((short *)psp_screen + 512*2+4+2);
        col_g = (old_reg & 2) ? 0x06000600 : 0;
-       col_r = (old_reg & 1) ? 0xc000c000 : 0;
+       col_r = (old_reg & 1) ? 0x00180018 : 0;
        *p++ = col_g; *p++ = col_g; p+=2; *p++ = col_r; *p++ = col_r; p += 512/2 - 12/2;
        *p++ = col_g; *p++ = col_g; p+=2; *p++ = col_r; *p++ = col_r; p += 512/2 - 12/2;
        *p++ = col_g; *p++ = col_g; p+=2; *p++ = col_r; *p++ = col_r;
 }
 
 
-static short localPal[0x100];
-
-static void blit(const char *fps, const char *notice)
+/*
+ * Debug overlay: prints on rows 256..263 of psp_screen whether the sliced
+ * blit path is in use ("sl") and whether the 16bit texture mode is active ("16b").
+ */
+static void dbg_text(void)
 {
-       int emu_opt = currentConfig.EmuOpt;
+       int *p, h, len;
+       char text[128];
 
-#if 0
-       if (PicoOpt&0x10)
-       {
-               int lines_flags = 224;
-               // 8bit fast renderer
-               if (Pico.m.dirtyPal) {
-                       Pico.m.dirtyPal = 0;
-                       vidConvCpyRGB565(localPal, Pico.cram, 0x40);
-               }
-               if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000;
-               if (currentConfig.EmuOpt&0x4000)
-                       lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000;
-               vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, lines_flags);
+       // "sl" uses the same condition blitscreen_clut() tests before slicing
+       sprintf(text, "sl: %i, 16b: %i", g_vertices[0].u == 0 && g_vertices[1].u == g_vertices[1].x, blit_16bit_mode);
+       // ints to clear per row: strlen*8 16bit pixels, two pixels per int
+       len = strlen(text) * 8 / 2;
+       for (h = 0; h < 8; h++) {
+               p = (int *) ((unsigned short *) psp_screen+2+512*(256+h));
+               p = (int *) ((int)p & ~3); // align
+               memset32(p, 0, len);
        }
-       else if (!(emu_opt&0x80))
+       emu_textOut16(2, 256, text);
+}
+
+
+/* called after rendering is done, but frame emulation is not finished */
+/* With PicoOpt bit 0x10 set, the first and last 8 lines of PicoDraw2FB are
+ * filled with palette index 0xe0 first; then the GU blit is queued. */
+void blit1(void)
 {
+       if (PicoOpt&0x10)
        {
-               int lines_flags;
-               // 8bit accurate renderer
-               if (Pico.m.dirtyPal) {
-                       Pico.m.dirtyPal = 0;
-                       vidConvCpyRGB565(localPal, Pico.cram, 0x40);
-                       if (Pico.video.reg[0xC]&8) { // shadow/hilight mode
-                               //vidConvCpyRGB32sh(localPal+0x40, Pico.cram, 0x40);
-                               //vidConvCpyRGB32hi(localPal+0x80, Pico.cram, 0x40); // TODO?
-                               blockcpy(localPal+0xc0, localPal+0x40, 0x40*2);
-                               localPal[0xc0] = 0x0600;
-                               localPal[0xd0] = 0xc000;
-                               localPal[0xe0] = 0x0000; // reserved pixels for OSD
-                               localPal[0xf0] = 0xffff;
-                       }
-                       /* no support
-                       else if (rendstatus & 0x20) { // mid-frame palette changes
-                               vidConvCpyRGB565(localPal+0x40, HighPal, 0x40);
-                               vidConvCpyRGB565(localPal+0x80, HighPal+0x40, 0x40);
-                       } */
-               }
-               lines_flags = (Pico.video.reg[1]&8) ? 240 : 224;
-               if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000;
-               if (currentConfig.EmuOpt&0x4000)
-                       lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000;
-               vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, lines_flags);
+               int i;
+               unsigned char *pd;
+               // clear top and bottom trash
+               // (8 lines of 320 pixels each, lines are 512 bytes apart)
+               for (pd = PicoDraw2FB+8, i = 8; i > 0; i--, pd += 512)
+                       memset32((int *)pd, 0xe0e0e0e0, 320/4);
+               for (pd = PicoDraw2FB+512*232+8, i = 8; i > 0; i--, pd += 512)
+                       memset32((int *)pd, 0xe0e0e0e0, 320/4);
        }
-#endif
+
+       blitscreen_clut();
+}
+
+
+/*
+ * Second half of the blit: draws the OSD (notice, fps, debug line, CD LEDs)
+ * into psp_screen and flips the buffers.
+ * fps / notice: strings for the OSD strip; notice may be NULL.
+ * lagging_behind: non-zero when the caller is behind schedule; it can
+ * suppress vsync (see below).
+ */
+static void blit2(const char *fps, const char *notice, int lagging_behind)
+{
+       int vsync = 0, emu_opt = currentConfig.EmuOpt;
 
        if (notice || (emu_opt & 2)) {
-               if (notice)      osd_text(4, notice);
-               if (emu_opt & 2) osd_text(OSD_FPS_X, fps);
+               if (notice)      osd_text(4, notice, 0);
+               if (emu_opt & 2) osd_text(OSD_FPS_X, fps, 0);
        }
 
+       dbg_text();
+
        if ((emu_opt & 0x400) && (PicoMCD & 1))
                cd_leds();
 
-       psp_video_flip(0);
+       // vsync only if EmuOpt bit 0x2000 asks for it; with bit 0x10000 set as
+       // well it is dropped whenever we are lagging behind
+       if (currentConfig.EmuOpt & 0x2000) { // want vsync
+               if (!(currentConfig.EmuOpt & 0x10000) || !lagging_behind) vsync = 1;
+       }
+
+       psp_video_flip(vsync);
 }
 
 // clears whole screen or just the notice area (in all buffers)
@@ -253,6 +430,8 @@ static void clearArea(int full)
                memset32(psp_screen, 0, 512*272*2/4);
                psp_video_flip(0);
                memset32(psp_screen, 0, 512*272*2/4);
+               memset32(VRAM_CACHED_STUFF, 0xe0e0e0e0, 512*240/4);
+               memset32((int *)VRAM_CACHED_STUFF+512*240/4, 0, 512*240*2/4);
        } else {
                void *fb = psp_video_get_active_fb();
                memset32((int *)((char *)psp_screen + 512*264*2), 0, 512*8*2/4);
@@ -262,31 +441,186 @@ static void clearArea(int full)
 
+/*
+ * Put the GU texture state and the renderer hooks back to their initial
+ * (8bit image + CLUT) configuration, force a palette rebuild and recompute
+ * the scaling parameters. Waits for the GU before returning.
+ */
 static void vidResetMode(void)
 {
-       if (PicoOpt&0x10) {
-       } else if (currentConfig.EmuOpt&0x80) {
-               PicoDrawSetColorFormat(1);
-               PicoScan = EmuScan16;
-       } else {
-               PicoDrawSetColorFormat(-1);
-               PicoScan = EmuScan8;
+       // setup GU
+       sceGuSync(0,0); // sync with prev
+       sceGuStart(GU_DIRECT, guCmdList);
+
+       sceGuClutMode(GU_PSM_5650,0,0xff,0);
+       sceGuTexMode(GU_PSM_T8,0,0,0); // 8-bit image
+       sceGuTexFunc(GU_TFX_REPLACE,GU_TCC_RGB);
+       // currentConfig.scaling selects linear filtering, otherwise nearest
+       if (currentConfig.scaling)
+            sceGuTexFilter(GU_LINEAR, GU_LINEAR);
+       else sceGuTexFilter(GU_NEAREST, GU_NEAREST);
+       sceGuTexScale(1.0f,1.0f);
+       sceGuTexOffset(0.0f,0.0f);
+
+       sceGuTexImage(0,512,512,512,(char *)VRAM_STUFF + 16);
+
+       // slow rend.
+       PicoDrawSetColorFormat(-1);
+       PicoScan = EmuScanSlow;
+
+       // index 0xe0 is the border fill value and must be black; the rest of
+       // the palette gets rebuilt because dirtyPal is set
+       localPal[0xe0] = 0;
+       Pico.m.dirtyPal = 1;
+       // matches the 8bit texture state that was just queued above
+       blit_16bit_mode = dynamic_palette = 0;
+
+       sceGuFinish();
+       set_scaling_params();
+       sceGuSync(0,0);
+}
+
+
+/* sound stuff */
+// block sizes are in shorts; sound_prepare() halves them for rates <= 22050
+#define SOUND_BLOCK_SIZE_NTSC (1470*2) // 1024 // 1152
+#define SOUND_BLOCK_SIZE_PAL  (1764*2)
+#define SOUND_BLOCK_COUNT    4
+
+// ring of SOUND_BLOCK_COUNT blocks (sized for PAL) plus 44100/50*2 extra shorts at the end
+static short __attribute__((aligned(4))) sndBuffer[SOUND_BLOCK_SIZE_PAL*SOUND_BLOCK_COUNT + 44100/50*2];
+// snd_playptr: next block the sound thread outputs; sndBuffer_endptr: end of the ring in use
+static short *snd_playptr = NULL, *sndBuffer_endptr = NULL;
+// samples_made is advanced by writeSound(), samples_done by sound_thread();
+// samples_block is the current block size in shorts
+static int samples_made = 0, samples_done = 0, samples_block = 0;
+// set by sound_deinit() to make sound_thread() leave its loop
+static int sound_thread_exit = 0;
+// signalled by writeSound() when enough data is queued for the sound thread
+static SceUID sound_sem = -1;
+
+static void writeSound(int len);
+
+/*
+ * Sound output thread. Sleeps on sound_sem until writeSound() has queued more
+ * than two blocks, then feeds sndBuffer to the blocking audio output call one
+ * block at a time until fewer than one block is left. Runs until
+ * sound_thread_exit is set, then deletes itself.
+ */
+static int sound_thread(SceSize args, void *argp)
+{
+       int ret;
+
+       lprintf("sthr: started, priority %i\n", sceKernelGetThreadCurrentPriority());
+
+       while (!sound_thread_exit)
+       {
+               if (samples_made - samples_done < samples_block) {
+                       // wait for data (use at least 2 blocks)
+                       //lprintf("sthr: wait... (%i)\n", samples_made - samples_done);
+                       while (samples_made - samples_done <= samples_block*2 && !sound_thread_exit)
+                               ret = sceKernelWaitSema(sound_sem, 1, 0);
+                       //lprintf("sthr: sceKernelWaitSema: %i\n", ret);
+                       continue;
+               }
+
+               //lprintf("sthr: got data: %i\n", samples_made - samples_done);
+
+               ret = sceAudio_E0727056(PSP_AUDIO_VOLUME_MAX, snd_playptr);
+
+               samples_done += samples_block;
+               snd_playptr  += samples_block;
+               if (snd_playptr >= sndBuffer_endptr)
+                       snd_playptr = sndBuffer;
+               if (ret)
+                       lprintf("sthr: outf: %i; pos %i/%i\n", ret, samples_done, samples_made);
+
+               // shouldn't happen, but just in case
+               if (samples_made - samples_done >= samples_block*3) {
+                       //lprintf("block skip (%i)\n", samples_made - samples_done);
+                       samples_done += samples_block; // skip
+                       snd_playptr  += samples_block;
+                       // the skip can land on the end of the ring too; without
+                       // this the next output call would read a whole block
+                       // starting at sndBuffer_endptr
+                       if (snd_playptr >= sndBuffer_endptr)
+                               snd_playptr = sndBuffer;
+               }
+
        }
-       if ((PicoOpt&0x10) || !(currentConfig.EmuOpt&0x80)) {
-               // setup pal for 8-bit modes
-               localPal[0xc0] = 0x0600;
-               localPal[0xd0] = 0xc000;
-               localPal[0xe0] = 0x0000; // reserved pixels for OSD
-               localPal[0xf0] = 0xffff;
+
+       lprintf("sthr: exit\n");
+       sceKernelExitDeleteThread(0);
+       return 0;
+}
+
+/*
+ * One-time sound setup: creates the semaphore writeSound() uses to wake the
+ * sound thread and starts that thread. Failures are logged; on failure no
+ * sound thread is running.
+ */
+static void sound_init(void)
+{
+       SceUID thid;
+
+       samples_made = samples_done = 0;
+       samples_block = SOUND_BLOCK_SIZE_NTSC; // make sure it goes to sema
+       sound_thread_exit = 0;
+
+       sound_sem = sceKernelCreateSema("sndsem", 0, 0, 1, NULL);
+       if (sound_sem < 0) {
+               lprintf("sceKernelCreateSema() failed: %i\n", sound_sem);
+               // without a semaphore the thread's wait loop would never block
+               // and would spin at its priority, so don't start it
+               return;
+       }
+
+       thid = sceKernelCreateThread("sndthread", sound_thread, 0x12, 0x10000, 0, NULL);
+       if (thid >= 0)
+       {
+               sceKernelStartThread(thid, 0, 0);
        }
-       Pico.m.dirtyPal = 1;
+       else
+               lprintf("sceKernelCreateThread failed: %i\n", thid);
+}
 
-       clearArea(1);
+/*
+ * Per-session sound setup, called from emu_Loop when sound is enabled:
+ * re-rates the sound core if rate/options/region changed, picks the block
+ * size, (re)opens the audio output and primes the ring buffer. On failure it
+ * posts a notice and clears EmuOpt bit 4; PsndOut is left untouched.
+ */
+static void sound_prepare(void)
+{
+       // settings in effect at the last successful setup
+       static int PsndRate_old = 0, PicoOpt_old = 0, pal_old = 0;
+       int ret, stereo;
+
+       samples_made = samples_done = 0;
+
+       if (PsndRate != PsndRate_old || (PicoOpt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) {
+               PsndRerate(Pico.m.frame_count ? 1 : 0);
+       }
+       stereo=(PicoOpt&8)>>3; // only used for the log line below
+
+       samples_block = Pico.m.pal ? SOUND_BLOCK_SIZE_PAL : SOUND_BLOCK_SIZE_NTSC;
+       if (PsndRate <= 22050) samples_block /= 2;
+       sndBuffer_endptr = &sndBuffer[samples_block*SOUND_BLOCK_COUNT];
+
+       lprintf("starting audio: %i, len: %i, stereo: %i, pal: %i, block samples: %i\n",
+                       PsndRate, PsndLen, stereo, Pico.m.pal, samples_block);
+
+       // let whatever is still queued in the output drain before reopening it
+       while (sceAudioOutput2GetRestSample() > 0) psp_msleep(100);
+       sceAudio_5C37C0AE();
+       ret = sceAudio_38553111(samples_block/2, PsndRate, 2); // seems to not need that stupid 64byte alignment
+       if (ret < 0) {
+               lprintf("sceAudio_38553111() failed: %i\n", ret);
+               sprintf(noticeMsg, "sound init failed (%i), snd disabled", ret);
+               noticeMsgTime = sceKernelGetSystemTimeLow();
+               currentConfig.EmuOpt &= ~4;
+       } else {
+               PicoWriteSound = writeSound;
+               memset32((int *)(void *)sndBuffer, 0, sizeof(sndBuffer)/4);
+               // playback starts at the last block of the ring, which is silent,
+               // while the emulator starts writing at the beginning
+               snd_playptr = sndBuffer_endptr - samples_block;
+               samples_made = samples_block; // send 1 empty block first..
+               PsndOut = sndBuffer;
+               PsndRate_old = PsndRate;
+               PicoOpt_old  = PicoOpt;
+               pal_old = Pico.m.pal;
+       }
+}
+
+/*
+ * Stop sound output for this session: reset the sample counters, wait until
+ * the output reports nothing left to play, then end playback.
+ */
+static void sound_end(void)
+{
+       samples_made = samples_done = 0;
+
+       for (;;) {
+               if (sceAudioOutput2GetRestSample() <= 0)
+                       break;
+               psp_msleep(100);
+       }
+
+       sceAudio_5C37C0AE();
 }
 
-static void updateSound(int len)
+/*
+ * Counterpart of sound_init(): asks the sound thread to exit, wakes it and
+ * deletes the semaphore. Does not wait for the thread to finish.
+ */
+static void sound_deinit(void)
 {
+       sound_thread_exit = 1;
+       // wake the thread in case it is sleeping on the semaphore
+       sceKernelSignalSema(sound_sem, 1);
+       sceKernelDeleteSema(sound_sem);
+       sound_sem = -1;
+}
+
+/*
+ * Sound callback (installed as PicoWriteSound): called with the number of
+ * samples just written at PsndOut. Advances the write pointer around the
+ * ring and wakes the sound thread once more than two blocks are queued.
+ */
+static void writeSound(int len)
+{
        if (PicoOpt&8) len<<=1;
 
-       // TODO..
+       PsndOut += len;
+       /*if (PsndOut > sndBuffer_endptr) {
+               memcpy32((int *)(void *)sndBuffer, (int *)endptr, (PsndOut - endptr + 1) / 2);
+               PsndOut = &sndBuffer[PsndOut - endptr];
+               lprintf("mov\n");
+       }
+       else*/
+       // NOTE(review): anything written past sndBuffer_endptr is not carried
+       // over to the start of the ring (the disabled code above would) - confirm
+       if (PsndOut >= sndBuffer_endptr)
+               PsndOut = sndBuffer;
+
+       // signal the snd thread
+       samples_made += len;
+       if (samples_made - samples_done > samples_block*2) {
+               // lprintf("signal, %i/%i\n", samples_done, samples_made);
+               sceKernelSignalSema(sound_sem, 1);
+       }
 }
 
 
@@ -306,11 +640,17 @@ void emu_forcedFrame(void)
        PicoOpt |=  0x4080; // soft_scale | acc_sprites
        currentConfig.EmuOpt |= 0x80;
 
-       PicoDrawSetColorFormat(1);
-       PicoScan = EmuScan16;
-       PicoScan((unsigned) -1, NULL);
-       Pico.m.dirtyPal = 1;
+       vidResetMode();
+       memset32(VRAM_CACHED_STUFF, 0xe0e0e0e0, 512*8/4); // borders
+       memset32((int *)VRAM_CACHED_STUFF + 512*232/4, 0xe0e0e0e0, 512*8/4);
+       memset32((int *)psp_screen + 512*264*2/4, 0, 512*8*2/4);
+
+       PicoDrawSetColorFormat(-1);
+       PicoScan = EmuScanSlow;
+       EmuScanPrepare();
        PicoFrameDrawOnly();
+       blit1();
+       sceGuSync(0,0);
 
        PicoOpt = po_old;
        currentConfig.EmuOpt = eo_old;
@@ -328,7 +668,8 @@ static void RunEvents(unsigned int which)
                                 (!(which & 0x1000) && (currentConfig.EmuOpt & 0x200))) ) // save
                {
                        int keys;
-                       blit("", (which & 0x1000) ? "LOAD STATE? (X=yes, O=no)" : "OVERWRITE SAVE? (X=yes, O=no)");
+                       sceGuSync(0,0);
+                       blit2("", (which & 0x1000) ? "LOAD STATE? (X=yes, O=no)" : "OVERWRITE SAVE? (X=yes, O=no)", 0);
                        while( !((keys = psp_pad_read(1)) & (BTN_X|BTN_CIRCLE)) )
                                psp_msleep(50);
                        if (keys & BTN_CIRCLE) do_it = 0;
@@ -339,8 +680,8 @@ static void RunEvents(unsigned int which)
 
                if (do_it)
                {
-                       osd_text(4, (which & 0x1000) ? "LOADING GAME" : "SAVING GAME");
-                       PicoStateProgressCB = emu_stateCb;
+                       osd_text(4, (which & 0x1000) ? "LOADING GAME" : "SAVING GAME", 1);
+                       PicoStateProgressCB = emu_msg_cb;
                        emu_SaveLoadGame((which & 0x1000) >> 12, 0);
                        PicoStateProgressCB = NULL;
                        psp_msleep(0);
@@ -355,13 +696,10 @@ static void RunEvents(unsigned int which)
 
                vidResetMode();
 
-               if (PicoOpt&0x10) {
-                       strcpy(noticeMsg, " 8bit fast renderer");
-               } else if (currentConfig.EmuOpt&0x80) {
-                       strcpy(noticeMsg, "16bit accurate renderer");
-               } else {
-                       strcpy(noticeMsg, " 8bit accurate renderer");
-               }
+               if (PicoOpt&0x10)
+                       strcpy(noticeMsg, "fast renderer");
+               else if (currentConfig.EmuOpt&0x80)
+                       strcpy(noticeMsg, "accurate renderer");
 
                noticeMsgTime = sceKernelGetSystemTimeLow();
        }
@@ -386,6 +724,9 @@ static void updateKeys(void)
        int i;
 
        keys = psp_pad_read(0);
+       if (keys & PSP_CTRL_HOME)
+               sceDisplayWaitVblankStart();
+
        if (keys & BTN_SELECT)
                engineState = PGS_Menu;
 
@@ -487,7 +828,6 @@ static void simpleWait(unsigned int until)
 
 void emu_Loop(void)
 {
-       //static int PsndRate_old = 0, PicoOpt_old = 0, pal_old = 0;
        char fpsbuff[24]; // fps count c string
        unsigned int tval, tval_prev = 0, tval_thissec = 0; // timing
        int frames_done = 0, frames_shown = 0, oldmodes = 0;
@@ -498,8 +838,16 @@ void emu_Loop(void)
 
        fpsbuff[0] = 0;
 
+       if (currentConfig.CPUclock != psp_get_cpu_clock()) {
+               lprintf("setting cpu clock to %iMHz... ", currentConfig.CPUclock);
+               i = psp_set_cpu_clock(currentConfig.CPUclock);
+               lprintf(i ? "failed\n" : "done\n");
+               currentConfig.CPUclock = psp_get_cpu_clock();
+       }
+
        // make sure we are in correct mode
        vidResetMode();
+       clearArea(1);
        Pico.m.dirtyPal = 1;
        oldmodes = ((Pico.video.reg[12]&1)<<2) ^ 0xc;
        find_combos();
@@ -514,36 +862,10 @@ void emu_Loop(void)
 
        // prepare sound stuff
        PsndOut = NULL;
-#if 0 // TODO
        if (currentConfig.EmuOpt & 4)
        {
-               int ret, snd_excess_add, stereo;
-               if (PsndRate != PsndRate_old || (PicoOpt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) {
-                       sound_rerate(Pico.m.frame_count ? 1 : 0);
-               }
-               stereo=(PicoOpt&8)>>3;
-               snd_excess_add = ((PsndRate - PsndLen*target_fps)<<16) / target_fps;
-               snd_cbuf_samples = (PsndRate<<stereo) * 16 / target_fps;
-               lprintf("starting audio: %i len: %i (ex: %04x) stereo: %i, pal: %i\n",
-                       PsndRate, PsndLen, snd_excess_add, stereo, Pico.m.pal);
-               ret = FrameworkAudio_Init(PsndRate, snd_cbuf_samples, stereo);
-               if (ret != 0) {
-                       lprintf("FrameworkAudio_Init() failed: %i\n", ret);
-                       sprintf(noticeMsg, "sound init failed (%i), snd disabled", ret);
-                       noticeMsgTime = sceKernelGetSystemTimeLow();
-                       currentConfig.EmuOpt &= ~4;
-               } else {
-                       FrameworkAudio_SetVolume(currentConfig.volume, currentConfig.volume);
-                       PicoWriteSound = updateSound;
-                       snd_cbuff = FrameworkAudio_56448Buffer();
-                       PsndOut = snd_cbuff + snd_cbuf_samples / 2; // start writing at the middle
-                       snd_all_samples = 0;
-                       PsndRate_old = PsndRate;
-                       PicoOpt_old  = PicoOpt;
-                       pal_old = Pico.m.pal;
-               }
+               sound_prepare();
        }
-#endif
 
        // loop?
        while (engineState == PGS_Running)
@@ -577,25 +899,20 @@ void emu_Loop(void)
                if (modes != oldmodes) {
                        oldmodes = modes;
                        clearArea(1);
+                       set_scaling_params();
                }
 
                // second passed?
                if (tval - tval_thissec >= 1000000)
                {
-#ifdef BENCHMARK
-                       static int bench = 0, bench_fps = 0, bench_fps_s = 0, bfp = 0, bf[4];
-                       if(++bench == 10) {
-                               bench = 0;
-                               bench_fps_s = bench_fps;
-                               bf[bfp++ & 3] = bench_fps;
-                               bench_fps = 0;
+                       // missing 1 frame?
+                       if (currentConfig.Frameskip < 0 && frames_done < target_fps) {
+                               SkipFrame(); frames_done++;
                        }
-                       bench_fps += frames_shown;
-                       sprintf(fpsbuff, "%02i/%02i/%02i", frames_shown, bench_fps_s, (bf[0]+bf[1]+bf[2]+bf[3])>>2);
-#else
-                       if(currentConfig.EmuOpt & 2)
-                               sprintf(fpsbuff, "%02i/%02i", frames_shown, frames_done);
-#endif
+
+                       if (currentConfig.EmuOpt & 2)
+                               sprintf(fpsbuff, "%02i/%02i  ", frames_shown, frames_done);
+
                        tval_thissec += 1000000;
 
                        if (currentConfig.Frameskip < 0) {
@@ -632,12 +949,10 @@ void emu_Loop(void)
                        int tval_diff;
                        tval = sceKernelGetSystemTimeLow();
                        tval_diff = (int)(tval - tval_thissec) << 8;
-                       if (tval_diff > lim_time)
+                       if (tval_diff > lim_time && (frames_done/16 < frames_shown))
                        {
                                // no time left for this frame - skip
                                if (tval_diff - lim_time >= (300000<<8)) {
-                                       /* something caused a slowdown for us (disk access? cache flush?)
-                                        * try to recover by resetting timing... */
                                        reset_timing = 1;
                                        continue;
                                }
@@ -650,16 +965,18 @@ void emu_Loop(void)
                updateKeys();
 
                if (!(PicoOpt&0x10))
-                       PicoScan((unsigned) -1, NULL);
+                       EmuScanPrepare();
 
                PicoFrame();
 
-               blit(fpsbuff, notice);
+               sceGuSync(0,0);
 
                // check time
                tval = sceKernelGetSystemTimeLow();
                tval_diff = (int)(tval - tval_thissec) << 8;
 
+               blit2(fpsbuff, notice, tval_diff > lim_time);
+
                if (currentConfig.Frameskip < 0 && tval_diff - lim_time >= (300000<<8)) // slowdown detection
                        reset_timing = 1;
                else if (PsndOut != NULL || currentConfig.Frameskip < 0)
@@ -677,18 +994,21 @@ void emu_Loop(void)
 
 
        if (PicoMCD & 1) PicoCDBufferFree();
-/*
+
        if (PsndOut != NULL) {
-               PsndOut = snd_cbuff = NULL;
-               FrameworkAudio_Close();
+               PsndOut = NULL;
+               sound_end();
        }
-*/
+
        // save SRAM
        if ((currentConfig.EmuOpt & 1) && SRam.changed) {
-               emu_stateCb("Writing SRAM/BRAM..");
+               emu_msg_cb("Writing SRAM/BRAM..");
                emu_SaveLoadGame(0, 1);
                SRam.changed = 0;
        }
+
+       // clear fps counters and stuff
+       memset32((int *)psp_video_get_active_fb() + 512*264*2/4, 0, 512*8*2/4);
 }