final src and Makefile adjustments for PSP release
[picodrive.git] / platform / psp / emu.c
index df4a08e..aaa5a0e 100644 (file)
@@ -1,3 +1,8 @@
+// (c) Copyright 2007 notaz, All rights reserved.
+// Free for non-commercial use.
+
+// For commercial use, separate licencing terms must be obtained.
+
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/syslimits.h> // PATH_MAX
 #include "psp.h"
 #include "menu.h"
 #include "emu.h"
+#include "mp3.h"
 #include "../common/emu.h"
 #include "../common/lprintf.h"
 #include "../../Pico/PicoInt.h"
 
-#ifdef BENCHMARK
-#define OSD_FPS_X 380
-#else
-#define OSD_FPS_X 420
-#endif
+#define OSD_FPS_X 424
 
 // additional pspaudio imports, credits to crazyc
 int sceAudio_38553111(unsigned short samples, unsigned short freq, char unknown);  // play with conversion?
@@ -39,7 +41,7 @@ int reset_timing = 0; // do we need this?
 
 static void sound_init(void);
 static void sound_deinit(void);
-static void blit2(const char *fps, const char *notice);
+static void blit2(const char *fps, const char *notice, int lagging_behind);
 static void clearArea(int full);
 
 void emu_noticeMsgUpdated(void)
@@ -52,16 +54,16 @@ void emu_getMainDir(char *dst, int len)
        if (len > 0) *dst = 0;
 }
 
-static void osd_text(int x, const char *text, int is_active)
+static void osd_text(int x, const char *text, int is_active, int clear_all)
 {
        unsigned short *screen = is_active ? psp_video_get_active_fb() : psp_screen;
-       int len = strlen(text) * 8 / 2;
+       int len = clear_all ? (480 / 2) : (strlen(text) * 8 / 2);
        int *p, h;
        void *tmp;
        for (h = 0; h < 8; h++) {
                p = (int *) (screen+x+512*(264+h));
                p = (int *) ((int)p & ~3); // align
-               memset32(p, 0, len);
+               memset32_uncached(p, 0, len);
        }
        if (is_active) { tmp = psp_screen; psp_screen = screen; } // nasty pointer tricks
        emu_textOut16(x, 264, text);
@@ -70,7 +72,7 @@ static void osd_text(int x, const char *text, int is_active)
 
 void emu_msg_cb(const char *msg)
 {
-       osd_text(4, msg, 1);
+       osd_text(4, msg, 1, 1);
        noticeMsgTime = sceKernelGetSystemTimeLow() - 2000000;
 
        /* assumption: emu_msg_cb gets called only when something slow is about to happen */
@@ -130,14 +132,14 @@ void emu_setDefaultConfig(void)
 {
        memset(&currentConfig, 0, sizeof(currentConfig));
        currentConfig.lastRomFile[0] = 0;
-       currentConfig.EmuOpt  = 0x1f | 0x680; // | confirm_save, cd_leds, 16bit rend
-       currentConfig.PicoOpt = 0x07 | 0xc00; // | cd_pcm, cd_cdda
+       currentConfig.EmuOpt  = 0x1d | 0x680;  // | confirm_save, cd_leds, acc rend
+       currentConfig.PicoOpt = 0x0f | 0x1c00; // | gfx_cd, cd_pcm, cd_cdda
        currentConfig.PsndRate = 22050;
        currentConfig.PicoRegion = 0; // auto
        currentConfig.PicoAutoRgnOrder = 0x184; // US, EU, JP
        currentConfig.Frameskip = -1; // auto
        currentConfig.volume = 50;
-       currentConfig.CPUclock = 222;
+       currentConfig.CPUclock = 333;
        currentConfig.KeyBinds[ 4] = 1<<0; // SACB RLDU
        currentConfig.KeyBinds[ 6] = 1<<1;
        currentConfig.KeyBinds[ 7] = 1<<2;
@@ -146,11 +148,18 @@ void emu_setDefaultConfig(void)
        currentConfig.KeyBinds[13] = 1<<5;
        currentConfig.KeyBinds[15] = 1<<6;
        currentConfig.KeyBinds[ 3] = 1<<7;
+       currentConfig.KeyBinds[12] = 1<<26; // switch rnd
        currentConfig.KeyBinds[ 8] = 1<<27; // save state
        currentConfig.KeyBinds[ 9] = 1<<28; // load state
-       currentConfig.PicoCDBuffers = 0;
-       currentConfig.scaling = 1; // bilinear filtering for psp
-       currentConfig.scale = currentConfig.hscale32 = currentConfig.hscale40 = 1.0;
+       currentConfig.KeyBinds[28] = 1<<0; // nub "buttons"
+       currentConfig.KeyBinds[30] = 1<<1;
+       currentConfig.KeyBinds[31] = 1<<2;
+       currentConfig.KeyBinds[29] = 1<<3;
+       currentConfig.PicoCDBuffers = 64;
+       currentConfig.scaling = 1;     // bilinear filtering for psp
+       currentConfig.scale = 1.20;    // fullscreen
+       currentConfig.hscale40 = 1.25;
+       currentConfig.hscale32 = 1.56;
 }
 
 
@@ -169,7 +178,7 @@ static int fbimg_offs = 0;
 
 static void set_scaling_params(void)
 {
-       int src_width, fbimg_width, fbimg_height, fbimg_xoffs, fbimg_yoffs;
+       int src_width, fbimg_width, fbimg_height, fbimg_xoffs, fbimg_yoffs, border_hack = 0;
        g_vertices[0].x = g_vertices[0].y =
        g_vertices[0].z = g_vertices[1].z = 0;
 
@@ -182,9 +191,13 @@ static void set_scaling_params(void)
                src_width = 256;
        }
 
+       if (fbimg_width  & 1) fbimg_width++;  // make even
+       if (fbimg_height & 1) fbimg_height++;
+
        if (fbimg_width >= 480) {
                g_vertices[0].u = (fbimg_width-480)/2;
-               g_vertices[1].u = src_width - (fbimg_width-480)/2;
+               g_vertices[1].u = src_width - (fbimg_width-480)/2 - 1;
+               if (fbimg_width == 480) border_hack = 1;
                fbimg_width = 480;
                fbimg_xoffs = 0;
        } else {
@@ -208,15 +221,23 @@ static void set_scaling_params(void)
        g_vertices[1].y = fbimg_height;
        if (fbimg_xoffs < 0) fbimg_xoffs = 0;
        if (fbimg_yoffs < 0) fbimg_yoffs = 0;
+       if (border_hack) {
+               g_vertices[0].u++;
+               g_vertices[0].x++;
+               g_vertices[1].u--;
+               g_vertices[1].x--;
+       }
        fbimg_offs = (fbimg_yoffs*512 + fbimg_xoffs) * 2; // dst is always 16bit
 
+       /*
        lprintf("set_scaling_params:\n");
        lprintf("offs: %i, %i\n", fbimg_xoffs, fbimg_yoffs);
        lprintf("xy0, xy1: %i, %i; %i, %i\n", g_vertices[0].x, g_vertices[0].y, g_vertices[1].x, g_vertices[1].y);
        lprintf("uv0, uv1: %i, %i; %i, %i\n", g_vertices[0].u, g_vertices[0].v, g_vertices[1].u, g_vertices[1].v);
+       */
 }
 
-static void do_slowmode_pal(void)
+static void do_pal_update(int allow_sh)
 {
        unsigned int *spal=(void *)Pico.cram;
        unsigned int *dpal=(void *)localPal;
@@ -225,11 +246,11 @@ static void do_slowmode_pal(void)
        for (i = 0x3f/2; i >= 0; i--)
                dpal[i] = ((spal[i]&0x000f000f)<< 1)|((spal[i]&0x00f000f0)<<3)|((spal[i]&0x0f000f00)<<4);
 
-       if (Pico.video.reg[0xC]&8) // shadow/hilight?
+       if (allow_sh && (Pico.video.reg[0xC]&8)) // shadow/hilight?
        {
                // shadowed pixels
                for (i = 0x3f/2; i >= 0; i--)
-                       dpal[0x20|i] = dpal[0x60|i] = (spal[i]>>1)&0x738e738e;
+                       dpal[0x20|i] = dpal[0x60|i] = (dpal[i]>>1)&0x738e738e;
                // hilighted pixels
                for (i = 0x3f; i >= 0; i--) {
                        int t=localPal[i]&0xe71c;t+=0x4208;
@@ -263,7 +284,7 @@ static void EmuScanPrepare(void)
 
        dynamic_palette = 0;
        if (Pico.m.dirtyPal)
-               do_slowmode_pal();
+               do_pal_update(1);
 }
 
 static int EmuScanSlow(unsigned int num, void *sdata)
@@ -275,7 +296,7 @@ static int EmuScanSlow(unsigned int num, void *sdata)
                        do_slowmode_lines(num);
                        dynamic_palette = 1;
                }
-               do_slowmode_pal();
+               do_pal_update(1);
        }
 
        if (dynamic_palette) {
@@ -293,8 +314,6 @@ static void blitscreen_clut(void)
        int offs = fbimg_offs;
        offs += (psp_screen == VRAM_FB0) ? VRAMOFFS_FB0 : VRAMOFFS_FB1;
 
-       sceKernelDcacheWritebackAll();
-
        sceGuSync(0,0); // sync with prev
        sceGuStart(GU_DIRECT, guCmdList);
        sceGuDrawBuffer(GU_PSM_5650, (void *)offs, 512); // point to back buffer
@@ -318,14 +337,9 @@ static void blitscreen_clut(void)
                }
 
                if ((PicoOpt&0x10) && Pico.m.dirtyPal)
-               {
-                       int i, *dpal = (void *)localPal, *spal = (int *)Pico.cram;
-                       for (i = 0x3f/2; i >= 0; i--)
-                               dpal[i] = ((spal[i]&0x000f000f)<< 1)|((spal[i]&0x00f000f0)<<3)|((spal[i]&0x0f000f00)<<4);
-                       localPal[0xe0] = 0;
-                       Pico.m.dirtyPal = 0;
-                       need_pal_upload = 1;
-               }
+                       do_pal_update(0);
+
+               sceKernelDcacheWritebackAll();
 
                if (need_pal_upload) {
                        need_pal_upload = 0;
@@ -361,21 +375,19 @@ static void blitscreen_clut(void)
 
 static void cd_leds(void)
 {
-       static int old_reg = 0;
-       unsigned int col_g, col_r, *p;
+       unsigned int reg, col_g, col_r, *p;
 
-       if (!((Pico_mcd->s68k_regs[0] ^ old_reg) & 3)) return; // no change
-       old_reg = Pico_mcd->s68k_regs[0];
+       reg = Pico_mcd->s68k_regs[0];
 
        p = (unsigned int *)((short *)psp_screen + 512*2+4+2);
-       col_g = (old_reg & 2) ? 0x06000600 : 0;
-       col_r = (old_reg & 1) ? 0xc000c000 : 0;
+       col_g = (reg & 2) ? 0x06000600 : 0;
+       col_r = (reg & 1) ? 0x00180018 : 0;
        *p++ = col_g; *p++ = col_g; p+=2; *p++ = col_r; *p++ = col_r; p += 512/2 - 12/2;
        *p++ = col_g; *p++ = col_g; p+=2; *p++ = col_r; *p++ = col_r; p += 512/2 - 12/2;
        *p++ = col_g; *p++ = col_g; p+=2; *p++ = col_r; *p++ = col_r;
 }
 
-
+#if 0
 static void dbg_text(void)
 {
        int *p, h, len;
@@ -386,11 +398,11 @@ static void dbg_text(void)
        for (h = 0; h < 8; h++) {
                p = (int *) ((unsigned short *) psp_screen+2+512*(256+h));
                p = (int *) ((int)p & ~3); // align
-               memset32(p, 0, len);
+               memset32_uncached(p, 0, len);
        }
        emu_textOut16(2, 256, text);
 }
-
+#endif
 
 /* called after rendering is done, but frame emulation is not finished */
 void blit1(void)
@@ -410,38 +422,40 @@ void blit1(void)
 }
 
 
-static void blit2(const char *fps, const char *notice)
+static void blit2(const char *fps, const char *notice, int lagging_behind)
 {
-       int emu_opt = currentConfig.EmuOpt;
-
-       sceGuSync(0,0);
+       int vsync = 0, emu_opt = currentConfig.EmuOpt;
 
        if (notice || (emu_opt & 2)) {
-               if (notice)      osd_text(4, notice, 0);
-               if (emu_opt & 2) osd_text(OSD_FPS_X, fps, 0);
+               if (notice)      osd_text(4, notice, 0, 0);
+               if (emu_opt & 2) osd_text(OSD_FPS_X, fps, 0, 0);
        }
 
-       dbg_text();
+       //dbg_text();
 
        if ((emu_opt & 0x400) && (PicoMCD & 1))
                cd_leds();
 
-       psp_video_flip(0);
+       if (currentConfig.EmuOpt & 0x2000) { // want vsync
+               if (!(currentConfig.EmuOpt & 0x10000) || !lagging_behind) vsync = 1;
+       }
+
+       psp_video_flip(vsync);
 }
 
 // clears whole screen or just the notice area (in all buffers)
 static void clearArea(int full)
 {
        if (full) {
-               memset32(psp_screen, 0, 512*272*2/4);
+               memset32_uncached(psp_screen, 0, 512*272*2/4);
                psp_video_flip(0);
-               memset32(psp_screen, 0, 512*272*2/4);
+               memset32_uncached(psp_screen, 0, 512*272*2/4);
                memset32(VRAM_CACHED_STUFF, 0xe0e0e0e0, 512*240/4);
                memset32((int *)VRAM_CACHED_STUFF+512*240/4, 0, 512*240*2/4);
        } else {
                void *fb = psp_video_get_active_fb();
-               memset32((int *)((char *)psp_screen + 512*264*2), 0, 512*8*2/4);
-               memset32((int *)((char *)fb         + 512*264*2), 0, 512*8*2/4);
+               memset32_uncached((int *)((char *)psp_screen + 512*264*2), 0, 512*8*2/4);
+               memset32_uncached((int *)((char *)fb         + 512*264*2), 0, 512*8*2/4);
        }
 }
 
@@ -477,12 +491,13 @@ static void vidResetMode(void)
 
 
 /* sound stuff */
-#define SOUND_DEF_BLOCK_SIZE 1024 // 1152 // 1024
-#define SOUND_BLOCK_COUNT    4
+#define SOUND_BLOCK_SIZE_NTSC (1470*2) // 1024 // 1152
+#define SOUND_BLOCK_SIZE_PAL  (1764*2)
+#define SOUND_BLOCK_COUNT    8
 
-static short __attribute__((aligned(4))) sndBuffer[SOUND_DEF_BLOCK_SIZE*SOUND_BLOCK_COUNT*2 + 44100/50*2];
-static short *snd_playptr = NULL;
-static int samples_made = 0, samples_done = 0, samples_block = SOUND_DEF_BLOCK_SIZE;
+static short __attribute__((aligned(4))) sndBuffer[SOUND_BLOCK_SIZE_PAL*SOUND_BLOCK_COUNT + 44100/50*2];
+static short *snd_playptr = NULL, *sndBuffer_endptr = NULL;
+static int samples_made = 0, samples_done = 0, samples_block = 0;
 static int sound_thread_exit = 0;
 static SceUID sound_sem = -1;
 
@@ -490,31 +505,40 @@ static void writeSound(int len);
 
 static int sound_thread(SceSize args, void *argp)
 {
-       short *endptr = &sndBuffer[SOUND_DEF_BLOCK_SIZE*SOUND_BLOCK_COUNT*2];
-       int ret;
+       int ret = 0;
 
-       lprintf("sound_thread: started, priority %i\n", sceKernelGetThreadCurrentPriority());
+       lprintf("sthr: started, priority %i\n", sceKernelGetThreadCurrentPriority());
 
        while (!sound_thread_exit)
        {
                if (samples_made - samples_done < samples_block) {
-                       // wait for data...
-                       //lprintf("sthr: wait... (%i/%i)\n", samples_done, samples_made);
-                       ret = sceKernelWaitSema(sound_sem, 1, 0);
-                       //lprintf("sthr: sceKernelWaitSema: %i\n", ret);
+                       // wait for data (use at least 2 blocks)
+                       //lprintf("sthr: wait... (%i)\n", samples_made - samples_done);
+                       while (samples_made - samples_done <= samples_block*2 && !sound_thread_exit)
+                               ret = sceKernelWaitSema(sound_sem, 1, 0);
+                       if (ret < 0) lprintf("sthr: sceKernelWaitSema: %i\n", ret);
                        continue;
                }
 
-               //lprintf("sthr: got data: %i\n", samples_made - samples_done);
+               // lprintf("sthr: got data: %i\n", samples_made - samples_done);
 
                ret = sceAudio_E0727056(PSP_AUDIO_VOLUME_MAX, snd_playptr);
 
                samples_done += samples_block;
                snd_playptr  += samples_block;
-               if (snd_playptr >= endptr)
+               if (snd_playptr >= sndBuffer_endptr)
                        snd_playptr = sndBuffer;
-               if (ret)
-                       lprintf("sthr: outf: %i; pos %i/%i\n", ret, samples_done, samples_made);
+               // 1.5 kernel returns 0, newer ones return # of samples queued
+               if (ret < 0)
+                       lprintf("sthr: sceAudio_E0727056: %08x; pos %i/%i\n", ret, samples_done, samples_made);
+
+               // shouldn't happen, but just in case
+               if (samples_made - samples_done >= samples_block*3) {
+                       lprintf("sthr: block skip (%i)\n", samples_made - samples_done);
+                       samples_done += samples_block; // skip
+                       snd_playptr  += samples_block;
+               }
+
        }
 
        lprintf("sthr: exit\n");
@@ -525,15 +549,19 @@ static int sound_thread(SceSize args, void *argp)
 static void sound_init(void)
 {
        SceUID thid;
+       int ret;
 
        sound_sem = sceKernelCreateSema("sndsem", 0, 0, 1, NULL);
        if (sound_sem < 0) lprintf("sceKernelCreateSema() failed: %i\n", sound_sem);
 
+       samples_made = samples_done = 0;
+       samples_block = SOUND_BLOCK_SIZE_NTSC; // make sure it goes to sema
        sound_thread_exit = 0;
        thid = sceKernelCreateThread("sndthread", sound_thread, 0x12, 0x10000, 0, NULL);
        if (thid >= 0)
        {
-               sceKernelStartThread(thid, 0, 0);
+               ret = sceKernelStartThread(thid, 0, 0);
+               if (ret < 0) lprintf("sound_init: sceKernelStartThread returned %08x\n", ret);
        }
        else
                lprintf("sceKernelCreateThread failed: %i\n", thid);
@@ -547,31 +575,30 @@ static void sound_prepare(void)
        samples_made = samples_done = 0;
 
        if (PsndRate != PsndRate_old || (PicoOpt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) {
-               sound_rerate(Pico.m.frame_count ? 1 : 0);
+               PsndRerate(Pico.m.frame_count ? 1 : 0);
        }
        stereo=(PicoOpt&8)>>3;
-       samples_block = SOUND_DEF_BLOCK_SIZE;
-       if (PsndRate < 44100) samples_block = SOUND_DEF_BLOCK_SIZE / 2;
-       if (PsndRate < 22050) samples_block = SOUND_DEF_BLOCK_SIZE / 4;
+
+       samples_block = Pico.m.pal ? SOUND_BLOCK_SIZE_PAL : SOUND_BLOCK_SIZE_NTSC;
+       if (PsndRate <= 22050) samples_block /= 2;
+       sndBuffer_endptr = &sndBuffer[samples_block*SOUND_BLOCK_COUNT];
 
        lprintf("starting audio: %i, len: %i, stereo: %i, pal: %i, block samples: %i\n",
                        PsndRate, PsndLen, stereo, Pico.m.pal, samples_block);
 
-       while (sceAudioOutput2GetRestSample() > 0) psp_msleep(100);
-       sceAudio_5C37C0AE();
-       ret = sceAudio_38553111(samples_block/2, PsndRate, 2/*stereo ? 2 : 1*/);
-               lprintf("sceAudio_38553111() ret: %i\n", ret);
+       // while (sceAudioOutput2GetRestSample() > 0) psp_msleep(100);
+       // sceAudio_5C37C0AE();
+       ret = sceAudio_38553111(samples_block/2, PsndRate, 2); // seems to not need that stupid 64byte alignment
        if (ret < 0) {
                lprintf("sceAudio_38553111() failed: %i\n", ret);
                sprintf(noticeMsg, "sound init failed (%i), snd disabled", ret);
                noticeMsgTime = sceKernelGetSystemTimeLow();
                currentConfig.EmuOpt &= ~4;
        } else {
-//             int ret = sceAudioSetChannelDataLen(ret, PsndLen); // a try..
-//             lprintf("sceAudioSetChannelDataLen: %i\n", ret);
                PicoWriteSound = writeSound;
                memset32((int *)(void *)sndBuffer, 0, sizeof(sndBuffer)/4);
-               snd_playptr = sndBuffer;
+               snd_playptr = sndBuffer_endptr - samples_block;
+               samples_made = samples_block; // send 1 empty block first..
                PsndOut = sndBuffer;
                PsndRate_old = PsndRate;
                PicoOpt_old  = PicoOpt;
@@ -581,38 +608,53 @@ static void sound_prepare(void)
 
 static void sound_end(void)
 {
-       int ret;
-       while (sceAudioOutput2GetRestSample() > 0) psp_msleep(100);
-       ret = sceAudio_5C37C0AE();
-       lprintf("sound_end: sceAudio_5C37C0AE ret %i\n", ret);
+       int i;
+       if (samples_done == 0)
+       {
+               // if no data is written between sceAudio_38553111 and sceAudio_5C37C0AE calls,
+               // we get a deadlock on next sceAudio_38553111 call
+               // so this is yet another workaround:
+               memset32((int *)(void *)sndBuffer, 0, samples_block*4/4);
+               samples_made = samples_block * 3;
+               sceKernelSignalSema(sound_sem, 1);
+       }
+       sceKernelDelayThread(100*1000);
+       samples_made = samples_done = 0;
+       for (i = 0; sceAudioOutput2GetRestSample() > 0 && i < 16; i++)
+               psp_msleep(100);
+       sceAudio_5C37C0AE();
 }
 
 static void sound_deinit(void)
 {
        sound_thread_exit = 1;
        sceKernelSignalSema(sound_sem, 1);
+       sceKernelDeleteSema(sound_sem);
+       sound_sem = -1;
 }
 
 static void writeSound(int len)
 {
        int ret;
-       short *endptr = &sndBuffer[SOUND_DEF_BLOCK_SIZE*SOUND_BLOCK_COUNT*2];
        if (PicoOpt&8) len<<=1;
 
        PsndOut += len;
-       if (PsndOut > endptr) {
+       /*if (PsndOut > sndBuffer_endptr) {
                memcpy32((int *)(void *)sndBuffer, (int *)endptr, (PsndOut - endptr + 1) / 2);
                PsndOut = &sndBuffer[PsndOut - endptr];
+               lprintf("mov\n");
        }
-       else if (PsndOut == endptr)
-               PsndOut = sndBuffer; // happy case
+       else*/
+       if (PsndOut > sndBuffer_endptr) lprintf("snd oflow %i!\n", PsndOut - sndBuffer_endptr);
+       if (PsndOut >= sndBuffer_endptr)
+               PsndOut = sndBuffer;
 
        // signal the snd thread
        samples_made += len;
-       if (samples_made - samples_done >= samples_block) {
-               if (!Pico.m.scanline) lprintf("signal, %i/%i\n", samples_done, samples_made);
+       if (samples_made - samples_done > samples_block*2) {
+               // lprintf("signal, %i/%i\n", samples_done, samples_made);
                ret = sceKernelSignalSema(sound_sem, 1);
-               if (!Pico.m.scanline) lprintf("signal ret %i\n", ret);
+               //if (ret < 0) lprintf("snd signal ret %08x\n", ret);
        }
 }
 
@@ -636,7 +678,7 @@ void emu_forcedFrame(void)
        vidResetMode();
        memset32(VRAM_CACHED_STUFF, 0xe0e0e0e0, 512*8/4); // borders
        memset32((int *)VRAM_CACHED_STUFF + 512*232/4, 0xe0e0e0e0, 512*8/4);
-       memset32((int *)psp_screen + 512*264*2/4, 0, 512*8*2/4);
+       memset32_uncached((int *)psp_screen + 512*264*2/4, 0, 512*8*2/4);
 
        PicoDrawSetColorFormat(-1);
        PicoScan = EmuScanSlow;
@@ -661,7 +703,8 @@ static void RunEvents(unsigned int which)
                                 (!(which & 0x1000) && (currentConfig.EmuOpt & 0x200))) ) // save
                {
                        int keys;
-                       blit2("", (which & 0x1000) ? "LOAD STATE? (X=yes, O=no)" : "OVERWRITE SAVE? (X=yes, O=no)");
+                       sceGuSync(0,0);
+                       blit2("", (which & 0x1000) ? "LOAD STATE? (X=yes, O=no)" : "OVERWRITE SAVE? (X=yes, O=no)", 0);
                        while( !((keys = psp_pad_read(1)) & (BTN_X|BTN_CIRCLE)) )
                                psp_msleep(50);
                        if (keys & BTN_CIRCLE) do_it = 0;
@@ -672,7 +715,7 @@ static void RunEvents(unsigned int which)
 
                if (do_it)
                {
-                       osd_text(4, (which & 0x1000) ? "LOADING GAME" : "SAVING GAME", 1);
+                       osd_text(4, (which & 0x1000) ? "LOADING GAME" : "SAVING GAME", 1, 0);
                        PicoStateProgressCB = emu_msg_cb;
                        emu_SaveLoadGame((which & 0x1000) >> 12, 0);
                        PicoStateProgressCB = NULL;
@@ -688,13 +731,10 @@ static void RunEvents(unsigned int which)
 
                vidResetMode();
 
-               if (PicoOpt&0x10) {
-                       strcpy(noticeMsg, " 8bit fast renderer");
-               } else if (currentConfig.EmuOpt&0x80) {
-                       strcpy(noticeMsg, "16bit accurate renderer");
-               } else {
-                       strcpy(noticeMsg, " 8bit accurate renderer");
-               }
+               if (PicoOpt&0x10)
+                       strcpy(noticeMsg, "fast renderer");
+               else if (currentConfig.EmuOpt&0x80)
+                       strcpy(noticeMsg, "accurate renderer");
 
                noticeMsgTime = sceKernelGetSystemTimeLow();
        }
@@ -789,16 +829,26 @@ static void find_combos(void)
        combo_keys = combo_acts = 0;
        for (act = 0; act < 32; act++)
        {
-               int keyc = 0;
+               int keyc = 0, keyc2 = 0;
                if (act == 16 || act == 17) continue; // player2 flag
-               for (u = 0; u < 32; u++)
+               if (act > 17)
                {
-                       if (currentConfig.KeyBinds[u] & (1 << act)) keyc++;
+                       for (u = 0; u < 28; u++) // 28 because nub can't produce combos
+                               if (currentConfig.KeyBinds[u] & (1 << act)) keyc++;
                }
-               if (keyc > 1)
+               else
+               {
+                       for (u = 0; u < 28; u++)
+                               if ((currentConfig.KeyBinds[u] & 0x30000) == 0 && // pl. 1
+                                       (currentConfig.KeyBinds[u] & (1 << act))) keyc++;
+                       for (u = 0; u < 28; u++)
+                               if ((currentConfig.KeyBinds[u] & 0x30000) == 1 && // pl. 2
+                                       (currentConfig.KeyBinds[u] & (1 << act))) keyc2++;
+               }
+               if (keyc > 1 || keyc2 > 1)
                {
                        // loop again and mark those keys and actions as combo
-                       for (u = 0; u < 32; u++)
+                       for (u = 0; u < 28; u++)
                        {
                                if (currentConfig.KeyBinds[u] & (1 << act)) {
                                        combo_keys |= 1 << u;
@@ -823,6 +873,7 @@ static void simpleWait(unsigned int until)
 
 void emu_Loop(void)
 {
+       static int mp3_init_done = 0;
        char fpsbuff[24]; // fps count c string
        unsigned int tval, tval_prev = 0, tval_thissec = 0; // timing
        int frames_done = 0, frames_shown = 0, oldmodes = 0;
@@ -852,8 +903,16 @@ void emu_Loop(void)
        target_frametime = Pico.m.pal ? (1000000<<8)/50 : (1000000<<8)/60+1;
        reset_timing = 1;
 
-       // prepare CD buffer
-       if (PicoMCD & 1) PicoCDBufferInit();
+       if (PicoMCD & 1) {
+               // prepare CD buffer
+               PicoCDBufferInit();
+               // mp3...
+               if (!mp3_init_done) {
+                       i = mp3_init();
+                       mp3_init_done = 1;
+                       if (i) { engineState = PGS_Menu; return; }
+               }
+       }
 
        // prepare sound stuff
        PsndOut = NULL;
@@ -862,6 +921,8 @@ void emu_Loop(void)
                sound_prepare();
        }
 
+       sceDisplayWaitVblankStart();
+
        // loop?
        while (engineState == PGS_Running)
        {
@@ -900,20 +961,14 @@ void emu_Loop(void)
                // second passed?
                if (tval - tval_thissec >= 1000000)
                {
-#ifdef BENCHMARK
-                       static int bench = 0, bench_fps = 0, bench_fps_s = 0, bfp = 0, bf[4];
-                       if(++bench == 10) {
-                               bench = 0;
-                               bench_fps_s = bench_fps;
-                               bf[bfp++ & 3] = bench_fps;
-                               bench_fps = 0;
+                       // missing 1 frame?
+                       if (currentConfig.Frameskip < 0 && frames_done < target_fps) {
+                               SkipFrame(); frames_done++;
                        }
-                       bench_fps += frames_shown;
-                       sprintf(fpsbuff, "%02i/%02i/%02i", frames_shown, bench_fps_s, (bf[0]+bf[1]+bf[2]+bf[3])>>2);
-#else
-                       if(currentConfig.EmuOpt & 2)
-                               sprintf(fpsbuff, "%02i/%02i", frames_shown, frames_done);
-#endif
+
+                       if (currentConfig.EmuOpt & 2)
+                               sprintf(fpsbuff, "%02i/%02i  ", frames_shown, frames_done);
+
                        tval_thissec += 1000000;
 
                        if (currentConfig.Frameskip < 0) {
@@ -950,12 +1005,10 @@ void emu_Loop(void)
                        int tval_diff;
                        tval = sceKernelGetSystemTimeLow();
                        tval_diff = (int)(tval - tval_thissec) << 8;
-                       if (tval_diff > lim_time)
+                       if (tval_diff > lim_time && (frames_done/16 < frames_shown))
                        {
                                // no time left for this frame - skip
                                if (tval_diff - lim_time >= (300000<<8)) {
-                                       /* something caused a slowdown for us (disk access? cache flush?)
-                                        * try to recover by resetting timing... */
                                        reset_timing = 1;
                                        continue;
                                }
@@ -972,12 +1025,14 @@ void emu_Loop(void)
 
                PicoFrame();
 
-               blit2(fpsbuff, notice);
+               sceGuSync(0,0);
 
                // check time
                tval = sceKernelGetSystemTimeLow();
                tval_diff = (int)(tval - tval_thissec) << 8;
 
+               blit2(fpsbuff, notice, tval_diff > lim_time);
+
                if (currentConfig.Frameskip < 0 && tval_diff - lim_time >= (300000<<8)) // slowdown detection
                        reset_timing = 1;
                else if (PsndOut != NULL || currentConfig.Frameskip < 0)
@@ -1008,8 +1063,8 @@ void emu_Loop(void)
                SRam.changed = 0;
        }
 
-       // draw a frame for bg..
-       emu_forcedFrame();
+       // clear fps counters and stuff
+       memset32_uncached((int *)psp_video_get_active_fb() + 512*264*2/4, 0, 512*8*2/4);
 }