From f6c49d38cbb965c502ca2af66d76e92f95acda7c Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 31 Dec 2009 15:51:40 +0000 Subject: [PATCH] simple profiler added git-svn-id: file:///home/notaz/opt/svn/PicoDrive@850 be3aeb3a-fb24-0410-a615-afba39da0efa --- pico/32x/32x.c | 16 ++++- pico/cd/pico.c | 4 ++ pico/draw.c | 4 ++ pico/memory.c | 10 ++- pico/pico.c | 19 +++++- pico/pico_int.h | 12 ++++ pico/sound/sound.c | 4 ++ platform/common/common.mak | 4 ++ platform/common/emu.c | 10 ++- platform/linux/Makefile | 3 + platform/linux/pprof.c | 126 +++++++++++++++++++++++++++++++++++++ platform/linux/pprof.h | 54 ++++++++++++++++ 12 files changed, 258 insertions(+), 8 deletions(-) create mode 100644 platform/linux/pprof.c create mode 100644 platform/linux/pprof.h diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 323b0b65..e945c8e2 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -165,6 +165,8 @@ static void p32x_start_blank(void) static __inline void run_m68k(int cyc) { + pprof_start(m68k); + #if defined(EMU_C68K) PicoCpuCM68k.cycles = cyc; CycloneRun(&PicoCpuCM68k); @@ -174,6 +176,8 @@ static __inline void run_m68k(int cyc) #elif defined(EMU_F68K) SekCycleCnt += fm68k_emulate(cyc+1, 0, 0); #endif + + pprof_end(m68k); } // ~1463.8, but due to cache misses and slow mem @@ -194,10 +198,18 @@ static __inline void run_m68k(int cyc) slice = SekCycleCnt - slice; /* real count from 68k */ \ if (SekCycleCnt < SekCycleAim) \ elprintf(EL_32X, "slice %d", slice); \ - if (!(Pico32x.emu_flags & (P32XF_SSH2POLL|P32XF_SSH2VPOLL))) \ + if (!(Pico32x.emu_flags & (P32XF_SSH2POLL|P32XF_SSH2VPOLL))) { \ + pprof_start(ssh2); \ sh2_execute(&ssh2, CYCLES_M68K2SH2(slice)); \ - if (!(Pico32x.emu_flags & (P32XF_MSH2POLL|P32XF_MSH2VPOLL))) \ + pprof_end(ssh2); \ + } \ + if (!(Pico32x.emu_flags & (P32XF_MSH2POLL|P32XF_MSH2VPOLL))) { \ + pprof_start(msh2); \ sh2_execute(&msh2, CYCLES_M68K2SH2(slice)); \ + pprof_end(msh2); \ + } \ + pprof_start(dummy); \ + pprof_end(dummy); \ } \ } diff --git 
a/pico/cd/pico.c b/pico/cd/pico.c index 8b65f4fa..383949c3 100644 --- a/pico/cd/pico.c +++ b/pico/cd/pico.c @@ -66,6 +66,9 @@ PICO_INTERNAL int PicoResetMCD(void) static __inline void SekRunM68k(int cyc) { int cyc_do; + + pprof_start(m68k); + SekCycleAim+=cyc; if ((cyc_do=SekCycleAim-SekCycleCnt) <= 0) return; #if defined(EMU_CORE_DEBUG) @@ -81,6 +84,7 @@ static __inline void SekRunM68k(int cyc) g_m68kcontext=&PicoCpuFM68k; SekCycleCnt+=fm68k_emulate(cyc_do, 0, 0); #endif + pprof_end(m68k); } static __inline void SekRunS68k(int cyc) diff --git a/pico/draw.c b/pico/draw.c index e474168c..6670ba3b 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1485,6 +1485,8 @@ void PicoDrawSync(int to, int blank_last_line) int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? int bgc = Pico.video.reg[7]; + pprof_start(draw); + if (rendlines != 240) offs = 8; @@ -1516,6 +1518,8 @@ void PicoDrawSync(int to, int blank_last_line) line++; } DrawScanline = line; + + pprof_end(draw); } void PicoDrawSetColorFormat(int which) diff --git a/pico/memory.c b/pico/memory.c index f6c0eeb7..d0b16974 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -257,8 +257,11 @@ void NOINLINE ctl_write_z80busreq(u32 d) else { z80stopCycle = SekCyclesDone(); - if ((PicoOpt&POPT_EN_Z80) && !Pico.m.z80_reset) + if ((PicoOpt&POPT_EN_Z80) && !Pico.m.z80_reset) { + pprof_start(m68k); PicoSyncZ80(z80stopCycle); + pprof_end_sub(m68k); + } } Pico.m.z80Run = d; } @@ -272,8 +275,11 @@ void NOINLINE ctl_write_z80reset(u32 d) { if (d) { - if ((PicoOpt&POPT_EN_Z80) && Pico.m.z80Run) + if ((PicoOpt&POPT_EN_Z80) && Pico.m.z80Run) { + pprof_start(m68k); PicoSyncZ80(SekCyclesDone()); + pprof_end_sub(m68k); + } YM2612ResetChip(); timers_reset(); } diff --git a/pico/pico.c b/pico/pico.c index d3123868..c63360c6 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -272,6 +272,8 @@ PICO_INTERNAL int CheckDMA(void) static __inline void SekRunM68k(int cyc) { int cyc_do; + pprof_start(m68k); + SekCycleAim+=cyc; if 
((cyc_do=SekCycleAim-SekCycleCnt) <= 0) return; #if defined(EMU_CORE_DEBUG) @@ -286,6 +288,8 @@ static __inline void SekRunM68k(int cyc) #elif defined(EMU_F68K) SekCycleCnt+=fm68k_emulate(cyc_do+1, 0, 0); #endif + + pprof_end(m68k); } #include "pico_cmn.c" @@ -303,38 +307,47 @@ PICO_INTERNAL void PicoSyncZ80(int m68k_cycles_done) z80_cycle_aim = cycles_68k_to_z80(m68k_cycles_done); cnt = z80_cycle_aim - z80_cycle_cnt; + pprof_start(z80); + elprintf(EL_BUSREQ, "z80 sync %i (%i|%i -> %i|%i)", cnt, z80_cycle_cnt, z80_cycle_cnt / 228, z80_cycle_aim, z80_cycle_aim / 228); if (cnt > 0) z80_cycle_cnt += z80_run(cnt); + + pprof_end(z80); } void PicoFrame(void) { + pprof_start(frame); + Pico.m.frame_count++; if (PicoAHW & PAHW_SMS) { PicoFrameMS(); - return; + goto end; } // TODO: MCD+32X if (PicoAHW & PAHW_MCD) { PicoFrameMCD(); - return; + goto end; } if (PicoAHW & PAHW_32X) { PicoFrame32x(); - return; + goto end; } //if(Pico.video.reg[12]&0x2) Pico.video.status ^= 0x10; // change odd bit in interlace mode PicoFrameStart(); PicoFrameHints(); + +end: + pprof_end(frame); } void PicoFrameDrawOnly(void) diff --git a/pico/pico_int.h b/pico/pico_int.h index 2f4d1d38..f5481fcd 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -779,6 +779,18 @@ extern void lprintf(const char *fmt, ...); #define elprintf(w,f,...) #endif +// profiling +#ifdef PPROF +#include <platform/linux/pprof.h> +#else +#define pprof_init() +#define pprof_finish() +#define pprof_start(x) +#define pprof_end(...) +#define pprof_end_sub(...) 
+#endif + +// misc #ifdef _MSC_VER #define cdprintf #else diff --git a/pico/sound/sound.c b/pico/sound/sound.c index dfa1d651..5bb8e9d8 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -308,6 +308,8 @@ static int PsndRender(int offset, int length) (Pico_mcd->pcm.control & 0x80) && Pico_mcd->pcm.enabled; offset <<= stereo; + pprof_start(sound); + #if !SIMPLE_WRITE_SOUND if (offset == 0) { // should happen once per frame // compensate for float part of PsndLen @@ -362,6 +364,8 @@ static int PsndRender(int offset, int length) // convert + limit to normal 16bit output PsndMix_32_to_16l(PsndOut+offset, buf32, length); + pprof_end(sound); + return length; } diff --git a/platform/common/common.mak b/platform/common/common.mak index f9958f45..3ddf3fb8 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -7,6 +7,10 @@ endif ifeq "$(profile)" "2" CFLAGS += -fprofile-use endif +ifeq "$(pprof)" "1" +DEFINES += PPROF +OBJS += platform/linux/pprof.o +endif # === Pico core === # Pico diff --git a/platform/common/emu.c b/platform/common/emu.c index bde10e4d..f86526c4 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1347,6 +1347,8 @@ void emu_init(void) mkdir_path(path, pos, "srm"); mkdir_path(path, pos, "brm"); + pprof_init(); + make_config_cfg(path); config_readlrom(path); @@ -1373,6 +1375,8 @@ void emu_finish(void) #endif } + pprof_finish(); + PicoExit(); } @@ -1432,6 +1436,8 @@ void emu_loop(void) unsigned int timestamp; int diff, diff_lim; + pprof_start(main); + timestamp = get_ticks(); if (reset_timing) { reset_timing = 0; @@ -1537,7 +1543,7 @@ void emu_loop(void) PicoFrame(); pemu_finalize_frame(fpsbuff, notice_msg); - //plat_video_flip(); + // plat_video_flip(); /* frame limiter */ if (!reset_timing && !(currentConfig.EmuOpt & (EOPT_NO_FRMLIMIT|EOPT_EXT_FRMLIMIT))) @@ -1560,6 +1566,8 @@ void emu_loop(void) plat_video_flip(); pframes_done++; frames_done++; frames_shown++; + + pprof_end(main); } emu_set_fastforward(0); diff 
--git a/platform/linux/Makefile b/platform/linux/Makefile
index 47a8dcc4..3362a738 100644
--- a/platform/linux/Makefile
+++ b/platform/linux/Makefile
@@ -88,6 +88,9 @@ PicoDrive : $(OBJS)
 	@echo ">>>" $@
 	$(CC) $(CFLAGS) $^ $(LDFLAGS) -Wl,-Map=PicoDrive.map -o $@
 
+pprof: pprof.c
+	$(CROSS)gcc -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ -lrt
+
 %.o : %.asm
 	@echo ">>>" $<
 	nasm -f elf $< -o $@
diff --git a/platform/linux/pprof.c b/platform/linux/pprof.c
new file mode 100644
index 00000000..e1ecd1fd
--- /dev/null
+++ b/platform/linux/pprof.c
@@ -0,0 +1,126 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+#include <pico/pico_int.h>
+
+struct pp_counters *pp_counters;
+static int shmemid;
+
+// attach the shared counter segment; pp_counters stays NULL on failure
+void pprof_init(void)
+{
+	int this_is_new_shmem = 1;
+	key_t shmemkey;
+	void *shmem;
+
+#ifndef PPROF_TOOL
+	unsigned int tmp = pprof_get_one();
+	printf("pprof: measured diff is %u\n", pprof_get_one() - tmp);
+#endif
+
+	shmemkey = ftok(".", 0x02ABC32E);
+	if (shmemkey == -1) {
+		perror("pprof: ftok failed");
+		return;
+	}
+
+#ifndef PPROF_TOOL
+	// non-tool builds try to create the segment first
+	shmemid = shmget(shmemkey, sizeof(*pp_counters),
+		IPC_CREAT | IPC_EXCL | 0644);
+	if (shmemid == -1)
+#endif
+	{
+		shmemid = shmget(shmemkey, sizeof(*pp_counters), 0644);
+		if (shmemid == -1) {
+			perror("pprof: shmget failed");
+			return;
+		}
+		this_is_new_shmem = 0;
+	}
+
+	shmem = shmat(shmemid, NULL, 0);
+	if (shmem == (void *)-1) {
+		perror("pprof: shmat failed");
+		return;
+	}
+
+	pp_counters = shmem;
+	if (this_is_new_shmem) {
+		memset(pp_counters, 0, sizeof(*pp_counters));
+		printf("pprof: pp_counters cleared.\n");
+	}
+}
+
+// detach and remove the segment; nothing to do if pprof_init() failed
+void pprof_finish(void)
+{
+	if (pp_counters == NULL)
+		return;
+	shmdt(pp_counters);
+	shmctl(shmemid, IPC_RMID, NULL);
+}
+
+#ifdef PPROF_TOOL
+
+#define IT(n) { pp_##n, #n }
+static const struct {
+	enum pprof_points pp;
+	const char *name;
+} pp_tab[] = {
+	IT(main),
+	IT(frame),
+	IT(draw),
+	IT(sound),
+	IT(m68k),
+	IT(z80),
+	IT(msh2),
+	IT(ssh2),
+	IT(dummy),
+};
+
+int main(int argc, char *argv[])
+{
+	unsigned long long old[pp_total_points], new[pp_total_points];
+	int base = 0;
+	int l, i;
+
+	pprof_init();
+	if (pp_counters == NULL)
+		return 1;
+
+	if (argc >= 2)
+		base = atoi(argv[1]);
+	if (base < 0 || base >= pp_total_points)
+		base = 0; // out-of-range would index past old[]/new[]
+
+	memset(old, 0, sizeof(old));
+	for (l = 0; ; l++) {
+		if ((l & 0x1f) == 0) {
+			for (i = 0; i < ARRAY_SIZE(pp_tab); i++)
+				printf("%6s ", pp_tab[i].name);
+			printf("\n");
+		}
+
+		memcpy(new, pp_counters->counter, sizeof(new));
+		for (i = 0; i < ARRAY_SIZE(pp_tab); i++) {
+			unsigned long long idiff = new[i] - old[i];
+			unsigned long long bdiff = (new[base] - old[base]) | 1;
+			printf("%6.2f ", (double)idiff * 100.0 / bdiff);
+		}
+		printf("\n");
+		memcpy(old, new, sizeof(old));
+
+		if (argc < 3)
+			break;
+		usleep(atoi(argv[2]));
+	}
+
+	return 0;
+}
+
+#endif // PPROF_TOOL
diff --git a/platform/linux/pprof.h b/platform/linux/pprof.h
new file mode 100644
index 00000000..88a97e3e
--- /dev/null
+++ b/platform/linux/pprof.h
@@ -0,0 +1,54 @@
+#ifndef __PPROF_H__
+#define __PPROF_H__
+
+enum pprof_points {
+	pp_main,
+	pp_frame,
+	pp_draw,
+	pp_sound,
+	pp_m68k,
+	pp_z80,
+	pp_msh2,
+	pp_ssh2,
+	pp_dummy,
+	pp_total_points
+};
+
+struct pp_counters
+{
+	unsigned long long counter[pp_total_points];
+};
+
+extern struct pp_counters *pp_counters;
+
+#ifdef __i386__
+static __attribute__((always_inline)) inline unsigned int pprof_get_one(void)
+{
+	unsigned long long ret;
+	__asm__ __volatile__ ("rdtsc" : "=A" (ret));
+	return (unsigned int)ret;
+}
+
+#elif defined(__GP2X__)
+// XXX: MMSP2 only
+extern volatile unsigned long *gp2x_memregl;
+#define pprof_get_one() (unsigned int)gp2x_memregl[0x0a00 >> 2]
+
+#else
+#error no timer
+#endif
+
+#define pprof_start(point) { \
+	unsigned int pp_start_##point = pprof_get_one()
+#define pprof_end(point) \
+	pp_counters->counter[pp_##point] += pprof_get_one() - pp_start_##point; \
+	}
+// subtract for recursive stuff
+#define pprof_end_sub(point) \
+	pp_counters->counter[pp_##point] -= pprof_get_one() - pp_start_##point; \
+	}
+
+extern void pprof_init(void);
+extern void pprof_finish(void);
+
+#endif // __PPROF_H__
-- 
2.39.5