| 1 | /* |
| 2 | * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com> |
| 3 | * |
| 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public License as |
| 6 | * published by the Free Software Foundation; either version 2 of |
| 7 | * the License, or (at your option) any later version. |
| 8 | * |
| 9 | * This program is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | * General Public License for more details. |
| 13 | */ |
| 14 | |
| 15 | #include <stdio.h> |
| 16 | #include <stdlib.h> |
| 17 | |
| 18 | #include "SDL.h" |
| 19 | #include "common.h" |
| 20 | |
| 21 | extern u32 span_pixels; |
| 22 | extern u32 span_pixel_blocks; |
| 23 | extern u32 span_pixel_blocks_unaligned; |
| 24 | extern u32 spans; |
| 25 | extern u32 triangles; |
| 26 | extern u32 sprites; |
| 27 | extern u32 sprites_4bpp; |
| 28 | extern u32 sprites_8bpp; |
| 29 | extern u32 sprites_16bpp; |
| 30 | extern u32 sprites_untextured; |
| 31 | extern u32 sprite_blocks; |
| 32 | extern u32 lines; |
| 33 | extern u32 texels_4bpp; |
| 34 | extern u32 texels_8bpp; |
| 35 | extern u32 texels_16bpp; |
| 36 | extern u32 texel_blocks_4bpp; |
| 37 | extern u32 texel_blocks_8bpp; |
| 38 | extern u32 texel_blocks_16bpp; |
| 39 | extern u32 texel_blocks_untextured; |
| 40 | extern u32 blend_blocks; |
| 41 | extern u32 untextured_pixels; |
| 42 | extern u32 blend_pixels; |
| 43 | extern u32 transparent_pixels; |
| 44 | extern u32 render_buffer_flushes; |
| 45 | extern u32 state_changes; |
| 46 | extern u32 trivial_rejects; |
| 47 | extern u32 left_split_triangles; |
| 48 | extern u32 flat_triangles; |
| 49 | extern u32 clipped_triangles; |
| 50 | extern u32 zero_block_spans; |
| 51 | extern u32 texture_cache_loads; |
| 52 | extern u32 false_modulated_triangles; |
| 53 | extern u32 false_modulated_sprites; |
| 54 | |
| 55 | static u32 mismatches; |
| 56 | |
| 57 | typedef struct |
| 58 | { |
| 59 | u16 vram[1024 * 512]; |
| 60 | u32 gpu_register[15]; |
| 61 | u32 status; |
| 62 | } gpu_dump_struct; |
| 63 | |
| 64 | static gpu_dump_struct state; |
| 65 | |
| 66 | psx_gpu_struct __attribute__((aligned(256))) _psx_gpu; |
| 67 | u16 __attribute__((aligned(256))) _vram[1024 * 512]; |
| 68 | |
/*
 * Expresses numerator / denominator as a percentage, computed in double
 * precision. The definition deliberately has no trailing line continuation,
 * so the line that follows it can never be spliced into the macro body.
 */
#define percent_of(numerator, denominator)                                     \
  ((((double)(numerator)) / (denominator)) * 100.0)
| 71 | |
| 72 | void clear_stats(void) |
| 73 | { |
| 74 | triangles = 0; |
| 75 | sprites = 0; |
| 76 | sprites_4bpp = 0; |
| 77 | sprites_8bpp = 0; |
| 78 | sprites_16bpp = 0; |
| 79 | sprites_untextured = 0; |
| 80 | sprite_blocks = 0; |
| 81 | lines = 0; |
| 82 | span_pixels = 0; |
| 83 | span_pixel_blocks = 0; |
| 84 | span_pixel_blocks_unaligned = 0; |
| 85 | spans = 0; |
| 86 | texels_4bpp = 0; |
| 87 | texels_8bpp = 0; |
| 88 | texels_16bpp = 0; |
| 89 | texel_blocks_untextured = 0; |
| 90 | texel_blocks_4bpp = 0; |
| 91 | texel_blocks_8bpp = 0; |
| 92 | texel_blocks_16bpp = 0; |
| 93 | blend_blocks = 0; |
| 94 | untextured_pixels = 0; |
| 95 | blend_pixels = 0; |
| 96 | transparent_pixels = 0; |
| 97 | render_buffer_flushes = 0; |
| 98 | state_changes = 0; |
| 99 | trivial_rejects = 0; |
| 100 | left_split_triangles = 0; |
| 101 | flat_triangles = 0; |
| 102 | clipped_triangles = 0; |
| 103 | zero_block_spans = 0; |
| 104 | texture_cache_loads = 0; |
| 105 | false_modulated_triangles = 0; |
| 106 | false_modulated_sprites = 0; |
| 107 | } |
| 108 | |
| 109 | void update_screen(psx_gpu_struct *psx_gpu, SDL_Surface *screen) |
| 110 | { |
| 111 | u32 x, y; |
| 112 | |
| 113 | for(y = 0; y < 512; y++) |
| 114 | { |
| 115 | for(x = 0; x < 1024; x++) |
| 116 | { |
| 117 | u32 pixel = psx_gpu->vram_ptr[(y * 1024) + x]; |
| 118 | ((u32 *)screen->pixels)[(y * 1024) + x] = |
| 119 | ((pixel & 0x1F) << (16 + 3)) | |
| 120 | (((pixel >> 5) & 0x1F) << (8 + 3)) | |
| 121 | (((pixel >> 10) & 0x1F) << 3); |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | SDL_Flip(screen); |
| 126 | } |
| 127 | |
| 128 | #ifdef PANDORA_BUILD |
| 129 | |
| 130 | #include <fcntl.h> |
| 131 | #include <linux/fb.h> |
| 132 | #include <sys/mman.h> |
| 133 | #include <sys/ioctl.h> |
| 134 | |
| 135 | #endif |
| 136 | |
| 137 | int main(int argc, char *argv[]) |
| 138 | { |
| 139 | psx_gpu_struct *psx_gpu = &_psx_gpu; |
| 140 | SDL_Surface *screen; |
| 141 | SDL_Event event; |
| 142 | |
| 143 | u32 *list; |
| 144 | int size; |
| 145 | FILE *state_file; |
| 146 | FILE *list_file; |
| 147 | u32 no_display = 0; |
| 148 | |
| 149 | if((argc != 3) && (argc != 4)) |
| 150 | { |
| 151 | printf("usage:\n%s <state> <list>\n", argv[0]); |
| 152 | return 1; |
| 153 | } |
| 154 | |
| 155 | if((argc == 4) && !strcmp(argv[3], "-n")) |
| 156 | no_display = 1; |
| 157 | |
| 158 | state_file = fopen(argv[1], "rb"); |
| 159 | fread(&state, 1, sizeof(gpu_dump_struct), state_file); |
| 160 | fclose(state_file); |
| 161 | |
| 162 | list_file = fopen(argv[2], "rb"); |
| 163 | |
| 164 | fseek(list_file, 0, SEEK_END); |
| 165 | size = ftell(list_file); |
| 166 | fseek(list_file, 0, SEEK_SET); |
| 167 | //size = 0; |
| 168 | |
| 169 | list = malloc(size); |
| 170 | fread(list, 1, size, list_file); |
| 171 | fclose(list_file); |
| 172 | |
| 173 | if(no_display == 0) |
| 174 | { |
| 175 | SDL_Init(SDL_INIT_EVERYTHING); |
| 176 | screen = SDL_SetVideoMode(1024, 512, 32, 0); |
| 177 | } |
| 178 | |
| 179 | initialize_psx_gpu(psx_gpu, _vram); |
| 180 | |
| 181 | #ifdef PANDORA_BUILD |
| 182 | system("ofbset -fb /dev/fb1 -mem 6291456 -en 0"); |
| 183 | u32 fbdev_handle = open("/dev/fb1", O_RDWR); |
| 184 | psx_gpu->vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE, |
| 185 | MAP_SHARED | 0xA0000000, fbdev_handle, 0)); |
| 186 | psx_gpu->vram_ptr += 64; |
| 187 | #endif |
| 188 | |
| 189 | |
| 190 | |
| 191 | #ifdef PANDORA_BUILD |
| 192 | //triangle_benchmark(psx_gpu); |
| 193 | //return 0; |
| 194 | #endif |
| 195 | |
| 196 | #ifdef FULL_COMPARE_MODE |
| 197 | psx_gpu->pixel_count_mode = 1; |
| 198 | psx_gpu->pixel_compare_mode = 0; |
| 199 | memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); |
| 200 | //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512); |
| 201 | gpu_parse(psx_gpu, list, size); |
| 202 | |
| 203 | psx_gpu->pixel_count_mode = 0; |
| 204 | psx_gpu->pixel_compare_mode = 1; |
| 205 | memcpy(psx_gpu->compare_vram, state.vram, 1024 * 512 * 2); |
| 206 | memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); |
| 207 | //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512); |
| 208 | clear_stats(); |
| 209 | gpu_parse(psx_gpu, list, size); |
| 210 | flush_render_block_buffer(psx_gpu); |
| 211 | #else |
| 212 | memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); |
| 213 | |
| 214 | psx_gpu->pixel_count_mode = 0; |
| 215 | psx_gpu->pixel_compare_mode = 0; |
| 216 | |
| 217 | clear_stats(); |
| 218 | |
| 219 | #ifdef PANDORA_BUILD |
| 220 | init_counter(); |
| 221 | #endif |
| 222 | |
| 223 | gpu_parse(psx_gpu, list, size); |
| 224 | flush_render_block_buffer(psx_gpu); |
| 225 | |
| 226 | clear_stats(); |
| 227 | |
| 228 | #ifdef PANDORA_BUILD |
| 229 | u32 cycles = get_counter(); |
| 230 | #endif |
| 231 | |
| 232 | gpu_parse(psx_gpu, list, size); |
| 233 | flush_render_block_buffer(psx_gpu); |
| 234 | |
| 235 | printf("%s: ", argv[1]); |
| 236 | #ifdef PANDORA_BUILD |
| 237 | u32 cycles_elapsed = get_counter() - cycles; |
| 238 | |
| 239 | printf("%d\n", cycles_elapsed); |
| 240 | #endif |
| 241 | |
| 242 | #if 1 |
| 243 | u32 i; |
| 244 | |
| 245 | for(i = 0; i < 1024 * 512; i++) |
| 246 | { |
| 247 | if((psx_gpu->vram_ptr[i] & 0x7FFF) != (state.vram[i] & 0x7FFF)) |
| 248 | { |
| 249 | printf("(%d %d %d) vs (%d %d %d) at (%d %d)\n", |
| 250 | psx_gpu->vram_ptr[i] & 0x1F, |
| 251 | (psx_gpu->vram_ptr[i] >> 5) & 0x1F, |
| 252 | (psx_gpu->vram_ptr[i] >> 10) & 0x1F, |
| 253 | state.vram[i] & 0x1F, |
| 254 | (state.vram[i] >> 5) & 0x1F, |
| 255 | (state.vram[i] >> 10) & 0x1F, i % 1024, i / 1024); |
| 256 | |
| 257 | mismatches++; |
| 258 | } |
| 259 | else |
| 260 | { |
| 261 | psx_gpu->vram_ptr[i] = |
| 262 | ((psx_gpu->vram_ptr[i] & 0x1F) / 4) | |
| 263 | ((((psx_gpu->vram_ptr[i] >> 5) & 0x1F) / 4) << 5) | |
| 264 | ((((psx_gpu->vram_ptr[i] >> 10) & 0x1F) / 4) << 10); |
| 265 | } |
| 266 | } |
| 267 | #endif |
| 268 | #endif |
| 269 | |
| 270 | #if 0 |
| 271 | printf("\n"); |
| 272 | printf(" %d pixels, %d pixel blocks (%d unaligned), %d spans\n" |
| 273 | " (%lf pixels per block (%lf unaligned, r %lf), %lf pixels per span),\n" |
| 274 | " %lf blocks per span (%lf per non-zero span), %lf overdraw)\n\n", |
| 275 | span_pixels, span_pixel_blocks, span_pixel_blocks_unaligned, spans, |
| 276 | (double)span_pixels / span_pixel_blocks, |
| 277 | (double)span_pixels / span_pixel_blocks_unaligned, |
| 278 | (double)span_pixel_blocks / span_pixel_blocks_unaligned, |
| 279 | (double)span_pixels / spans, |
| 280 | (double)span_pixel_blocks / spans, |
| 281 | (double)span_pixel_blocks / (spans - zero_block_spans), |
| 282 | (double)span_pixels / |
| 283 | ((psx_gpu->viewport_end_x - psx_gpu->viewport_start_x) * |
| 284 | (psx_gpu->viewport_end_y - psx_gpu->viewport_start_y))); |
| 285 | |
| 286 | printf(" %d triangles (%d false modulated)\n" |
| 287 | " (%d trivial rejects, %lf%% flat, %lf%% left split, %lf%% clipped)\n" |
| 288 | " (%lf pixels per triangle, %lf rows per triangle)\n\n", |
| 289 | triangles, false_modulated_triangles, trivial_rejects, |
| 290 | percent_of(flat_triangles, triangles), |
| 291 | percent_of(left_split_triangles, triangles), |
| 292 | percent_of(clipped_triangles, triangles), |
| 293 | (double)span_pixels / triangles, |
| 294 | (double)spans / triangles); |
| 295 | |
| 296 | printf(" Block data:\n"); |
| 297 | printf(" %7d 4bpp texel blocks (%lf%%)\n", texel_blocks_4bpp, |
| 298 | percent_of(texel_blocks_4bpp, span_pixel_blocks)); |
| 299 | printf(" %7d 8bpp texel blocks (%lf%%)\n", texel_blocks_8bpp, |
| 300 | percent_of(texel_blocks_8bpp, span_pixel_blocks)); |
| 301 | printf(" %7d 16bpp texel blocks (%lf%%)\n", texel_blocks_16bpp, |
| 302 | percent_of(texel_blocks_16bpp, span_pixel_blocks)); |
| 303 | printf(" %7d untextured blocks (%lf%%)\n", texel_blocks_untextured, |
| 304 | percent_of(texel_blocks_untextured, span_pixel_blocks)); |
| 305 | printf(" %7d sprite blocks (%lf%%)\n", sprite_blocks, |
| 306 | percent_of(sprite_blocks, span_pixel_blocks)); |
| 307 | printf(" %7d blended blocks (%lf%%)\n", blend_blocks, |
| 308 | percent_of(blend_blocks, span_pixel_blocks)); |
| 309 | printf("\n"); |
| 310 | printf(" %lf blocks per render buffer flush\n", (double)span_pixel_blocks / |
| 311 | render_buffer_flushes); |
| 312 | printf(" %d zero block spans\n", zero_block_spans); |
| 313 | printf(" %d state changes, %d texture cache loads\n", state_changes, |
| 314 | texture_cache_loads); |
| 315 | if(sprites) |
| 316 | { |
| 317 | printf(" %d sprites\n" |
| 318 | " 4bpp: %lf%%\n" |
| 319 | " 8bpp: %lf%%\n" |
| 320 | " 16bpp: %lf%%\n" |
| 321 | " untextured: %lf%%\n", |
| 322 | sprites, percent_of(sprites_4bpp, sprites), |
| 323 | percent_of(sprites_8bpp, sprites), percent_of(sprites_16bpp, sprites), |
| 324 | percent_of(sprites_untextured, sprites)); |
| 325 | } |
| 326 | printf("\n"); |
| 327 | printf(" %d mismatches\n\n\n", mismatches); |
| 328 | #endif |
| 329 | |
| 330 | fflush(stdout); |
| 331 | |
| 332 | if(no_display == 0) |
| 333 | { |
| 334 | while(1) |
| 335 | { |
| 336 | update_screen(psx_gpu, screen); |
| 337 | |
| 338 | if(SDL_PollEvent(&event)) |
| 339 | { |
| 340 | if((event.type == SDL_QUIT) || |
| 341 | ((event.type == SDL_KEYDOWN) && |
| 342 | (event.key.keysym.sym == SDLK_ESCAPE))) |
| 343 | { |
| 344 | break; |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | SDL_Delay(20); |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | return (mismatches != 0); |
| 353 | } |