X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fdfxvideo%2Fgpulib_if.c;h=3a41cd7ccc965e2af5da5db5d2a9f9cbcee8f13e;hp=d7d69a7657e552bb43f34f4273d9fb488d700838;hb=HEAD;hpb=04bd10b132d06eff2a803125dc8da640be2454db diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index d7d69a76..af35f3cb 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -16,6 +16,11 @@ #include #include #include "../gpulib/gpu.h" +#include "../../include/arm_features.h" + +#if defined(__GNUC__) && (__GNUC__ >= 6 || (defined(__clang_major__) && __clang_major__ >= 10)) +#pragma GCC diagnostic ignored "-Wmisleading-indentation" +#endif #define u32 uint32_t @@ -37,8 +42,8 @@ // byteswappings -#define SWAP16(x) ({ uint16_t y=(x); (((y)>>8 & 0xff) | ((y)<<8 & 0xff00)); }) -#define SWAP32(x) ({ uint32_t y=(x); (((y)>>24 & 0xfful) | ((y)>>8 & 0xff00ul) | ((y)<<8 & 0xff0000ul) | ((y)<<24 & 0xff000000ul)); }) +#define SWAP16(x) __builtin_bswap16(x) +#define SWAP32(x) __builtin_bswap32(x) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -68,16 +73,15 @@ #endif -#define GETLEs16(X) ((int16_t)GETLE16((uint16_t *)X)) -#define GETLEs32(X) ((int16_t)GETLE32((uint16_t *)X)) +#define GETLEs16(X) ((int16_t)GETLE16((uint16_t *)(X))) -#define GETLE16(X) LE2HOST16(*(uint16_t *)X) -#define GETLE32_(X) LE2HOST32(*(uint32_t *)X) -#define GETLE16D(X) ({uint32_t val = GETLE32(X); (val<<16 | val >> 16);}) -#define PUTLE16(X, Y) do{*((uint16_t *)X)=HOST2LE16((uint16_t)Y);}while(0) -#define PUTLE32_(X, Y) do{*((uint32_t *)X)=HOST2LE16((uint32_t)Y);}while(0) -#ifdef __arm__ -#define GETLE32(X) (*(uint16_t *)(X)|(((uint16_t *)(X))[1]<<16)) +#define GETLE16(X) LE2HOST16(*(uint16_t *)(X)) +#define GETLE32_(X) LE2HOST32(*(uint32_t *)(X)) +#define PUTLE16(X, Y) do{*((uint16_t *)(X))=HOST2LE16((uint16_t)(Y));}while(0) +#define PUTLE32_(X, Y) do{*((uint32_t *)(X))=HOST2LE32((uint32_t)(Y));}while(0) +#if defined(__arm__) && !defined(HAVE_ARMV6) +// for (very) old ARMs with no unaligned loads? +#define GETLE32(X) (*(uint16_t *)(X)|((uint32_t)((uint16_t *)(X))[1]<<16)) #define PUTLE32(X, Y) do{uint16_t *p_=(uint16_t *)(X);uint32_t y_=Y;p_[0]=y_;p_[1]=y_>>16;}while(0) #else #define GETLE32 GETLE32_ @@ -233,12 +237,8 @@ extern int32_t drawH; PSXDisplay_t PSXDisplay; unsigned char *psxVub; -signed char *psxVsb; unsigned short *psxVuw; unsigned short *psxVuw_eom; -signed short *psxVsw; -uint32_t *psxVul; -int32_t *psxVsl; long lGPUstatusRet; uint32_t lGPUInfoVals[16]; @@ -266,13 +266,7 @@ long lLowerpart; static void set_vram(void *vram) { psxVub=vram; - - psxVsb=(signed char *)psxVub; // different ways of accessing PSX VRAM - psxVsw=(signed short *)psxVub; - psxVsl=(int32_t *)psxVub; psxVuw=(unsigned short *)psxVub; - psxVul=(uint32_t *)psxVub; - psxVuw_eom=psxVuw+1024*512; // pre-calc of end of vram } @@ -305,17 +299,25 @@ void renderer_notify_res_change(void) { } +void renderer_notify_scanout_change(int x, int y) +{ +} + +#include "../gpulib/gpu_timing.h" extern const unsigned char cmd_lengths[256]; -int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) +int do_cmd_list(uint32_t *list, int list_len, + int *cycles_sum_out, int *cycles_last, int *last_cmd) { + int cpu_cycles_sum = 0, cpu_cycles = *cycles_last; unsigned int cmd = 0, len; - unsigned int *list_start = list; - unsigned int *list_end = list + list_len; + uint32_t *list_start = list; + uint32_t *list_end = list + list_len; for (; list < list_end; list += 1 + len) { - cmd = *list >> 24; + short *slist = (void *)list; + cmd = GETLE32(list) >> 24; len = cmd_lengths[cmd]; if (list + 1 + len > list_end) { cmd = -1; @@ -323,10 +325,10 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) } #ifndef TEST - if (cmd == 0xa0 || cmd == 0xc0) + if (0x80 <= cmd && cmd < 0xe0) break; // image i/o, forward to upper layer else if ((cmd & 0xf8) == 0xe0) - gpu.ex_regs[cmd & 7] = list[0]; + gpu.ex_regs[cmd & 7] = GETLE32(list); #endif primTableJ[cmd]((void *)list); @@ -340,12 +342,14 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) while(1) { + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); + if(list_position >= list_end) { cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((*list_position & HOST2LE32(0xf000f000)) == HOST2LE32(0x50005000)) break; list_position++; @@ -363,12 +367,14 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) while(1) { + gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); + if(list_position >= list_end) { cmd = -1; goto breakloop; } - if((*list_position & 0xf000f000) == 0x50005000) + if((*list_position & HOST2LE32(0xf000f000)) == HOST2LE32(0x50005000)) break; list_position += 2; @@ -382,15 +388,43 @@ int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) #ifdef TEST case 0xA0: // sys -> vid { - short *slist = (void *)list; - u32 load_width = slist[4]; - u32 load_height = slist[5]; + u32 load_width = LE2HOST16(slist[4]); + u32 load_height = LE2HOST16(slist[5]); u32 load_size = load_width * load_height; len += load_size / 2; break; } #endif + + // timing + case 0x02: + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_fill(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff)); + break; + case 0x20 ... 0x23: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; + case 0x24 ... 0x27: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; + case 0x28 ... 0x2B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; + case 0x2C ... 0x2F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; + case 0x30 ... 0x33: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; + case 0x34 ... 0x37: gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; + case 0x38 ... 0x3B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; + case 0x3C ... 0x3F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; + case 0x40 ... 0x47: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; + case 0x50 ... 0x57: gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0)); break; + case 0x60 ... 0x63: + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_sprite(LE2HOST16(slist[4]) & 0x3ff, LE2HOST16(slist[5]) & 0x1ff)); + break; + case 0x64 ... 0x67: + gput_sum(cpu_cycles_sum, cpu_cycles, + gput_sprite(LE2HOST16(slist[6]) & 0x3ff, LE2HOST16(slist[7]) & 0x1ff)); + break; + case 0x68 ... 0x6B: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); break; + case 0x70 ... 0x73: + case 0x74 ... 0x77: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8)); break; + case 0x78 ... 0x7B: + case 0x7C ... 0x7F: gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16)); break; } } @@ -398,12 +432,22 @@ breakloop: gpu.ex_regs[1] &= ~0x1ff; gpu.ex_regs[1] |= lGPUstatusRet & 0x1ff; + *cycles_sum_out += cpu_cycles_sum; + *cycles_last = cpu_cycles; *last_cmd = cmd; return list - list_start; } -void renderer_sync_ecmds(uint32_t *ecmds) +void renderer_sync_ecmds(uint32_t *ecmds_) { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + // the funcs below expect LE + uint32_t i, ecmds[8]; + for (i = 1; i <= 6; i++) + ecmds[i] = HTOLE32(ecmds_[i]); +#else + uint32_t *ecmds = ecmds_; +#endif cmdTexturePage((unsigned char *)&ecmds[1]); cmdTextureWindow((unsigned char *)&ecmds[2]); cmdDrawAreaStart((unsigned char *)&ecmds[3]); @@ -412,7 +456,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) cmdSTP((unsigned char *)&ecmds[6]); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } @@ -434,3 +478,5 @@ void renderer_set_config(const struct rearmed_cbs *cbs) cbs->pl_set_gpu_caps(0); set_vram(gpu.vram); } + +// vim:ts=2:shiftwidth=2:expandtab