From: notaz Date: Mon, 24 Sep 2007 21:06:34 +0000 (+0000) Subject: giz wip (acc 16bit faster than 8!) X-Git-Tag: v1.85~656 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=499a0be394af5257bbec4999d552be2ac0ec75b7;p=picodrive.git giz wip (acc 16bit faster than 8!) git-svn-id: file:///home/notaz/opt/svn/PicoDrive@260 be3aeb3a-fb24-0410-a615-afba39da0efa --- diff --git a/Pico/Draw.c b/Pico/Draw.c index ff2a48cc..20a18de1 100644 --- a/Pico/Draw.c +++ b/Pico/Draw.c @@ -1375,5 +1375,8 @@ void PicoDrawSetColorFormat(int which) case 0: FinalizeLine = FinalizeLineBGR444; break; default:FinalizeLine = NULL; break; } +#if OVERRIDE_HIGHCOL + if (which) HighCol=DefHighCol; +#endif } diff --git a/Pico/Draw.s b/Pico/Draw.s index 6338aabd..f00f69f9 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -1647,8 +1647,14 @@ FinalizeLineRGB555: addeq r0, r0, #32*2 .fl_no32colRGB555: -.fl_loopRGB555: +.if UNALIGNED_DRAWLINEDEST + @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer + tst r0, #2 + bne .fl_RGB555u +.endif + +.fl_loopRGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1670,12 +1676,12 @@ FinalizeLineRGB555: ldrh r6, [r3, r6] and r12,lr, r7, lsr #15 ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] orr r8, r8, r6, lsl #16 - and r6, lr, r7, lsr #23 - ldrh r6, [r3, r6] @ 1 cycle interlock here (r6) subs r2, r2, #1 - orr r12,r12, r6, lsl #16 + orr r12,r12, r7, lsl #16 stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 @@ -1689,6 +1695,11 @@ FinalizeLineRGB555: mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 orr r9, r9, #0x00e7 +.if UNALIGNED_DRAWLINEDEST + tst r0, #2 + bne .fl_32scale_RGB555u +.endif + .fl_loop32scale_RGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1745,6 +1756,132 @@ FinalizeLineRGB555: ldmfd sp!, {r4-r8,lr} bx lr +.if UNALIGNED_DRAWLINEDEST + @ unaligned versions of loops + +.fl_RGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + strh r4, [r0], #2 + b .fl_loopRGB555u_enter + +.fl_loopRGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + + orr r4, r8, r4, lsl #16 + str r4, [r0], #4 + +.fl_loopRGB555u_enter: + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + and r8, lr, r12, lsr #23 + ldrh r8, [r3, r8] + orr r4, r5, r6, lsl #16 + + and r5, lr, r7, lsl #1 + ldrh r5, [r3, r5] + and r6, lr, r7, lsr #7 + ldrh r6, [r3, r6] + orr r5, r8, r5, lsl #16 + + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r8, lr, r7, lsr #23 + ldrh r8, [r3, r8] + orr r6, r6, r12,lsl #16 + + subs r2, r2, #1 + stmia r0!, {r4,r5,r6} + bne .fl_loopRGB555u + + strh r8, [r0], #2 + + ldmfd sp!, {r4-r8,lr} + bx lr + + +.fl_32scale_RGB555u: + + @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 +.fl_loop32scale_RGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12,lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + and r4, r4, r9, lsl #2 +@ orr r4, rx, r4, lsl #16 + str r4, [r0], #4 @ pix_d -1, 0 + +.fl_loop32scale_RGB555u_enter: + and r4, r9, r4, lsr #2 @ r4=1/4 pix_s 0 + + and r5, r5, r9, lsl #2 + sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 + add r4, r6, r4, lsr #2 @ r4=(1/4 pix_s 0) + (3/4 pix_s 1) + orr r4, r4, r5, lsl #15 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r12,lr, r12,lsr #23 + ldrh r12,[r3, r12] + and r6, r6, r9, lsl #2 + add r4, r4, r6, lsl #15 @ pix_d 1, 2 + +@@ TODO... + + mov r5, r5, lsr #1 + sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r12,r12,r9, lsl #2 + add r5, r5, r12,lsl #14 @ pix_d 2, 3 + and r6, r6, r9, lsl #2 + orr r6, r12,r6, lsl #16 @ pix_d 4, 5 + + and r12,lr, r7, lsr #7 + ldrh r12,[r3, r12] + and r10,lr, r7, lsr #15 + ldrh r10,[r3, r10] + and r12,r12,r9, lsl #2 + sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 + add r8, r8, r6, lsr #18 + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + and r10,r10,r9, lsl #2 + orr r8, r8, r10,lsl #15 + add r8, r8, r12,lsl #15 @ pix_d 6, 7 + sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 + and r7, r7, r9, lsl #2 + add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 + orr r10,r10,r7, lsl #16 @ pix_d 8, 9 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_loop32scale_RGB555u + + ldmfd sp!, {r9,r10} + ldmfd sp!, {r4-r8,lr} + bx lr + +.endif @ UNALIGNED_DRAWLINEDEST + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ diff --git a/platform/gizmondo/Makefile b/platform/gizmondo/Makefile index 0bae95f1..01146cdf 100644 --- a/platform/gizmondo/Makefile +++ b/platform/gizmondo/Makefile @@ -106,7 +106,8 @@ all: PicoDrive.exe PicoDrive.exe : $(OBJS) @echo ">>>" $@ - $(GCC) -o $@ -static $(COPT) $^ -lm -lpng -Lkgsdk/ -lKGSDK -Wl,-Map=PicoDrive.map + $(GCC) -o $@ -static $(COPT) $^ -lm -lpng -Lkgsdk/ -lKGSDK -Wl,-Map=PicoDrive.map 2>&1 | \ + grep -v ".idata$$4" # | grep -v "supports interworking, whereas" ifeq ($(DEBUG),) $(STRIP) $@ endif diff --git a/platform/gizmondo/asm_utils.s b/platform/gizmondo/asm_utils.s index f0cb4b4c..ec2d55bd 100644 --- a/platform/gizmondo/asm_utils.s +++ b/platform/gizmondo/asm_utils.s @@ -65,7 +65,7 @@ vcloop_40_aligned: mul r4, r5, r6 sub r1, r1, r4 -vcloop_40_unaligned: +vcloop_40_unaligned_outer: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -73,42 +73,55 @@ vcloop_40_unaligned: ldrh r4, [r2, r4] and r5, lr, r12, lsr #7 ldrh r5, [r2, r5] - strh r4, [r0], #2 + strh r4, [r0], #2 + b vcloop_40_unaligned_enter + +vcloop_40_unaligned: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12, lsl #1 + ldrh r6, [r2, r6] + and r5, lr, r12, lsr #7 + ldrh r5, [r2, r5] + orr r4, r4, r6, lsl #16 + str r4, [r0], #4 + +vcloop_40_unaligned_enter: and r6, lr, r12, lsr #15 ldrh r6, [r2, r6] and r4, lr, r12, lsr #23 ldrh r4, [r2, r4] - orr r5, r5, r6, lsl #16 + orr r5, r5, r6, lsl #16 and r8, lr, r7, lsl #1 ldrh r8, [r2, r8] - and r6, lr, r7, lsr #7 ldrh r6, [r2, r6] - orr r8, r4, r8, lsl #16 + orr r8, r4, r8, lsl #16 and r12,lr, r7, lsr #15 ldrh r12,[r2, r12] and r4, lr, r7, lsr #23 ldrh r4, [r2, r4] - orr r12,r6, r12,lsl #16 + orr r12,r6, r12,lsl #16 subs r3, r3, #1<<24 stmia r0!, {r5,r8,r12} - strh r4, [r0], #2 bpl vcloop_40_unaligned + strh r4, [r0], #2 + add r1, r1, #336 @ skip a line and 1 col add r0, r0, #320*2+2*2 add r3, r3, #(320/8)<<24 sub r3, r3, #1 tst r3, #0xff - bne vcloop_40_unaligned + bne vcloop_40_unaligned_outer ldmfd sp!, {r4-r9,lr} bx lr - diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 24bb8b07..0828f3c1 100644 --- a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -160,17 +160,22 @@ static int EmuScan8(unsigned int num, void *sdata) static void osd_text(int x, int y, const char *text) { - int len = strlen(text) * 8; - int *p, i, h; - len = (len+1) >> 1; + int len = strlen(text) * 8 / 2; + int *p, h; for (h = 0; h < 8; h++) { p = (int *) ((unsigned short *) giz_screen+x+321*(y+h)); p = (int *) ((int)p & ~3); // align - for (i = len; i; i--, p++) *p = 0; + memset32(p, 0, len); } emu_textOut16(x, y, text); } +/* +void log1(void *p1, void *p2) +{ + lprintf("%p %p %p\n", p1, p2, DrawLineDest); +} +*/ short localPal[0x100]; static void (*vidCpy8to16)(void *dest, void *src, short *pal, int lines) = NULL; @@ -207,9 +212,7 @@ static void blit(const char *fps, const char *notice) } */ } // TODO... - //lprintf("vidCpy8to16 %p %p\n", (unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8); vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, 224); - //lprintf("after vidCpy8to16\n"); } if (notice || (emu_opt & 2)) { @@ -220,21 +223,6 @@ static void blit(const char *fps, const char *notice) // if ((emu_opt & 0x400) && (PicoMCD & 1)) // cd_leds(); - //gp2x_video_wait_vsync(); - - if (!(PicoOpt&0x10)) { - if (Pico.video.reg[1] & 8) { - if (currentConfig.EmuOpt&0x80) - DrawLineDest = (unsigned short *) giz_screen; - else - HighCol = gfx_buffer; - } else { - if (currentConfig.EmuOpt&0x80) - DrawLineDest = (unsigned short *) giz_screen + 320*8; - else - HighCol = gfx_buffer + 328*8; - } - } } // clears whole screen or just the notice area (in all buffers) @@ -243,21 +231,22 @@ static void clearArea(int full) if (giz_screen == NULL) giz_screen = Framework2D_LockBuffer(); if (full) memset32(giz_screen, 0, 320*240*2/4); - else memset32((int *)((char *)giz_screen + 320*232*2), 0, 320*8*2/4); + else memset32((int *)((char *)giz_screen + 321*232*2), 0, 321*8*2/4); } static void vidResetMode(void) { - void *screen; + giz_screen = Framework2D_LockBuffer(); + if (PicoOpt&0x10) { } else if (currentConfig.EmuOpt&0x80) { PicoDrawSetColorFormat(1); PicoScan = EmuScan16; } else { - PicoDrawSetColorFormat(0); + PicoDrawSetColorFormat(-1); PicoScan = EmuScan8; } - if ((PicoOpt&0x10)||!(currentConfig.EmuOpt&0x80)) { + if ((PicoOpt&0x10) || !(currentConfig.EmuOpt&0x80)) { // setup pal for 8-bit modes localPal[0xc0] = 0x0600; localPal[0xd0] = 0xc000; @@ -265,8 +254,8 @@ static void vidResetMode(void) localPal[0xf0] = 0xffff; } Pico.m.dirtyPal = 1; - screen = Framework2D_LockBuffer(); - memset32(screen, 0, 320*240*2/4); + + memset32(giz_screen, 0, 321*240*2/4); Framework2D_UnlockBuffer(); giz_screen = NULL; } @@ -520,10 +509,16 @@ void emu_Loop(void) updateKeys(); - if (giz_screen == NULL) + if (giz_screen == NULL && (currentConfig.EmuOpt&0x80)) giz_screen = Framework2D_LockBuffer(); + if (!(PicoOpt&0x10)) + PicoScan((unsigned) -1, NULL); PicoFrame(); + + if (giz_screen == NULL) + giz_screen = Framework2D_LockBuffer(); + blit(fpsbuff, notice); if (giz_screen != NULL) { diff --git a/platform/gizmondo/port_config.s b/platform/gizmondo/port_config.s index 99cd249a..4a2366e1 100644 --- a/platform/gizmondo/port_config.s +++ b/platform/gizmondo/port_config.s @@ -3,11 +3,12 @@ @ .equiv START_ROW, 1 @ .equiv END_ROW, 27 @ one row means 8 pixels. If above example was used, (27-1)*8=208 lines would be rendered. -.equiv START_ROW, 0 -.equiv END_ROW, 28 +.equiv START_ROW, 0 +.equiv END_ROW, 28 -.equiv OVERRIDE_HIGHCOL, 1 +.equiv OVERRIDE_HIGHCOL, 1 +.equiv UNALIGNED_DRAWLINEDEST, 1 @ this should be set to one only for GP2X port -.equiv EXTERNAL_YM2612, 0 +.equiv EXTERNAL_YM2612, 0 diff --git a/platform/gp2x/port_config.s b/platform/gp2x/port_config.s index 87c3e527..5a410364 100644 --- a/platform/gp2x/port_config.s +++ b/platform/gp2x/port_config.s @@ -3,11 +3,12 @@ @ .equiv START_ROW, 1 @ .equiv END_ROW, 27 @ one row means 8 pixels. If above example was used, (27-1)*8=208 lines would be rendered. -.equiv START_ROW, 0 -.equiv END_ROW, 28 +.equiv START_ROW, 0 +.equiv END_ROW, 28 -.equiv OVERRIDE_HIGHCOL, 0 +.equiv OVERRIDE_HIGHCOL, 0 +.equiv UNALIGNED_DRAWLINEDEST, 0 @ this should be set to one only for GP2X port -.equiv EXTERNAL_YM2612, 1 +.equiv EXTERNAL_YM2612, 1