From f5eb372f3bff0731642899f6aaec35e9001bd451 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 3 Jun 2007 15:48:13 +0000 Subject: [PATCH] soft scaler improved, menu bgs fixed git-svn-id: file:///home/notaz/opt/svn/fceu@150 be3aeb3a-fb24-0410-a615-afba39da0efa --- drivers/gp2x/asmutils.h | 1 + drivers/gp2x/asmutils.s | 155 ++++++++++++++++++------------------ drivers/gp2x/menu.c | 19 +++-- drivers/gp2x/unix-basedir.h | 0 4 files changed, 90 insertions(+), 85 deletions(-) delete mode 100644 drivers/gp2x/unix-basedir.h diff --git a/drivers/gp2x/asmutils.h b/drivers/gp2x/asmutils.h index aa1c87f..090684e 100644 --- a/drivers/gp2x/asmutils.h +++ b/drivers/gp2x/asmutils.h @@ -5,4 +5,5 @@ void block_andor(void *src, size_t n, int andpat, int orpat); void memset32(int *dest, int c, int count); void spend_cycles(int c); // utility void soft_scale(void *dst, unsigned short *pal, int line_offs, int lines); +void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); diff --git a/drivers/gp2x/asmutils.s b/drivers/gp2x/asmutils.s index a91cfaf..4b9e699 100644 --- a/drivers/gp2x/asmutils.s +++ b/drivers/gp2x/asmutils.s @@ -1,5 +1,8 @@ @ vim:filetype=armasm +@ Assembly optimized routines for gpfce - FCE Ultra port +@ (c) Copyright 2007, Grazvydas "notaz" Ignotas + @ test .global flushcache @ beginning_addr, end_addr, flags @@ -135,9 +138,11 @@ soft_scale: add r0, r4, r12,lsl #1 @ r0 = dst_end add r1, r5, r12 @ r1 = src_end + mov r2, r11 + soft_scale_loop: sub r1, r1, #64 @ skip borders - mov r2, #256/8 + orr r2, r2, #(256/8-1)<<24 soft_scale_loop_line: ldr r12, [r1, #-8]! @@ -147,110 +152,104 @@ soft_scale_loop_line: ldrh r4, [r3, r4] and r5, lr, r12,lsr #7 ldrh r5, [r3, r5] - and r4, r4, r9, lsl #2 - orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0 - and r5, r5, r9, lsl #2 - sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 + and r11,r4, r9, lsl #2 + orr r4, r4, r11,lsl #14 @ r4[31:16] = 1/4 pix_s 0 + and r11,r5, r9, lsl #2 + sub r6, r5, r11,lsr #2 @ r6 = 3/4 pix_s 1 add r4, r4, r6, lsl #16 @ pix_d 0, 1 and r6, lr, r12,lsr #15 ldrh r6, [r3, r6] and r12,lr, r12,lsr #23 ldrh r12,[r3, r12] - and r6, r6, r9, lsl #2 - add r5, r5, r6 + bic r11,r6, #0x0820 + bic r5, r5, #0x0820 + add r5, r5, r11 mov r5, r5, lsr #1 - sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + and r11,r6, r9, lsl #2 + sub r6, r6, r11,lsr #2 @ r6 = 3/4 pix_s 2 orr r5, r5, r6, lsl #16 and r6, lr, r7, lsl #1 ldrh r6, [r3, r6] - and r12,r12,r9, lsl #2 - add r5, r5, r12,lsl #14 @ pix_d 2, 3 - and r6, r6, r9, lsl #2 + and r11,r12,r9, lsl #2 + add r5, r5, r11,lsl #14 @ pix_d 2, 3 orr r6, r12,r6, lsl #16 @ pix_d 4, 5 and r12,lr, r7, lsr #7 ldrh r12,[r3, r12] and r10,lr, r7, lsr #15 ldrh r10,[r3, r10] - and r12,r12,r9, lsl #2 - sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 - add r8, r8, r6, lsr #18 + and r11,r12,r9, lsl #2 + sub r8, r12,r11,lsr #2 @ r8 = 3/4 pix_s 1 + and r11,r6, r9, lsl #18 + add r8, r8, r11,lsr #18 + mov r8, r8, lsl #16 and r7, lr, r7, lsr #23 ldrh r7, [r3, r7] - and r10,r10,r9, lsl #2 - orr r8, r8, r10,lsl #15 - add r8, r8, r12,lsl #15 @ pix_d 6, 7 - sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 - and r7, r7, r9, lsl #2 - add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 + bic r11,r10,#0x0820 + bic r12,r12,#0x0820 + add r12,r12,r11 + add r8, r8, r12,lsr #1 @ pix_d 6, 7 + mov r8, r8, ror #16 + and r11,r10,r9, lsl #2 + sub r10,r10,r11,lsr #2 @ r10= 3/4 pix_s 2 + and r11,r7, r9, lsl #2 + add r10,r10,r11,lsr #2 @ += 1/4 pix_s 3 orr r10,r10,r7, lsl #16 @ pix_d 8, 9 - subs r2, r2, #1 + subs r2, r2, #1<<24 stmdb r0!, {r4,r5,r6,r8,r10} - bne soft_scale_loop_line + bpl soft_scale_loop_line - subs r11,r11,#1 + add r2, r2, #1<<24 + subs r2, r2, #1 bne soft_scale_loop ldmfd sp!,{r4-r11,lr} bx lr -/* buggy and slow, probably because function call overhead -@ renderer helper, based on bitbank's method -.global draw8pix @ uint8 *P, uint8 *C, uint8 *PALRAM @ dest, src, pal - -draw8pix: - stmfd sp!, {r4,r5} - - ldrb r3, [r1] @ get bit 0 pixels - mov r12,#1 - orr r12,r12,r12,lsl #8 - orr r12,r12,r12,lsl #16 - ldrb r1, [r1, #8] @ get bit 1 pixels - orr r3, r3, r3, lsl #9 @ shift them over 1 byte + 1 bit - orr r3, r3, r3, lsl #18 @ now 4 pixels take up 4 bytes - and r4, r12,r3, lsr #7 @ mask off the upper nibble pixels we want - and r5, r12,r3, lsr #3 @ mask off the lower nibble pixels we want - ldr r2, [r2] - - orr r1, r1, r1, lsl #9 @ process the bit 1 pixels - orr r1, r1, r1, lsl #18 - and r3, r12,r1, lsr #7 @ mask off the upper nibble pixels we want - and r1, r12,r1, lsr #3 @ mask off the lower nibble - orr r4, r4, r3, lsl #1 - orr r5, r5, r1, lsl #5 - - @ can this be avoided? - mov r4, r4, lsl #3 @ *8 - mov r3, r2, ror r4 - strb r3, [r0], #1 - mov r4, r4, lsr #8 - mov r3, r2, ror r4 - strb r3, [r0], #1 - mov r4, r4, lsr #8 - mov r3, r2, ror r4 - strb r3, [r0], #1 - mov r4, r4, lsr #8 - mov r3, r2, ror r4 - strb r3, [r0], #1 - - mov r5, r5, lsl #3 @ *8 - mov r3, r2, ror r5 - strb r3, [r0], #1 - mov r5, r5, lsr #8 - mov r3, r2, ror r5 - strb r3, [r0], #1 - mov r5, r5, lsr #8 - mov r3, r2, ror r5 - strb r3, [r0], #1 - mov r5, r5, lsr #8 - mov r3, r2, ror r5 - strb r3, [r0], #1 - - ldmfd sp!, {r4,r5} - bx lr -*/ +@ void convert2RGB555(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); + +.global convert2RGB555 + +convert2RGB555: + stmfd sp!,{r4-r8,lr} + mov lr, #0xff + mov lr, lr, lsl #1 + + mov r3, r3, lsr #3 + +convert2RGB555_loop: + ldmia r1!,{r4,r5} + + and r6, lr, r4, lsl #1 + ldrh r6, [r2, r6] + and r7, lr, r4, lsr #7 + ldrh r7, [r2, r7] + and r8, lr, r4, lsr #15 + ldrh r8, [r2, r8] + and r4, lr, r4, lsr #23 + ldrh r4, [r2, r4] + + orr r6, r6, r7, lsl #16 + and r12,lr, r5, lsl #1 + ldrh r12, [r2, r12] + orr r7, r8, r4, lsl #16 + and r8, lr, r5, lsr #7 + ldrh r8, [r2, r8] + and r4, lr, r5, lsr #15 + ldrh r4, [r2, r4] + and r5, lr, r5, lsr #23 + ldrh r5, [r2, r5] + orr r8, r12,r8, lsl #16 + orr r12,r4, r5, lsl #16 + + stmia r0!,{r6,r7,r8,r12} + subs r3, r3, #1 + bne convert2RGB555_loop + + ldmfd sp!,{r4-r8,lr} + bx lr diff --git a/drivers/gp2x/menu.c b/drivers/gp2x/menu.c index e4b3488..5cedc85 100644 --- a/drivers/gp2x/menu.c +++ b/drivers/gp2x/menu.c @@ -1,7 +1,5 @@ +// menu system for gpfce - FCE Ultra port // (c) Copyright 2006,2007 notaz, All rights reserved. -// Free for non-commercial use. - -// For commercial use, separate licencing terms must be obtained. #include #include @@ -1386,10 +1384,17 @@ static void menu_prepare_bg(void) if (fceugi) { /* raw emu frame should now be at gp2x_screen */ - soft_scale((char *)gp2x_screen + 32, gp2x_palette16, srendline, erendline-srendline); - if (srendline) - memset32((int *)((char *)gp2x_screen + 32), 0, srendline*320*2/4); - memcpy(menu_bg, gp2x_screen + 32, 320*240*2); + if (Settings.scaling != 0) + { + soft_scale((char *)gp2x_screen + 32, gp2x_palette16, srendline, erendline-srendline); + if (srendline) + memset32((int *)((char *)gp2x_screen + 32), 0, srendline*320*2/4); + memcpy(menu_bg, gp2x_screen + 32, 320*240*2); + } + else + { + convert2RGB555(menu_bg, gp2x_screen, gp2x_palette16, 320*240); + } } else { diff --git a/drivers/gp2x/unix-basedir.h b/drivers/gp2x/unix-basedir.h deleted file mode 100644 index e69de29..0000000 -- 2.39.2