From 788343bb4cbc9ae133ac748f9ce59731bad67f22 Mon Sep 17 00:00:00 2001
From: notaz
Date: Fri, 5 Jun 2009 17:36:30 +0300
Subject: [PATCH] added upscaler

---
 gp2x/Makefile         |   5 +-
 gp2x/upscale_aspect.s | 320 ++++++++++++++++++++++++++++++++++++++++++
 video.c               |  16 ++-
 3 files changed, 338 insertions(+), 3 deletions(-)
 create mode 100644 gp2x/upscale_aspect.s

diff --git a/gp2x/Makefile b/gp2x/Makefile
index be06a86..cf0b3a5 100644
--- a/gp2x/Makefile
+++ b/gp2x/Makefile
@@ -16,7 +16,7 @@ endif
 
 OBJS = main.o cpu.o memory.u video.o input.o sound.o gp2x.o gui.o \
        cheats.o zip.o cpu_threaded.z arm_stub.o video_blend.o \
-       flush_cache.o warm.o sys_cacheflush.o
+       warm.o sys_cacheflush.o upscale_aspect.o
 BIN = gpsp.gpe
 
 # Platform specific definitions
@@ -57,6 +57,9 @@ LIBS += -ggdb
 %.o: %.S
 	$(CC) $(ASFLAGS) $(INCLUDES) -c -o $@ $<
 
+%.o: %.s
+	$(CC) $(ASFLAGS) $(INCLUDES) -c -o $@ $<
+
 all: $(OBJS)
 	$(CC) $(OBJS) $(LIBS) -o $(BIN)
 #	$(STRIP) $(BIN)

diff --git a/gp2x/upscale_aspect.s b/gp2x/upscale_aspect.s
new file mode 100644
index 0000000..b7694ca
--- /dev/null
+++ b/gp2x/upscale_aspect.s
@@ -0,0 +1,320 @@
+/*
+ * 240x160 -> 320x213 upscaler for ARM with interpolation
+ *
+ * Written by Gražvydas "notaz" Ignotas
+ * Prototyped by Rokas
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of the organization nor the
+ *    names of its contributors may be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  0  1  2  :  3  4  5
+ *  6  7  8  :  9 10 11
+ * 12 13 14  : 15 16 17
+ *           v
+ *  0  1  2  3 :  4  5  6  7
+ *  8  9 10 11 : 12 13 14 15
+ * 16 17 18 19 : 20 21 22 23
+ * 24 25 26 27 : 28 29 30 31
+ */
+
+.macro unpack_hi dst, src            @ dst = high pixel of src pair, unpacked
+    mov     \dst, \src, lsr #16
+    orr     \dst, \dst, lsl #16      @ duplicate it into both halves
+    and     \dst, \dst, lr           @ keep 00000ggg'ggg00000'rrrrr000'000bbbbb
+.endm
+
+.macro unpack_lo dst, src            @ dst = low pixel of src pair, unpacked
+    mov     \dst, \src, lsl #16
+    orr     \dst, \dst, lsr #16
+    and     \dst, \dst, lr
+.endm
+
+@ do 3:5 summing: r2 = (s1*3 + s2*5 + 4) / 8
+@ s2 != r2
+.macro do_3_5 s1, s2
+    add     r2, \s1, \s1, lsl #1     @ r2 = s1 * 3
+    add     r2, r2, \s2, lsl #2
+    add     r2, r2, \s2              @ r2 += s2 * 5
+    add     r2, r2, r12, lsl #2      @ sum += round * 4
+    and     r2, lr, r2, lsr #3       @ mask_to_unpacked(sum / 8)
+.endm
+
+@ do 14:7:7:4: r2 = (s1*14 + s2*7 + s3*7 + s4*4 + 16) / 32
+@ {s2,s3,s4} != r2
+.macro do_14_7_7_4 s1, s2, s3, s4
+    mov     r2, \s1, lsl #4
+    sub     r2, r2, \s1, lsl #1      @ r2 = s1 * 14
+    add     r2, r2, \s2, lsl #3
+    sub     r2, r2, \s2              @ r2 += s2 * 7
+    add     r2, r2, \s3, lsl #3
+    sub     r2, r2, \s3              @ r2 += s3 * 7
+    add     r2, r2, \s4, lsl #2      @ r2 += s4 * 4
+    add     r2, r2, r12, lsl #3      @ sum += round * 16
+    and     r2, lr, r2, lsr #5       @ mask_to_unpacked(sum / 32)
+.endm
+
+.global upscale_aspect @ u16 *dst, u16 *src
+upscale_aspect:
+    stmfd   sp!,{r4-r11,lr}
+    mov     lr, #0x0000001f
+    orr     lr, lr, #0x0000f800      @ for "unpacked" form of
+    orr     lr, lr, #0x07e00000      @ 00000ggg'ggg00000'rrrrr000'000bbbbb
+    mov     r12,#0x00000001
+    orr     r12,r12,#0x00000800
+    orr     r12,r12,#0x00200000      @ rounding constant
+
+    mov     r8, #((240/6)-1) << 24   @ cols
+    orr     r8, r8, #160/3           @ rows
+
+    add     r0, r0, #320*2*13        @ skip 13 lines: (240-213)/2
+loop1:                               @ each pass: 6x3 source block -> 8x4 output
+    ldr     r10,[r1]
+    ldr     r11,[r1, #320*2*1]
+
+    unpack_lo r4, r10
+    unpack_hi r5, r10
+    unpack_lo r6, r11
+    unpack_hi r7, r11
+
+    ldr     r11,[r1, #4]
+
+    do_3_5  r4, r5
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r10, lsl #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0]                 @ 0,1
+
+    ldr     r10,[r1, #320*2*2]
+
+    do_3_5  r4, r6
+    orr     r3, r2, r2, lsl #16
+    mov     r3, r3, lsr #16          @ 8
+
+    do_14_7_7_4 r7, r5, r6, r4
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*1]       @ 8,9
+
+    unpack_lo r4, r10
+    unpack_hi r9, r10
+
+    do_3_5  r4, r6
+    orr     r3, r2, r2, lsl #16
+    mov     r3, r3, lsr #16
+
+    do_14_7_7_4 r7, r9, r6, r4
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*2]       @ 16,17
+
+    do_3_5  r4, r9
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r10, lsl #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*3]       @ 24,25
+
+    ldr     r10,[r1, #320*2*1+4]
+
+    unpack_lo r6, r11
+    unpack_lo r4, r10
+
+    do_3_5  r6, r5
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r11, lsl #16
+    orr     r2, r3, r2, lsr #16
+    str     r2, [r0, #4]             @ 2,3
+
+    do_14_7_7_4 r7, r4, r5, r6
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r2, lsr #16
+
+    ldr     r5, [r1, #320*2*2+4]
+
+    do_3_5  r6, r4
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*1+4]     @ 10,11
+
+    unpack_lo r6, r5
+
+    do_14_7_7_4 r7, r4, r9, r6
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r2, lsr #16
+
+    do_3_5  r6, r4
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*2+4]     @ 18,19
+
+    unpack_hi r4, r10
+
+    ldr     r10,[r1, #8]
+
+    do_3_5  r6, r9
+    orr     r2, r2, r2, lsl #16
+    mov     r2, r2, lsr #16
+    orr     r2, r2, r5, lsl #16
+    str     r2, [r0, #320*2*3+4]     @ 26,27
+
+    unpack_hi r6, r11
+    unpack_lo r7, r10
+
+    do_3_5  r6, r7
+    orr     r2, r2, r2, lsr #16
+    mov     r2, r2, lsl #16
+    orr     r2, r2, r11, lsr #16
+    str     r2, [r0, #8]             @ 4,5
+
+    ldr     r11,[r1, #320*2*1+8]
+
+    unpack_hi r9, r10
+
+    do_3_5  r9, r7
+    orr     r2, r2, r2, lsr #16
+    mov     r2, r2, lsl #16
+    orr     r2, r2, r10, lsr #16
+    mov     r2, r2, ror #16
+    str     r2, [r0, #12]            @ 6,7
+
+    unpack_lo r10,r11
+
+    do_3_5  r6, r4
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r2, lsr #16
+
+    do_14_7_7_4 r10, r4, r7, r6
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*1+8]     @ 12,13
+
+    unpack_hi r6, r11
+
+    ldr     r11,[r1, #320*2*2+8]
+
+    do_14_7_7_4 r10, r6, r7, r9
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r2, lsr #16
+
+    do_3_5  r9, r6
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*1+12]    @ 14,15
+
+    unpack_hi r7, r5
+    unpack_lo r9, r11
+
+    do_3_5  r7, r4
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r2, lsr #16
+
+    do_14_7_7_4 r10, r4, r9, r7
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*2+8]     @ 20,21
+
+    do_3_5  r7, r9
+    orr     r2, r2, r2, lsr #16
+    mov     r2, r2, lsl #16
+    orr     r2, r2, r5, lsr #16
+    str     r2, [r0, #320*2*3+8]     @ 28,29
+
+    unpack_hi r5, r11
+
+    do_14_7_7_4 r10, r6, r9, r5
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r2, lsr #16
+
+    do_3_5  r5, r6
+    orr     r2, r2, r2, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #320*2*2+12]    @ 22,23
+
+    do_3_5  r5, r9
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r11, lsr #16
+    orr     r2, r3, r2, lsl #16
+    mov     r2, r2, ror #16
+    str     r2, [r0, #320*2*3+12]    @ 30,31
+
+    add     r0, r0, #16
+    add     r1, r1, #12
+
+    subs    r8, r8, #1<<24
+    bpl     loop1
+
+    add     r0, r0, #320*3*2
+    add     r1, r1, #(320*2+80)*2
+    sub     r8, r8, #1
+    tst     r8, #0xff
+    add     r8, r8, #(240/6) << 24   @ cols
+    bne     loop1
+
+    @@ last line
+    mov     r8, #240/6
+
+loop2:
+    ldmia   r1!,{r9,r10,r11}
+
+    unpack_lo r4, r9
+    unpack_hi r5, r9
+
+    do_3_5  r4, r5
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r9, lsl #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0], #4
+
+    unpack_lo r6, r10
+    unpack_hi r7, r10
+
+    do_3_5  r6, r5
+    orr     r2, r2, r2, lsl #16
+    mov     r2, r2, lsr #16
+    orr     r2, r2, r10, lsl #16
+    str     r2, [r0], #4
+
+    unpack_lo r4, r11
+    unpack_hi r5, r11
+
+    do_3_5  r7, r4
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r10, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0], #4
+
+    do_3_5  r5, r4
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r11, lsr #16
+    orr     r2, r3, r2, lsl #16
+    mov     r2, r2, ror #16
+    str     r2, [r0], #4
+
+    subs    r8, r8, #1
+    bne     loop2
+
+    ldmfd   sp!,{r4-r11,pc}
+
+@ vim:filetype=armasm
diff --git a/video.c b/video.c
index 7415c76..8edea82 100644
--- a/video.c
+++ b/video.c
@@ -3339,6 +3339,12 @@ void flip_screen()
 void flip_screen()
 {
+  if((screen_scale == scaled_aspect) &&
+     (resolution_width == small_resolution_width) &&
+     (resolution_height == small_resolution_height))
+  {
+    upscale_aspect(gpsp_gp2x_screen, screen_pixels);
+  }
   pollux_video_flip();
   screen_pixels = (u16 *)gpsp_gp2x_screen + screen_offset;
 }
 
@@ -3692,13 +3698,19 @@ void video_resolution_large()
 void video_resolution_small()
 {
-  screen_offset = 320*40 + 40;
+  if(screen_scale == scaled_aspect)
+    screen_offset = 320*(80 - 14) + 80;
+  else
+    screen_offset = 320*40 + 40;
   resolution_width = 240;
   resolution_height = 160;
 
-  fb_use_buffers(999);
+  fb_use_buffers(3);
 
   clear_screen(0);
   flip_screen();
+  clear_screen(0);
+  flip_screen();
+  clear_screen(0);
 }
 
 void set_gba_resolution(video_scale_type scale)
-- 
2.39.5
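
Note (appended by the editor, not part of the patch): for readers tracing the
arithmetic above, here is a minimal C model of what upscale_aspect computes.
Each group of 3 source pixels becomes 4 output pixels per axis (240x160 ->
320x213; loop2 derives the 213th line from the last source row). The edge
pixels of each 4x4 output block copy the nearest source pixel, the in-between
ones use the 3:5 blend, and the interior "diagonal" ones use the 14:7:7:4
blend of the four surrounding source pixels. All names here (unpack, pack,
blend_3_5, blend_14_7_7_4, scale_row_3to4) are illustrative, not gpSP
functions, and the sketch ignores the 320-pixel source pitch and the 13-line
top border that the assembly handles.

#include <stdint.h>

/* RGB565 pixels are "unpacked" so all three channels sit in one 32-bit word
   with gaps between them: 00000ggg'ggg00000'rrrrr000'000bbbbb. A weighted
   sum with weights totalling 32 then cannot overflow one channel into the
   next, so a single ARM add blends all three channels at once. */
#define UNPACK_MASK 0x07e0f81fu
#define ROUND_ONE   0x00200801u   /* a 1 in the lowest bit of each channel */

static uint32_t unpack(uint16_t px)
{
    uint32_t x = px;
    return (x | (x << 16)) & UNPACK_MASK;
}

static uint16_t pack(uint32_t x)
{
    return (uint16_t)(x | (x >> 16));  /* fold green back into bits 5..10 */
}

/* (a*3 + b*5 + 4) / 8 per channel, i.e. the do_3_5 macro */
static uint16_t blend_3_5(uint16_t a, uint16_t b)
{
    uint32_t sum = unpack(a) * 3 + unpack(b) * 5 + ROUND_ONE * 4;
    return pack((sum >> 3) & UNPACK_MASK);
}

/* (a*14 + b*7 + c*7 + d*4 + 16) / 32 per channel, i.e. do_14_7_7_4 */
static uint16_t blend_14_7_7_4(uint16_t a, uint16_t b, uint16_t c, uint16_t d)
{
    uint32_t sum = unpack(a) * 14 + unpack(b) * 7 + unpack(c) * 7
                 + unpack(d) * 4 + ROUND_ONE * 16;
    return pack((sum >> 5) & UNPACK_MASK);
}

/* Horizontal 3 -> 4 mapping, matching the store comments (@ 0,1 ... @ 6,7):
   d0 = s0, d1 = 3:5(s0,s1), d2 = 3:5(s2,s1), d3 = s2. The same weights are
   applied vertically to turn every 3 source rows into 4 output rows. */
static void scale_row_3to4(const uint16_t *src, uint16_t *dst, int groups)
{
    for (int i = 0; i < groups; i++, src += 3, dst += 4) {
        dst[0] = src[0];
        dst[1] = blend_3_5(src[0], src[1]);
        dst[2] = blend_3_5(src[2], src[1]);
        dst[3] = src[2];
    }
}

The unpacked form is why the assembly pins lr (the channel mask) and r12 (the
per-channel rounding bit) for the whole routine: every blend then costs only
a few adds plus one masked shift, with no per-channel repacking.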