X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=pico%2Fdraw_arm.s;h=ec5fcf7ecc0efe5338d659b2a08e89487d6dca79;hb=cff531af94bd9c9c89ae162e80f48ddc26a4e504;hp=5074a637a92314a11f4773b66c6ffb25ecb67670;hpb=1cfc5cc4ce06642b9bc45ca3b9d32793718e9455;p=picodrive.git diff --git a/pico/draw_arm.s b/pico/draw_arm.s index 5074a63..ec5fcf7 100644 --- a/pico/draw_arm.s +++ b/pico/draw_arm.s @@ -1,10 +1,12 @@ -@ vim:filetype=armasm - -@ ARM assembly versions of some funtions from draw.c -@ this is highly specialized, be careful if changing related C code! - -@ (c) Copyright 2007-2008, Grazvydas "notaz" Ignotas -@ All Rights Reserved +/* + * assembly optimized versions of most funtions from draw.c + * (C) notaz, 2006-2010 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + * + * this is highly specialized, be careful if changing related C code! + */ .include "port_config.s" @@ -19,6 +21,8 @@ .extern DrawStripInterlace .extern HighCacheS_ptr +.equiv OVERRIDE_HIGHCOL, 1 + .equ PDRAW_SPRITES_MOVED, (1<<0) .equ PDRAW_WND_DIFF_PRIO, (1<<1) .equ PDRAW_ACC_SPRITES, (1<<2) @@ -72,7 +76,7 @@ .endif ldreqb r4, [r1,#\offs] orrne r4, r3, r4 - andeq r4, r4, #0x3f + andeq r4, r4, #0xbf strb r4, [r1,#\offs] .endm @@ -163,14 +167,12 @@ .endif beq 0f cmp r4, #0xe - ldrgeb r4, [r1,#\ofs] - orrlt r4, r3, r4 @ normal - - biceq r4, r4, #0xc0 @ hilight - orreq r4, r4, #0x80 - orrgt r4, r4, #0xc0 @ shadow + ldrgeb r7, [r1,#\ofs] + orrlt r7, r3, r4 @ normal - strb r4, [r1,#\ofs] + bicge r7, r7, #0xc0 + orrge r7, r7, r4, lsl #6 + strb r7, [r1,#\ofs] 0: .endm @@ -198,38 +200,41 @@ TileDoShGenPixel 16, 7 @ #0x000f0000 .endm -.macro TileDoShGenPixel_noop shift ofs +.macro TileDoShGenPixel_markop shift ofs .if \shift - and r4, r12, r2, lsr #\shift + ands r4, r12, r2, lsr #\shift .else - and r4, r12, r2 + ands r4, r12, r2 .endif - sub r7, r4, #1 - cmp r7, #0xd - orrcc r4, r3, r4 @ 0-0xc (was 1-0xd) - strccb r4, [r1,#\ofs] + beq 0f + cmp r4, #0xe + ldrgeb r4, [r1,#\ofs] + orrlt r4, r3, r4 + orrge r4, r4, #0x80 + strb r4, [r1,#\ofs] +0: .endm -.macro TileFlipSh_noop - TileDoShGenPixel_noop 16, 0 @ #0x000f0000 - TileDoShGenPixel_noop 20, 1 @ #0x00f00000 - TileDoShGenPixel_noop 24, 2 @ #0x0f000000 - TileDoShGenPixel_noop 28, 3 @ #0xf0000000 - TileDoShGenPixel_noop 0, 4 @ #0x0000000f - TileDoShGenPixel_noop 4, 5 @ #0x000000f0 - TileDoShGenPixel_noop 8, 6 @ #0x00000f00 - TileDoShGenPixel_noop 12, 7 @ #0x0000f000 +.macro TileFlipSh_markop + TileDoShGenPixel_markop 16, 0 @ #0x000f0000 + TileDoShGenPixel_markop 20, 1 @ #0x00f00000 + TileDoShGenPixel_markop 24, 2 @ #0x0f000000 + TileDoShGenPixel_markop 28, 3 @ #0xf0000000 + TileDoShGenPixel_markop 0, 4 @ #0x0000000f + TileDoShGenPixel_markop 4, 5 @ #0x000000f0 + TileDoShGenPixel_markop 8, 6 @ #0x00000f00 + TileDoShGenPixel_markop 12, 7 @ #0x0000f000 .endm -.macro TileNormSh_noop - TileDoShGenPixel_noop 12, 0 @ #0x0000f000 - TileDoShGenPixel_noop 8, 1 @ #0x00000f00 - TileDoShGenPixel_noop 4, 2 @ #0x000000f0 - TileDoShGenPixel_noop 0, 3 @ #0x0000000f - TileDoShGenPixel_noop 28, 4 @ #0xf0000000 - TileDoShGenPixel_noop 24, 5 @ #0x0f000000 - TileDoShGenPixel_noop 20, 6 @ #0x00f00000 - TileDoShGenPixel_noop 16, 7 @ #0x000f0000 +.macro TileNormSh_markop + TileDoShGenPixel_markop 12, 0 @ #0x0000f000 + TileDoShGenPixel_markop 8, 1 @ #0x00000f00 + TileDoShGenPixel_markop 4, 2 @ #0x000000f0 + TileDoShGenPixel_markop 0, 3 @ #0x0000000f + TileDoShGenPixel_markop 28, 4 @ #0xf0000000 + TileDoShGenPixel_markop 24, 5 @ #0x0f000000 + TileDoShGenPixel_markop 20, 6 @ #0x00f00000 + TileDoShGenPixel_markop 16, 7 @ #0x000f0000 .endm .macro TileDoShGenPixel_onlyop_lp shift ofs @@ -239,14 +244,13 @@ ands r7, r12, r2 .endif ldrneb r4, [r1,#\ofs] - tstne r4, #0x40 - beq 0f - cmp r7, #0xe - biceq r4, r4, #0xc0 @ hilight - orreq r4, r4, #0x80 - orrgt r4, r4, #0xc0 @ shadow - strgeb r4, [r1,#\ofs] + blt 0f + + tst r4, #0xc0 + bicne r4, r4, #0xc0 + orrne r4, r4, r7, lsl #6 + strneb r4, [r1,#\ofs] 0: .endm @@ -873,8 +877,8 @@ DrawTilesFromCache: .dtfc_shadow_blank: tst r1, #1 ldrneb r4, [r1] - mov r6, #0x3f - and r4, r4, #0x3f + mov r6, #0xbf + and r4, r4, #0xbf strneb r4, [r1], #1 ldrh r4, [r1] orr r6, r6, r6, lsl #8 @@ -925,7 +929,7 @@ DrawTilesFromCache: add r1, r11,#8 mov r3, #320/4/4 - mov r6, #0x3f + mov r6, #0xbf orr r6, r6, r6, lsl #8 orr r6, r6, r6, lsl #16 .dtfc_loop_shprep: @@ -991,7 +995,7 @@ DrawSpriteSHi: orr r9, r9, r4, lsl #16 orr r9, r9, #0x90000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) cmp r12,r9, lsr #28 @ sh/hi with pal3? - cmpne r3, #1 @ if not, is ir hi prio? + cmpne r3, #1 @ if not, is it hi prio? bne DrawSpriteSHi @ non-operator low sprite, already drawn ldr r3, [r0] @ sprite[0] @@ -1089,7 +1093,7 @@ DrawSpriteSHi: tst r9, #0x0800 bne .dsprShi_TileFlip_sh - @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern + @ (r1=pdest, r2=pixels8, r3=pal) r4, r7: scratch, r12: helper pattern .dsprShi_TileNorm_sh: TileNormSh b .dsprShi_loop @@ -1271,7 +1275,7 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites .dspr_singlec_sh: cmp r2, #0xe0000000 - bcs .dspr_loop @ operator tileline, ignore + bcs .dspr_TileNorm_sh @ op. tileline, markop. XXX: maybe add a spec. handler? .dspr_SingleColor: and r4, r2, #0xf @@ -1295,11 +1299,11 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern .dspr_TileNorm_sh: - TileNormSh_noop + TileNormSh_markop b .dspr_loop .dspr_TileFlip_sh: - TileFlipSh_noop + TileFlipSh_markop b .dspr_loop @@ -1593,53 +1597,26 @@ FinalizeLineBGR444: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -@ hilights 2 pixels in RGB555/BGR555 format -.macro TileDoShHi2Pixels555 reg - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #27 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #26 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #27 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #27 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #26 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #27 -.endm - - @ Convert 0000bbb0 ggg0rrr0 @ to rrrrrggg gggbbbbb -@ r2,r3,r9 - scratch, lr = 0x001c001c, r8 = 0x00030003 +@ r2,r3 - scratch, lr = 0x001c001c, r8 = 0x08610861 .macro convRGB565 reg - and r2, lr, \reg,lsl #1 - and r9, r8, \reg,lsr #2 - orr r2, r2, r9 @ r2=red - and r3, lr, \reg,lsr #7 - and r9, r8, \reg,lsr #10 - orr r3, r3, r9 @ r3=blue - and \reg, \reg, lr, lsl #3 - orr \reg, \reg, \reg,lsl #3 @ green - orr \reg, \reg, r2, lsl #11 @ add red back - orr \reg, \reg, r3 @ add blue back + and r2, lr, \reg,lsr #7 @ b + and r3, lr, \reg,lsr #3 @ g + and \reg, lr, \reg,lsl #1 @ r + orr r2, r2, r3, lsl #6 + orr \reg, r2, \reg,lsl #11 + + and r2, r8, \reg,lsr #4 + orr \reg, \reg, r2 .endm -@ trashes: r2-r9,r12,lr; r0,r1 are advanced +@ trashes: r2-r8,r12,lr; r8 = 0x08610861; r0,r1 are advanced .macro vidConvCpyRGB565_local mov r12, r2, lsr #3 @ repeats mov lr, #0x001c0000 orr lr, lr, #0x01c @ lr == pattern 0x001c001c - mov r8, #0x00030000 - orr r8, r8, #0x003 0: ldmia r1!, {r4-r7} @@ -1661,6 +1638,9 @@ FinalizeLineBGR444: vidConvCpyRGB565: @ void *to, void *from, int pixels stmfd sp!, {r4-r9,lr} + mov r8, #0x0061 + orr r8, r8, #0x0800 + orr r8, r8, r8, lsl #16 vidConvCpyRGB565_local ldmfd sp!, {r4-r9,lr} bx lr @@ -1674,9 +1654,8 @@ PicoDoHighPal555: ldr r8, =(Pico+0x22228) @ Pico.video PicoDoHighPal555_nopush: - str r1, [sp, #-8] @ is called from FinalizeLineRGB555? + orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h - str r0, [sp, #-4] ldr r0, =HighPal mov r1, #0 @@ -1684,10 +1663,13 @@ PicoDoHighPal555_nopush: sub r1, r8, #0x128 @ r1=Pico.cram mov r2, #0x40 + mov r8, #0x0061 + orr r8, r8, #0x0800 + orr r8, r8, r8, lsl #16 + vidConvCpyRGB565_local - ldr r0, [sp, #-4] - tst r0, r0 + tst r9, #(1<<31) beq PicoDoHighPal555_end ldr r3, =HighPal @@ -1709,28 +1691,37 @@ PicoDoHighPal555_nopush: bne .fl_loopcpRGB555_sh @ hilighted pixels: + @ t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; + @ t |= (t >> 4) & 0x08610861; + @ r8=0x08610861 sub r3, r3, #0x40*2 - mov lr, #0x40/2 + mov lr, #0x40/4 .fl_loopcpRGB555_hi: - ldr r1, [r3], #4 - TileDoShHi2Pixels555 r1 - str r1, [r4], #4 + ldmia r3!, {r1,r6} + and r1, r12, r1, lsr #1 + and r6, r12, r6, lsr #1 + add r1, r12, r1 + add r6, r12, r6 + and r5, r8, r1, lsr #4 + and r7, r8, r6, lsr #4 + orr r1, r1, r5 + orr r6, r6, r7 + stmia r4!, {r1,r6} subs lr, lr, #1 bne .fl_loopcpRGB555_hi mov r0, #1 PicoDoHighPal555_end: - ldr r1, [sp, #-8] - tst r1, r1 + tst r9, #1 ldmeqfd sp!, {r4-r9,pc} ldr r8, =(Pico+0x22228) @ Pico.video b FinalizeLineRGB555_pal_done -.global FinalizeLineRGB555 @ int sh +.global FinalizeLine555 @ int sh -FinalizeLineRGB555: +FinalizeLine555: stmfd sp!, {r4-r9,lr} ldr r8, =(Pico+0x22228) @ Pico.video @@ -2037,3 +2028,4 @@ blockcpy_loop_or: ldmfd sp!, {r4-r6} bx lr +@ vim:filetype=armasm