From: kub Date: Wed, 21 Sep 2022 23:26:48 +0000 (+0000) Subject: sms, add fast renderer, remove 1st column (8 px) if blanked X-Git-Tag: v2.00~296 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=96948bdfc88d5f66c3e7047b7fa3256813fd657d;p=picodrive.git sms, add fast renderer, remove 1st column (8 px) if blanked --- diff --git a/pico/draw.c b/pico/draw.c index bf4ef38e..38820e8f 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1646,13 +1646,17 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) PicoDrawUpdateHighPal(); - if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & 0x3) == 0x3) - len = 160; - else if (Pico.video.reg[12]&1) len = 320; - else len = 256; + len = 256; + if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) + len = 160; + else if (!(PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[12]&1)) + len = 320; + if ((PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[0] & 0x20) && len == 256) + len -= 8, ps += 8; if ((*est->PicoOpt & POPT_EN_SOFTSCALE) && len < 320) { - if (len == 256) { + if (len >= 240 && len <= 256) { + pd += (256-len)>>1; switch (PicoIn.filter) { case 3: h_upscale_bl4_4_5(pd, 320, ps, 256, len, f_pal); break; case 2: h_upscale_bl2_4_5(pd, 320, ps, 256, len, f_pal); break; @@ -1706,10 +1710,13 @@ void FinalizeLine8bit(int sh, int line, struct PicoEState *est) Pico.m.dirtyPal = 2; } - if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & 0x3) == 0x3) - len = 160; - else if (Pico.video.reg[12]&1) len = 320; - else len = 256; + len = 256; + if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) + len = 160; + else if (!(PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[12]&1)) + len = 320; + if ((PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[0] & 0x20) && len == 256) + len -= 8, ps += 8; if (DrawLineDestIncrement == 0) pd = est->HighCol+8; @@ -1720,7 +1727,7 @@ void FinalizeLine8bit(int sh, int line, struct PicoEState *est) if (!sh && 
(est->rendstatus & PDRAW_SONIC_MODE)) pal = est->SonicPalCount*0x40; // Smoothing can't be used with CLUT, hence it's always Nearest Neighbour. - if (len == 256) + if (len >= 240) // use reverse version since src and dest ptr may be the same. rh_upscale_nn_4_5(pd, 320, ps, 256, len, f_or); else diff --git a/pico/draw_arm.S b/pico/draw_arm.S index c5997025..962811ea 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1651,34 +1651,47 @@ FinalizeLine555: ldr r4, [r5] ldr r7, [r5, #OFS_PicoIn_AHW-OFS_PicoIn_opt] ldrb r12,[r8, #OFS_Pico_video_reg+12] + ldrb r6, [r8, #OFS_Pico_video_reg+0] ldr r2, [r8, #OFS_Pico_m_hardware] add r1, r1, #8 - tst r7, #0x10 - beq .fl_no20colRGB555 + tst r7, #0x10 @ SMS ? + beq .fl_noSMS + and r7, r2, #0x3 cmp r7, #0x3 @ Game Gear, LCD? - bne .fl_no20colRGB555 + beq .fl_gg20col + + tst r6, #0x20 + movne r2, #248/8 @ len = 248 + addne r1, r1, #8 @ ps += 8 + moveq r2, #256/8 @ len = 256 + b .fl_check32scaling +.fl_gg20col: mov r2, #160/8 @ len = 160 tst r4, #0x4000 @ EN_SOFTSCALE? bne .fl_20scale_RGB555 @ scale 160->320 - beq .fl_checkborder + b .fl_checkborder -.fl_no20colRGB555: +.fl_noSMS: tst r12, #1 @ h32? movne r2, #320/8 @ len = 320 - bne .fl_no32colRGB555 - moveq r2, #256/8 @ len = 256 + bne .fl_40colRGB555 + mov r2, #256/8 @ len = 256 + +.fl_check32scaling: tst r4, #0x4000 @ EN_SOFTSCALE? + rsbne r7, r2, #256/8 + addne r0, r0, r7, lsl #3 @ pd += (256-len)>>1 bne .fl_32scale_RGB555 @ scale 256->320 .fl_checkborder: tst r4, #0x0100 @ DIS_32C_BORDER? 
- rsbeq r4, r2, #320/8 @ pd += (320-len)/2 - addeq r0, r0, r4, lsl #3 + rsbeq r7, r2, #320/8 @ pd += (320-len)/2 + addeq r0, r0, r7, lsl #3 -.fl_no32colRGB555: +.fl_40colRGB555: #ifdef UNALIGNED_DRAWLINEDEST @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer tst r0, #2 @@ -2250,24 +2263,51 @@ blockcpy: cmp r0, r1 bhs blockcpyhi - mov r2, r2, lsr #4 + subs r2, r2, #16 + blt blockcpy2 blockcpy_loop: ldmia r1!, {r3-r5,r12} - subs r2, r2, #1 + subs r2, r2, #16 stmia r0!, {r3-r5,r12} - bne blockcpy_loop + bge blockcpy_loop + +blockcpy2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4,r5} + bxlt lr + +blockcpy_loop2: + ldr r3, [r1], #4 + subs r2, r2, #4 + str r3, [r0], #4 + bge blockcpy_loop2 + ldmfd sp!, {r4,r5} bx lr blockcpyhi: add r0, r0, r2 add r1, r1, r2 - mov r2, r2, lsr #4 + + subs r2, r2, #16 + blt blockcpyhi2 blockcpyhi_loop: ldmdb r1!, {r3-r5,r12} - subs r2, r2, #1 + subs r2, r2, #16 stmdb r0!, {r3-r5,r12} - bne blockcpyhi_loop + bge blockcpyhi_loop + +blockcpyhi2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4,r5} + bxlt lr + +blockcpyhi_loop2: + ldr r3, [r1, #-4]! + subs r2, r2, #4 + str r3, [r0, #-4]! 
+ bge blockcpyhi_loop2 + ldmfd sp!, {r4,r5} bx lr @@ -2281,32 +2321,61 @@ blockcpy_or: cmp r0, r1 bhs blockcpyhi_or - mov r2, r2, lsr #4 + subs r2, r2, #16 + blt blockcpy_or2 blockcpy_loop_or: ldmia r1!, {r4-r6,r12} - subs r2, r2, #1 + subs r2, r2, #16 orr r4, r4, r3 orr r5, r5, r3 orr r6, r6, r3 orr r12,r12,r3 stmia r0!, {r4-r6,r12} - bne blockcpy_loop_or + bge blockcpy_loop_or + +blockcpy_or2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4-r6} + bxlt lr + +blockcpy_loop_or2: + ldr r4, [r1], #4 + subs r2, r2, #4 + orr r4, r4, r3 + str r4, [r0], #4 + bge blockcpy_loop_or2 + ldmfd sp!, {r4-r6} bx lr blockcpyhi_or: add r0, r0, r2 add r1, r1, r2 - mov r2, r2, lsr #4 + + subs r2, r2, #16 + blt blockcpyhi_or2 blockcpyhi_loop_or: ldmdb r1!, {r4-r6,r12} - subs r2, r2, #1 + subs r2, r2, #16 orr r4, r4, r3 orr r5, r5, r3 orr r6, r6, r3 orr r12,r12,r3 stmdb r0!, {r4-r6,r12} - bne blockcpyhi_loop_or + bge blockcpyhi_loop_or + +blockcpyhi_or2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4-r6} + bxlt lr + +blockcpyhi_loop_or2: + ldr r4, [r1, #-4]! + subs r2, r2, #4 + orr r4, r4, r3 + str r4, [r0, #-4]! + bge blockcpyhi_loop_or2 + ldmfd sp!, {r4-r6} bx lr diff --git a/pico/mode4.c b/pico/mode4.c index 451bddfb..d28b1f57 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -154,15 +154,14 @@ static void ParseSpritesM4(int scanline) { struct PicoVideo *pv = &Pico.video; u8 *sat; - int xoff = 8; // relative to HighCol, which is (screen - 8) + int xoff = line_offset; int sprite_base, addr_mask; int zoomed = pv->reg[1] & 0x1; // zoomed sprites, e.g. 
Earthworm Jim unsigned int pack; int i, s, h, m; if (pv->reg[0] & 8) - xoff = 0; - xoff += line_offset; + xoff -= 8; // sprite shift if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) xoff -= 48; // GG LCD, adjust to center 160 px @@ -204,6 +203,9 @@ static void ParseSpritesM4(int scanline) // make sprite pixel map by merging the 4 bitplanes pack = ((pack | (pack>>16)) | ((pack | (pack>>16))>>8)) & 0xff; if (!m) m = CollisionDetect(sprites_map, sprites_x[s], pack, zoomed); + // no collision detection in 1st column if it's masked + if (pv->reg[0] & 0x20) + sprites_map[1] = 0; } s++; } @@ -290,28 +292,26 @@ static void DrawDisplayM4(int scanline) if (scanline < 16 && (pv->reg[0] & 0x40)) dx = 0; // hscroll disabled for top 2 rows (e.g. Fantasy Zone II) - tilex = ((-dx >> 3) + cellskip) & 0x1f; + tilex = (32 - (dx >> 3) + cellskip) & 0x1f; ty = (line & 7) << 1; // Y-Offset into tile cells = maxcells - cellskip; - dx = ((dx - 1) & 7) + 1; - if (dx != 8) - cells++; // have hscroll, need to draw 1 cell more + dx = (dx & 7); dx += cellskip << 3; dx += line_offset; // tiles if (!(pv->debug_p & PVD_KILL_B)) { if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) { - // on GG render only the center 160 px - DrawStripM4(nametab , dx | ((cells-12)<< 16),(tilex+6) | (ty << 16)); + // on GG render only the center 160 px, but mind hscroll + DrawStripM4(nametab , (dx-8) | ((cells-11)<< 16),(tilex+5) | (ty << 16)); } else if (pv->reg[0] & 0x80) { // vscroll disabled for rightmost 8 columns (e.g. 
Gauntlet) int dx2 = dx + (cells-8)*8, tilex2 = tilex + (cells-8), ty2 = scanline&7; - DrawStripM4(nametab, dx | ((cells-8) << 16), tilex | (ty << 16)); - DrawStripM4(nametab2, dx2 | (8 << 16), tilex2 | (ty2 << 17)); + DrawStripM4(nametab, dx | ((cells-8) << 16), tilex | (ty << 16)); + DrawStripM4(nametab2, dx2 | (8 << 16), tilex2 | (ty2 << 17)); } else - DrawStripM4(nametab , dx | ( cells << 16), tilex | (ty << 16)); + DrawStripM4(nametab , dx | ( cells << 16), tilex | (ty << 16)); } // sprites @@ -320,9 +320,9 @@ static void DrawDisplayM4(int scanline) if ((pv->reg[0] & 0x20) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) != (PMS_HW_GG|PMS_HW_LCD)) { // first column masked with background, caculate offset to start of line - dx = (dx&~0x1f) / 4; + dx = line_offset / 4; ty = ((pv->reg[7]&0x0f)|0x10) * 0x01010101; - ((u32 *)Pico.est.HighCol)[dx+2] = ((u32 *)Pico.est.HighCol)[dx+3] = ty; + ((u32 *)Pico.est.HighCol)[dx] = ((u32 *)Pico.est.HighCol)[dx+1] = ty; } } @@ -432,12 +432,12 @@ static void ParseSpritesTMS(int scanline) struct PicoVideo *pv = &Pico.video; unsigned int pack; u8 *sat; - int xoff = 8; // relative to HighCol, which is (screen - 8) + int xoff; int sprite_base, addr_mask; int zoomed = pv->reg[1] & 0x1; // zoomed sprites int i, s, h, m; - xoff += line_offset; + xoff = line_offset; sat = (u8 *)PicoMem.vramb + ((pv->reg[5] & 0x7e) << 7); if (pv->reg[1] & 2) { @@ -561,7 +561,7 @@ static void DrawDisplayM1(int scanline) tilex = cellskip & 0x1f; cells = maxcells - cellskip; - dx = (cellskip << 3) + line_offset + 8; + dx = (cellskip << 3) + line_offset; // tiles if (!(pv->debug_p & PVD_KILL_B)) @@ -607,7 +607,7 @@ static void DrawDisplayM2(int scanline) tilex = cellskip & 0x1f; cells = maxcells - cellskip; - dx = (cellskip << 3) + line_offset + 8; + dx = (cellskip << 3) + line_offset; // tiles if (!(pv->debug_p & PVD_KILL_B)) @@ -658,7 +658,7 @@ static void DrawDisplayM3(int scanline) tilex = cellskip & 0x1f; cells = maxcells - cellskip; - dx = (cellskip << 3) 
+ line_offset + 8; + dx = (cellskip << 3) + line_offset; // tiles if (!(pv->debug_p & PVD_KILL_B)) @@ -708,7 +708,7 @@ static void DrawDisplayM0(int scanline) tilex = cellskip & 0x1f; cells = maxcells - cellskip; - dx = (cellskip << 3) + line_offset + 8; + dx = (cellskip << 3) + line_offset; // tiles if (!(pv->debug_p & PVD_KILL_B)) @@ -729,6 +729,7 @@ static void FinalizeLine8bitSMS(int line); void PicoFrameStartSMS(void) { int lines = 192, columns = 256, loffs, coffs; + skip_next_line = 0; loffs = screen_offset = 24; // 192 lines is really 224 with top/bottom bars Pico.est.rendstatus = PDRAW_32_COLS; @@ -741,9 +742,14 @@ void PicoFrameStartSMS(void) // Copy LCD enable flag for easier handling Pico.m.hardware &= ~PMS_HW_LCD; - if (PicoIn.opt & POPT_EN_GG_LCD) + if ((PicoIn.opt & POPT_EN_GG_LCD) && (Pico.m.hardware & PMS_HW_GG)) Pico.m.hardware |= PMS_HW_LCD; + if (!(Pico.m.hardware & PMS_HW_LCD) && (mode & 4) && (Pico.video.reg[0] & 0x20)) { + // SMS mode 4 with 1st column blanked + columns = 248; + Pico.est.rendstatus |= PDRAW_SMS_BLANK_1; + } if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) { // GG LCD always has 160x144 regardless of settings screen_offset = 24; // nonetheless the vdp timing has 224 lines @@ -761,15 +767,20 @@ void PicoFrameStartSMS(void) lines = 224; break; } - if (PicoIn.opt & POPT_EN_SOFTSCALE) { + + line_offset = 8; // FinalizeLine requires HighCol+8 + // ugh... nonetheless has offset in 8-bit fast mode if 1st col blanked! + coffs = (FinalizeLineSMS == NULL && columns == 248 ? 8 : 0); + if (FinalizeLineSMS != NULL && (PicoIn.opt & POPT_EN_SOFTSCALE)) { + // softscaling always generates 320px, but no scaling in 8bit fast coffs = 0; columns = 320; - } else - coffs = PicoIn.opt & POPT_DIS_32C_BORDER ? 0:(320-columns)/2; - line_offset = (FinalizeLineSMS == NULL ? 
coffs : 0); - - if (FinalizeLineSMS == FinalizeLineRGB555SMS) - line_offset = 0 /* done in FinalizeLine */; + } else if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) { + line_offset -= coffs; + coffs = (320-columns) / 2; + if (FinalizeLineSMS == NULL) + line_offset += coffs; // ... else centering done in FinalizeLine + } if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) { emu_video_mode_change(loffs, lines, coffs, columns); @@ -836,13 +847,16 @@ norender: /* Palette for TMS9918 mode, see https://www.smspower.org/Development/Palette */ // RGB values: #000000 #000000 #21c842 #5edc78 #5455ed #7d76fc #d4524d #42ebf5 // #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95bba #cccccc #ffffff -static u16 tmspal[32] = { +static u16 tmspal[] = { // SMS palette 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, 0x000a, 0x000f, 0x00aa, 0x00ff, 0x0050, 0x0f0f, 0x0aaa, 0x0fff, - // GG palette + // TMS palette 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055c, 0x0ee4, 0x055f, 0x077f, 0x05bc, 0x08ce, 0x03a2, 0x0b5c, 0x0ccc, 0x0fff, + // SMS palette, closer to the TMS one + 0x0000, 0x0000, 0x05f0, 0x05f5, 0x0a50, 0x0f55, 0x055a, 0x0ff0, + 0x055f, 0x0aaf, 0x05aa, 0x05af, 0x00a0, 0x0f5f, 0x0aaa, 0x0fff, }; void PicoDoHighPal555SMS(void) @@ -853,11 +867,11 @@ void PicoDoHighPal555SMS(void) unsigned int t; int i, j; - if (FinalizeLineSMS != FinalizeLine8bitSMS || Pico.m.dirtyPal == 2) + if (FinalizeLineSMS == FinalizeLineRGB555SMS || Pico.m.dirtyPal == 2) Pico.m.dirtyPal = 0; - // use hardware palette for 16bit accurate mode - if (FinalizeLineSMS == FinalizeLineRGB555SMS) + // use hardware palette if not in 8bit accurate mode + if (FinalizeLineSMS != FinalizeLine8bitSMS) spal = (void *)PicoMem.cram; /* SMS 6 bit cram data was already converted to MD/GG format by vdp write, @@ -906,8 +920,7 @@ void PicoDrawSetOutputSMS(pdso_t which) { case PDF_8BIT: FinalizeLineSMS = FinalizeLine8bitSMS; break; case PDF_RGB555: FinalizeLineSMS = FinalizeLineRGB555SMS; break; - // 
there's no fast renderer yet, just treat it like PDF_8BIT - default: FinalizeLineSMS = FinalizeLine8bitSMS; + default: FinalizeLineSMS = NULL; // no multiple palettes, no scaling PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); break; } rendstatus_old = -1; diff --git a/pico/pico.h b/pico/pico.h index 9b8df316..3675f96f 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -227,6 +227,7 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est); #define PDRAW_SKIP_FRAME (1<<10) // frame is skipped #define PDRAW_30_ROWS (1<<11) // 30 rows mode (240 lines) #define PDRAW_32X_SCALE (1<<12) // scale CLUT layer for 32X +#define PDRAW_SMS_BLANK_1 (1<<13) // 1st column blanked extern int rendstatus_old; extern int rendlines; diff --git a/pico/sms.c b/pico/sms.c index eb8da7e5..91d61e5d 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -461,7 +461,7 @@ static void write_bank_xor(unsigned short a, unsigned char d) if ((a&0x6000) != 0x2000) return; if (Pico.ms.mapper != PMS_MAP_XOR && Pico.ms.mapper) return; - elprintf(EL_Z80BNK, "bank 32k %04x %02x @ %04x", a, d, z80_pc()); + elprintf(EL_Z80BNK, "bank xor %04x %02x @ %04x", a, d, z80_pc()); Pico.ms.mapper = PMS_MAP_XOR; Pico.ms.carthw[0] = d; @@ -544,7 +544,7 @@ static void xwrite(unsigned int a, unsigned char d) } } -// TMR product codes and hardware type for know 50Hz-only games +// TMR product codes and hardware type for known 50Hz-only games static u32 region_pal[] = { // cf. 
GX+, core/cart_hw/sms_cartc.c 0x40207067 /* Addams Family */, 0x40207020 /* Back.Future 3 */, 0x40207058 /* Battlemaniacs */, 0x40007105 /* Cal.Games 2 */, diff --git a/pico/sound/resampler.c b/pico/sound/resampler.c index ebf50914..5761bc54 100644 --- a/pico/sound/resampler.c +++ b/pico/sound/resampler.c @@ -46,6 +46,10 @@ #include "../pico_types.h" #include "resampler.h" +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + static double besseli0(double x) { unsigned i; diff --git a/platform/common/arm_utils.h b/platform/common/arm_utils.h index 628daf65..213186e5 100644 --- a/platform/common/arm_utils.h +++ b/platform/common/arm_utils.h @@ -2,8 +2,9 @@ void bgr444_to_rgb32(void *to, void *from, unsigned entries); void bgr444_to_rgb32_sh(void *to, void *from); -void vidcpy_m2(void *dest, void *src, int m32col, int with_32c_border); -void vidcpy_m2_rot(void *dest, void *src, int m32col, int with_32c_border); +void vidcpy_8bit(void *dest, void *src, int x_y, int w_h); +void vidcpy_8bit_rot(void *dest, void *src, int x_y, int w_h); + void spend_cycles(int c); // utility void rotated_blit8 (void *dst, void *linesx4, int y, int is_32col); diff --git a/platform/common/arm_utils.s b/platform/common/arm_utils.s index b4f739d9..68ef3e41 100644 --- a/platform/common/arm_utils.s +++ b/platform/common/arm_utils.s @@ -124,72 +124,138 @@ bgr444_to_rgb32_sh: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - -@ mode2 blitter -.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border -vidcpy_m2: +.global vidcpy_8bit @ void *dest, void *src, int x_y, int w_h +vidcpy_8bit: stmfd sp!, {r4-r6,lr} - mov r12, #224 @ lines - add r0, r0, #320*8 + mov r12, r2, lsl #16 @ y + + mov r4, r12, lsr #16-8 @ 320*y = 256*y+64*y + add r4, r4, r12, lsr #16-6 + add r0, r0, r4 @ pd += 320*y + x + add r0, r0, r2, lsr #16 + + add r4, r4, r12, lsr #16-3 @ 328*y = 320*y + 8*y + add r1, r1, r4 @ ps += 328*y + x + 8 + add r1, r1, r2, lsr #16 add r1, 
r1, #8 - mov lr, #0 - tst r2, r2 - movne lr, #64 - tstne r3, r3 - addne r0, r0, #32 - addne r1, r1, #32 + mov lr, r3, lsr #16 @ w + mov r12, r3, lsl #16 @ h -vidCpyM2_loop_out: - mov r6, #10 - sub r6, r6, lr, lsr #5 @ -= 2 in 32col mode -vidCpyM2_loop: +vidCpy8bit_loop_out: + lsrs r6, lr, #5 +@ beq vidCpy8bit_loop_end +vidCpy8bit_loop: subs r6, r6, #1 ldmia r1!, {r2-r5} stmia r0!, {r2-r5} ldmia r1!, {r2-r5} stmia r0!, {r2-r5} - bne vidCpyM2_loop - - subs r12,r12,#1 - add r0, r0, lr - add r1, r1, #8 - add r1, r1, lr - bne vidCpyM2_loop_out + bne vidCpy8bit_loop + + ands r6, lr, #0x0018 + beq vidCpy8bit_loop_end +vidCpy8bit_loop2: + ldmia r1!, {r2-r3} + subs r6, r6, #8 + stmia r0!, {r2-r3} + bne vidCpy8bit_loop2 + +vidCpy8bit_loop_end: + subs r12,r12,#1<<16 + add r0, r0, #320 + sub r0, r0, lr + add r1, r1, #328 + sub r1, r1, lr + bne vidCpy8bit_loop_out ldmfd sp!, {r4-r6,pc} -.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border -vidcpy_m2_rot: - stmfd sp!,{r4-r8,lr} +.global vidcpy_8bit_rot @ void *dest, void *src, int x_y, int w_h +vidcpy_8bit_rot: + stmfd sp!, {r4-r10,lr} + + mov r12, r2, lsl #16 @ y + + add r0, r0, r12, lsr #16 @ pd += y + (319-x)*240 + mov r4, #320 + sub r4, r4, #1 + sub r4, r4, r2, lsr #16 @ (319-x) + add r0, r0, r4, lsl #8 + sub r0, r0, r4, lsl #4 + + mov r4, r12, lsr #16-8 @ 328*y = 256*y + 64*y + 8*y + add r4, r4, r12, lsr #16-6 + add r4, r4, r12, lsr #16-3 + add r1, r1, r4 @ ps += 328*y + x + 8 + add r1, r1, r2, lsr #16 add r1, r1, #8 - tst r2, r2 - subne r1, r1, #32 @ adjust - - mov r4, r0 - mov r5, r1 - mov r6, r2 - mov r7, #8+4 - -vidcpy_m2_rot_loop: - @ a bit lame but oh well.. 
- mov r0, r4 - mov r1, r5 - mov r2, r7 - mov r3, r6 + + mov lr, r3, lsr #16 @ w + mov r12, r3, lsl #16 @ h + mov r8, #328 - adr lr, after_rot_blit8 - stmfd sp!,{r4-r8,lr} - b rotated_blit8_2 - -after_rot_blit8: - add r5, r5, #328*4 - add r7, r7, #4 - cmp r7, #224+8+4 - ldmgefd sp!,{r4-r8,pc} - b vidcpy_m2_rot_loop +vidCpy8bitrot_loop_out: + mov r10, r0 + lsrs r9, lr, #2 +@ beq vidCpy8bitrot_loop_end +vidCpy8bitrot_loop: + mov r6, r1 + ldr r2, [r6], r8 + ldr r3, [r6], r8 + ldr r4, [r6], r8 + ldr r5, [r6], r8 + + mov r6, r2, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r3, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r4, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r5, lsl #24 + str r6, [r0], #-240 + + and r6, r3, #0xff00 + and r7, r2, #0xff00 + orr r6, r6, r7, lsr #8 + and r7, r4, #0xff00 + orr r6, r6, r7, lsl #8 + and r7, r5, #0xff00 + orr r6, r6, r7, lsl #16 + str r6, [r0], #-240 + + and r6, r4, #0xff0000 + and r7, r2, #0xff0000 + orr r6, r6, r7, lsr #16 + and r7, r3, #0xff0000 + orr r6, r6, r7, lsr #8 + and r7, r5, #0xff0000 + orr r6, r6, r7, lsl #8 + str r6, [r0], #-240 + + mov r6, r5, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r4, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r3, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r2, lsr #24 + str r6, [r0], #-240 + + subs r9, r9, #1 + add r1, r1, #4 + bne vidCpy8bitrot_loop + +vidCpy8bitrot_loop_end: + subs r12,r12,#4<<16 + add r0, r10, #4 + sub r1, r1, lr + add r1, r1, #4*328 + bne vidCpy8bitrot_loop_out + + ldmfd sp!, {r4-r10,pc} .global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c index a72953f1..39c7fafe 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -146,12 +146,6 @@ static void copy_bg(int dir) } } -static void menu_enter(int is_rom_loaded) -{ - plat_video_menu_enter(is_rom_loaded); - menu_w = menu_h = 0; -} - static void menu_draw_prep(void) { if (menu_w == g_menuscreen_w && menu_h == g_menuscreen_h) @@ 
-198,6 +192,13 @@ static void draw_savestate_bg(int slot) PicoTmpStateRestore(tmp_state); } +static void menu_enter(int is_rom_loaded) +{ + plat_video_menu_enter(is_rom_loaded); + menu_w = menu_h = 0; + menu_draw_prep(); +} + // --------- loading ROM screen ---------- static int cdload_called = 0; diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 92ea2ec6..c6a708f4 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -85,8 +85,6 @@ static int get_renderer(void) { if (doing_bg_frame) return RT_16BIT; - if ((PicoIn.AHW & PAHW_SMS) && currentConfig.renderer == RT_8BIT_FAST) - return RT_8BIT_ACC; // 8bpp fast is not there (yet?) if (PicoIn.AHW & PAHW_32X) return currentConfig.renderer32x; else @@ -217,6 +215,26 @@ static void draw_pico_ptr(void) p[pitch*2] ^= 0xffff; } +static void clear_1st_column(int firstcol, int firstline, int linecount) +{ + int size = is_16bit_mode() ? 2 : 1; + int black = is_16bit_mode() ? 0 : 0xe0; + int i; + + // SMS 1st column blanked, replace with black + if ((currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) && !doing_bg_frame) { + int pitch = 240*size; + char *p = (char *)g_screen_ptr + (319-(firstcol-8))*pitch; + for (i = 0; i < 8; i++, p -= pitch) + memset(p+(firstline)*size, black, linecount*size); + } else { + int pitch = 320*size; + char *p = (char *)g_screen_ptr + (firstline)*pitch; + for (i = 0; i < linecount; i++, p += pitch) + memset(p+(firstcol-8)*size, black, 8*size); + } +} + /* rot thing for Wiz */ static unsigned char __attribute__((aligned(4))) rot_buff[320*4*2]; @@ -246,7 +264,7 @@ static int EmuScanEnd8_rot(unsigned int num) if ((num & 3) != 3) return 0; rotated_blit8(g_screen_ptr, rot_buff, num + 1, - !(Pico.video.reg[12] & 1)); + !(Pico.video.reg[12] & 1) && !(PicoIn.opt & POPT_EN_SOFTSCALE)); return 0; } @@ -296,7 +314,7 @@ static int EmuScanEnd16_ld(unsigned int num) static int localPal[0x100]; static int localPalSize; -static void (*vidcpyM2)(void *dest, void *src, int m32col, int with_32c_border); +static 
void (*vidcpy8bit)(void *dest, void *src, int x_y, int w_h); static int (*make_local_pal)(int fast_mode); static int make_local_pal_md(int fast_mode) @@ -351,6 +369,11 @@ static int make_local_pal_sms(int fast_mode) bgr444_to_rgb32(localPal+i*0x40, tmspal, 32); memcpy(localPal+i*0x40+0x20, localPal+i*0x40, 0x20*4); } + } else if (fast_mode) { + for (i = 0;i >= 0; i--) { + bgr444_to_rgb32(localPal+i*0x40, PicoMem.cram+i*0x40, 32); + memcpy(localPal+i*0x40+0x20, localPal+i*0x40, 0x20*4); + } } else { for (i = Pico.est.SonicPalCount; i >= 0; i--) { bgr444_to_rgb32(localPal+i*0x40, Pico.est.SonicPal+i*0x40, 32); @@ -362,11 +385,15 @@ static int make_local_pal_sms(int fast_mode) return (Pico.est.SonicPalCount+1)*0x40; } +static int is_1stblanked; +static int firstline, linecount; +static int firstcol, colcount; + void pemu_finalize_frame(const char *fps, const char *notice) { int emu_opt = currentConfig.EmuOpt; - if (PicoIn.AHW & PAHW_32X) + if (is_16bit_mode()) localPalSize = 0; // nothing to do else if (get_renderer() == RT_8BIT_FAST) { @@ -377,13 +404,14 @@ void pemu_finalize_frame(const char *fps, const char *notice) if (PicoIn.AHW & PAHW_SVP) memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328/4); // clear top and bottom of overlap trash - if (!(Pico.est.rendstatus & PDRAW_30_ROWS)) { - memset32((int *)(Pico.est.Draw2FB+8*(224+8)), 0xe0e0e0e0, 328*8/4); - memset32((int *)(Pico.est.Draw2FB), 0xe0e0e0e0, 328*8/4); + if (firstline >= 8) { + unsigned char *p = Pico.est.Draw2FB + firstline*328; + memset32((int *)(p- 8*328), 0xe0e0e0e0, 328*8/4); + memset32((int *)(p+linecount*328), 0xe0e0e0e0, 328*8/4); } // do actual copy - vidcpyM2(g_screen_ptr, Pico.est.Draw2FB+328*8, - !(Pico.video.reg[12] & 1), !(PicoIn.opt & POPT_DIS_32C_BORDER)); + vidcpy8bit(g_screen_ptr, Pico.est.Draw2FB, + (firstcol << 16) | firstline, (colcount << 16) | linecount); } else if (get_renderer() == RT_8BIT_ACC) { @@ -391,7 +419,9 @@ void pemu_finalize_frame(const char *fps, const 
char *notice) if (Pico.m.dirtyPal) localPalSize = make_local_pal(0); } - else localPalSize = 0; // no palette in 16bit mode + + if (is_1stblanked) + clear_1st_column(firstcol, firstline, linecount); if (notice) osd_text(4, osd_y, notice); @@ -522,7 +552,7 @@ static void vid_reset_mode(void) case RT_8BIT_FAST: PicoIn.opt |= POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_NONE, 0); - vidcpyM2 = vidcpy_m2; + vidcpy8bit = vidcpy_8bit; gp2x_mode = 8; break; default: @@ -546,13 +576,15 @@ static void vid_reset_mode(void) if ((PicoIn.AHW & PAHW_32X) || renderer == RT_16BIT) { emu_scan_begin = EmuScanBegin16_rot; emu_scan_end = EmuScanEnd16_rot; + memset(rot_buff, 0, 320*4*2); } else if (renderer == RT_8BIT_ACC) { emu_scan_begin = EmuScanBegin8_rot; emu_scan_end = EmuScanEnd8_rot; + memset(rot_buff, 0xe0, 320*4); } else if (renderer == RT_8BIT_FAST) - vidcpyM2 = vidcpy_m2_rot; + vidcpy8bit = vidcpy_8bit_rot; } PicoDrawSetCallbacks(emu_scan_begin, emu_scan_end); @@ -588,18 +620,26 @@ void emu_video_mode_change(int start_line, int line_count, int start_col, int co int scalex = 320, scaley = 240; int ln_offs = 0; - /* NTSC always has 224 visible lines, anything smaller has bars */ - if (line_count < 224 && line_count > 144) { - start_line -= (224-line_count) /2; - line_count = 224; - } + if (currentConfig.vscaling != EOPT_SCALE_NONE && + (is_16bit_mode() || get_renderer() != RT_8BIT_FAST)) { + /* NTSC always has 224 visible lines, anything smaller has bars */ + if (line_count < 224 && line_count > 144) { + start_line -= (224-line_count) /2; + line_count = 224; + } - /* line doubling for swscaling, also needed for bg frames */ - if (currentConfig.vscaling == EOPT_SCALE_SW && line_count < 240) { - ld_lines = ld_left = 2*line_count / (240 - line_count); - PicoDrawSetCallbacks(EmuScanBegin16_ld, EmuScanEnd16_ld); + /* line doubling for swscaling, also needed for bg frames */ + if (currentConfig.vscaling == EOPT_SCALE_SW && line_count < 240) { + ld_lines = ld_left = 2*line_count / 
(240 - line_count); + PicoDrawSetCallbacks(EmuScanBegin16_ld,EmuScanEnd16_ld); + } } + /* blanking for SMS with 1st tile blanked */ + is_1stblanked = (col_count == 248); + firstline = start_line; linecount = line_count; + firstcol = start_col; colcount = col_count; + if (doing_bg_frame) return; @@ -754,11 +794,16 @@ void pemu_forced_frame(int no_scale, int do_emu) Pico.m.dirtyPal = 1; PicoIn.opt &= ~POPT_DIS_32C_BORDER; gp2x_current_bpp = 16; + // always render in screen 3 since menu uses 0-2 + g_screen_ptr = gp2x_screens[3]; if (!no_scale) no_scale = currentConfig.scaling == EOPT_SCALE_NONE; emu_cmn_forced_frame(no_scale, do_emu, g_screen_ptr); + if (is_1stblanked) + clear_1st_column(firstcol, firstline, linecount); + g_menubg_src_ptr = g_screen_ptr; doing_bg_frame = 0; } @@ -791,6 +836,12 @@ void pemu_loop_end(void) { pemu_sound_stop(); + if (g_screen_ptr == gp2x_screens[0]) { + /* currently on screen 3, which is needed for forced_frame */ + int size = gp2x_current_bpp / 8; + gp2x_memcpy_all_buffers(g_screen_ptr, 0, 320*240 * size); + gp2x_video_flip(); + } /* do one more frame for menu bg */ pemu_forced_frame(0, 1); } diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index b213b6af..f20cf861 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -535,7 +535,7 @@ void emu_video_mode_change(int start_line, int line_count, int start_col, int co // calculate the borders of the real image inside the picodrive image vout_width = (vout_16bit ? VOUT_MAX_WIDTH : VOUT_8BIT_WIDTH); vout_height = (vout_16bit ? VOUT_MAX_HEIGHT : VOUT_8BIT_HEIGHT); - vout_offset = (vout_16bit ? 0 : 8); // 8bit has 8 px overlap area on the left + vout_offset = (vout_16bit ? 0 : col_count == 248 ? 16 : 8); // 8bit has overlap area on the left padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - col_count - vout_offset}; int pxsz = (vout_16bit ? 
2 : 1); // pixel size: RGB = 16 bits, CLUT = 8 bits @@ -2247,6 +2247,9 @@ void retro_run(void) int x; if (Pico.m.dirtyPal) PicoDrawUpdateHighPal(); + /* 8 bit renderers have an extra offset for SMS wíth 1st tile blanked */ + if (vout_width == 248) + ps += 8; /* Copy, and skip the leftmost 8 columns again */ for (i = 0; i < vout_height; i++, ps += 8) { for (x = 0; x < vout_width; x+=4) { diff --git a/platform/linux/blit.c b/platform/linux/blit.c index b70262eb..ee2b6d0e 100644 --- a/platform/linux/blit.c +++ b/platform/linux/blit.c @@ -6,6 +6,8 @@ * See COPYING file in the top-level directory. */ +#include <string.h> + // Convert 0000bbb0 ggg0rrr0 0000bbb0 ggg0rrr0 // to 00000000 rrr00000 ggg00000 bbb00000 ... // TODO: rm when gp2x/emu.c is no longer used @@ -52,40 +54,88 @@ void bgr444_to_rgb32_sh(void *to, void *from) } } -void vidcpy_m2(void *dest, void *src, int m32col, int with_32c_border) +#define X (x_y >> 16) +#define Y (x_y & 0xffff) +#define W (w_h >> 16) +#define H (w_h & 0xffff) + +// gp2x: 0-> X wiz: Y <-0 +// | | +// v v +// +// Y X + +void vidcpy_8bit(void *dest, void *src, int x_y, int w_h) { unsigned char *pd = dest, *ps = src; - int i, u; + int i; - if (m32col) { - for (i = 0; i < 224; i++) - { - ps += 8; - ps += 32; - pd += 32; - for (u = 0; u < 256; u++) - *pd++ = *ps++; - ps += 32; - pd += 32; - } - } else { - for (i = 0; i < 224; i++) - { - ps += 8; - for (u = 0; u < 320; u++) - *pd++ = *ps++; - } + pd += X + Y*320; + ps += X + Y*328 + 8; + for (i = 0; i < H; i++) { + memcpy(pd, ps, W); + ps += 328; pd += 320; } } -void vidcpy_m2_rot(void *dest, void *src, int m32col, int with_32c_border) +void vidcpy_8bit_rot(void *dest, void *src, int x_y, int w_h) { + unsigned char *pd = dest, *ps = src; + int i, u; + + pd += Y + (319-X)*240; + ps += X + Y*328 + 8; + for (i = 0; i < H; i += 4) { + unsigned char *p = (void *)ps; + unsigned int *q = (void *)pd; + for (u = 0; u < W; u++) { + *q = (p[3*328]<<24) + (p[2*328]<<16) + (p[1*328]<<8) + p[0*328]; + p += 1; + q -= 
240/4; + } + ps += 4*328; pd += 4; + } } void rotated_blit8 (void *dst, void *linesx4, int y, int is_32col) { + unsigned char *pd = dst, *ps = linesx4; + int x, w, u; + + x = (is_32col ? 32 : 0); + w = (is_32col ? 256 : 320); + y -= 4; + + pd += y + (319-x)*240; + ps += x; + + unsigned char *p = (void *)ps; + unsigned int *q = (void *)pd; + for (u = 0; u < w; u++) { + *q = (p[3*328]<<24) + (p[2*328]<<16) + (p[1*328]<<8) + p[0*328]; + p += 1; + q -= 240/4; + } } void rotated_blit16(void *dst, void *linesx4, int y, int is_32col) { + unsigned short *pd = dst, *ps = linesx4; + int x, w, u; + + x = (is_32col ? 32 : 0); + w = (is_32col ? 256 : 320); + y -= 4; + + pd += y + (319-x)*240; + ps += x; + + unsigned short *p = (void *)ps; + unsigned int *q = (void *)pd; + for (u = 0; u < w; u++) { + q[0] = (p[1*328]<<16) + p[0*328]; + q[1] = (p[3*328]<<16) + p[2*328]; + p += 1; + q -= 2*240/4; + } } diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 0a05cdd8..ee796f57 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -138,13 +138,13 @@ void screen_blit(u16 *pd, int pp, u8* ps, int ss, u16 *pal) if (currentConfig.scaling == EOPT_SCALE_SW && out_w <= 256) { if (currentConfig.vscaling == EOPT_SCALE_SW && out_h <= 224) // h+v scaling - upscale = out_w >= 256 ? upscale_256_224_hv: upscale_160_144_hv; + upscale = out_w >= 240 ? upscale_256_224_hv: upscale_160_144_hv; else // h scaling - upscale = out_w >= 256 ? upscale_256_____h : upscale_160_____h; + upscale = out_w >= 240 ? upscale_256_____h : upscale_160_____h; } else if (currentConfig.vscaling == EOPT_SCALE_SW && out_h <= 224) // v scaling - upscale = out_w >= 256 ? upscale_____224_v : upscale_____144_v; + upscale = out_w >= 240 ? 
upscale_____224_v : upscale_____144_v; if (!upscale) { // no scaling for (y = 0; y < out_h; y++) @@ -165,6 +165,8 @@ void pemu_finalize_frame(const char *fps, const char *notice) PicoDrawUpdateHighPal(); + if (out_w == 248 && currentConfig.scaling == EOPT_SCALE_SW) + pd += (320 - out_w*320/256) / 2; // SMS with 1st tile blanked, recenter screen_blit(pd, g_screen_ppitch, ps, 328, Pico.est.HighPal); } @@ -205,16 +207,13 @@ void plat_video_set_buffer(void *buf) static void apply_renderer(void) { - PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE|POPT_DIS_32C_BORDER); + PicoIn.opt |= POPT_DIS_32C_BORDER; + PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE); if (is_16bit_mode()) { if (currentConfig.scaling == EOPT_SCALE_SW) PicoIn.opt |= POPT_EN_SOFTSCALE; - else if (currentConfig.scaling == EOPT_SCALE_HW) - // hw scaling, render without any padding - PicoIn.opt |= POPT_DIS_32C_BORDER; PicoIn.filter = currentConfig.filter; - } else - PicoIn.opt |= POPT_DIS_32C_BORDER; + } switch (get_renderer()) { case RT_16BIT: @@ -374,8 +373,9 @@ void emu_video_mode_change(int start_line, int line_count, int start_col, int co switch (currentConfig.scaling) { case EOPT_SCALE_HW: - screen_w = out_w; - screen_x = 0; + // mind aspect ratio for SMS with 1st column blanked + screen_w = (out_w == 248 ? 256 : out_w); + screen_x = (screen_w - out_w)/2; break; case EOPT_SCALE_SW: screen_x = (screen_w - 320)/2; diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 6747b07d..a76d5839 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -189,7 +189,8 @@ static void set_scaling_params(void) g_vertices[1].y = fbimg_yoffs + fbimg_height; if (!is_16bit_mode()) { // 8-bit modes have an 8 px overlap area on the left - g_vertices[0].u += 8; g_vertices[1].u += 8; + int offs = out_w == 248 ? 
16 : 8; + g_vertices[0].u += offs; g_vertices[1].u += offs; } if (border_hack) { g_vertices[0].u++; g_vertices[1].u--; @@ -210,12 +211,16 @@ static void do_pal_update_sms(void) // SMS palette 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0500, 0x0f00, 0x0005, 0x0ff0, 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, + // TMS palette + 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055c, 0x0ee4, + 0x055f, 0x077f, 0x05bc, 0x08ce, 0x03a2, 0x0b5c, 0x0ccc, 0x0fff, }; int i; if (!(Pico.video.reg[0] & 0x4)) { + int sg = !!(Pico.m.hardware & PMS_HW_SG); for (i = Pico.est.SonicPalCount; i >= 0; i--) - do_pal_convert(localPal+i*0x40, tmspal, currentConfig.gamma, currentConfig.gamma2); + do_pal_convert(localPal+i*0x40, tmspal+sg*0x10, currentConfig.gamma, currentConfig.gamma2); } else { for (i = Pico.est.SonicPalCount; i >= 0; i--) do_pal_convert(localPal+i*0x40, Pico.est.SonicPal+i*0x40, currentConfig.gamma, currentConfig.gamma2); diff --git a/platform/psp/menu.c b/platform/psp/menu.c index 42081edc..81356b29 100644 --- a/platform/psp/menu.c +++ b/platform/psp/menu.c @@ -13,6 +13,8 @@ static const char *men_filter_opts[] = { "nearest", "bilinear" }; #define MENU_OPTIONS_ADV +static menu_entry e_menu_sms_options[]; + void psp_menu_init(void) { me_enable(e_menu_sms_options, MA_SMSOPT_GHOSTING, 0);