// slot (signalling the display of background color) is processed in this case
// is however unclear and might lead to glitches due to race conditions by the
// different video clocks for H32 and H40.
+// NB: there is an offset of 4 pixels between MD and 32X layers in H32 mode.
+#define H32_OFFSET 4
// BGR555 to native conversion
#if defined(USE_BGR555)
} \
}
+#define MD_LAYER_CODE_H32 \
+ *dst = dst[H32_OFFSET] /* overwrite this dst entry with the one H32_OFFSET entries further on */
+
// this is almost never used (Wiz and menu bg gen only)
void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
{
- unsigned short *pd = est->DrawLineDest;
+ unsigned short *dst = est->DrawLineDest;
unsigned short *pal = Pico32xMem->pal_native;
unsigned char *pmd = est->HighCol + 8;
+ unsigned short *palmd = est->HighPal;
unsigned short *dram, *p32x;
unsigned char mdbg;
+ int h32 = !(Pico.video.reg[12] & 0x1);
FinalizeLine555(sh, line, est);
dram = (void *)Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS];
p32x = dram + dram[line];
mdbg = Pico.video.reg[7] & 0x3f;
+ if (h32) pmd += H32_OFFSET;
if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 2) { // Direct Color Mode
int inv_bit = (Pico32x.vdp_regs[0] & P32XV_PRI) ? 0x8000 : 0;
- do_line_dc(pd, p32x, pmd, inv_bit,);
+ if (h32) {
+ do_line_dc(dst, p32x, pmd, inv_bit, MD_LAYER_CODE_H32);
+ } else
+ do_line_dc(dst, p32x, pmd, inv_bit,);
return;
}
unsigned char *p32xb = (void *)p32x;
if (Pico32x.vdp_regs[2 / 2] & P32XV_SFT)
p32xb++;
- do_line_pp(pd, p32xb, pmd,);
+ if (h32) {
+ do_line_pp(dst, p32xb, pmd, MD_LAYER_CODE_H32);
+ } else
+ do_line_pp(dst, p32xb, pmd,);
}
else { // Run Length Mode
- do_line_rl(pd, p32x, pmd,);
+ if (h32) {
+ do_line_rl(dst, p32x, pmd, MD_LAYER_CODE_H32);
+ } else
+ do_line_rl(dst, p32x, pmd,);
}
}
unsigned short *p32x; \
int lines = (lines_sft_offs >> 16) & 0xff; \
int l; \
+ if (lines_sft_offs & (2<<8)) pmd += H32_OFFSET; \
(void)palmd; \
for (l = 0; l < lines; l++, pmd += 8) { \
pre_code; \
unsigned char *p32x; \
int lines = (lines_sft_offs >> 16) & 0xff; \
int l; \
+ if (lines_sft_offs & (2<<8)) pmd += H32_OFFSET; \
(void)palmd; \
for (l = 0; l < lines; l++, pmd += 8) { \
pre_code; \
unsigned short *p32x; \
int lines = (lines_sft_offs >> 16) & 0xff; \
int l; \
+ if (lines_sft_offs & (2<<8)) pmd += H32_OFFSET; \
(void)palmd; \
for (l = 0; l < lines; l++, pmd += 8) { \
pre_code; \
make_do_loop(,,,)
make_do_loop(_md, , , MD_LAYER_CODE)
+make_do_loop(_h32, , , MD_LAYER_CODE_H32)
make_do_loop(_scan, PICOSCAN_PRE, PICOSCAN_POST, )
+make_do_loop(_scan_h32, PICOSCAN_PRE, PICOSCAN_POST, MD_LAYER_CODE_H32)
make_do_loop(_scan_md, PICOSCAN_PRE, PICOSCAN_POST, MD_LAYER_CODE)
typedef void (*do_loop_func)(unsigned short *dst, unsigned short *dram, unsigned lines, int mdbg);
-enum { DO_LOOP, DO_LOOP_MD, DO_LOOP_SCAN, DO_LOOP_MD_SCAN };
+enum { DO_LOOP, DO_LOOP_H32, DO_LOOP_MD, DO_LOOP_SCAN, DO_LOOP_H32_SCAN, DO_LOOP_MD_SCAN }; // used as index (do_loop[which_func]); NOTE(review): presumably into do_loop_{dc,pp,rl}_f, so keep the same order there
-static const do_loop_func do_loop_dc_f[] = { do_loop_dc, do_loop_dc_md, do_loop_dc_scan, do_loop_dc_scan_md };
-static const do_loop_func do_loop_pp_f[] = { do_loop_pp, do_loop_pp_md, do_loop_pp_scan, do_loop_pp_scan_md };
-static const do_loop_func do_loop_rl_f[] = { do_loop_rl, do_loop_rl_md, do_loop_rl_scan, do_loop_rl_scan_md };
+static const do_loop_func do_loop_dc_f[] = { do_loop_dc, do_loop_dc_h32, do_loop_dc_md, do_loop_dc_scan, do_loop_dc_scan_h32, do_loop_dc_scan_md };
+static const do_loop_func do_loop_pp_f[] = { do_loop_pp, do_loop_pp_h32, do_loop_pp_md, do_loop_pp_scan, do_loop_pp_scan_h32, do_loop_pp_scan_md };
+static const do_loop_func do_loop_rl_f[] = { do_loop_rl, do_loop_rl_h32, do_loop_rl_md, do_loop_rl_scan, do_loop_rl_scan_h32, do_loop_rl_scan_md };
void PicoDraw32xLayer(int offs, int lines, int md_bg)
{
}
do_it:
+ // In 8bit modes MD+32X layers are merged together in 32X rendering, while in
+ // 16bit mode the MD layer is directly created in the target buffer and the
+ // 32X layer is overlaid onto that.
if (Pico32xDrawMode == PDM32X_BOTH)
which_func = have_scan ? DO_LOOP_MD_SCAN : DO_LOOP_MD;
+ else if (!(Pico.video.reg[12] & 1)) // H32, mind 4 px offset
+ which_func = have_scan ? DO_LOOP_H32_SCAN : DO_LOOP_H32;
else
which_func = have_scan ? DO_LOOP_SCAN : DO_LOOP;
lines_sft_offs = (Pico32x.sync_line << 24) | (lines << 16) | offs;
if (Pico32x.vdp_regs[2 / 2] & P32XV_SFT)
lines_sft_offs |= 1 << 8;
+ if (!(Pico.video.reg[12] & 1)) // offset flag for H32
+ lines_sft_offs |= 2 << 8;
do_loop[which_func](Pico.est.DrawLineDest, dram, lines_sft_offs, md_bg);
}
// we'll draw via FinalizeLine32xRGB555 (rare)
Pico32xDrawMode = PDM32X_OFF;
else
- // in RGB555 mode the 32x layer is drawn over the MD layer, in the other
+ // in RGB555 mode the 32x layer is overlaid on the MD layer, in the other
// modes 32x and MD layer are merged together by the 32x renderer
Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH;
}
@ direct color
@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg
-.macro make_do_loop_dc name call_scan do_md
+.macro make_do_loop_dc name call_scan do_md do_h32
.global \name
\name:
stmfd sp!, {r4-r11,lr}
mov r5, #328
mov r3, r3, lsl #26 @ mdbg << 26
mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data
+ tst r2, #(2<<8)
+ addne r11,r11,#4 @ H32 offset
tst r10,#P32XV_PRI
movne r10,#0
moveq r10,#0x8000 @ r10 = !inv_bit
orr r12,r12, lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth
str r12,[r0], #4 @ (no write combining on ARM9)
+.else
+.if \do_h32
+ cmp r3, r12, lsl #26 @ replace MD bg info into prio bit
+ tstne r7, #0x20<<16
+ ldrneh r7, [r0, #8]
+ moveq r7, r7, lsr #16
+
+ cmp r3, lr, lsl #26
+ tstne r8, #0x20<<16
+ ldrneh r8, [r0, #10]
+ moveq r8, r8, lsr #16
+
+ orr r7, r7, r8, lsl #16 @ combine 2 pixels to optimize memory bandwidth
+ str r7, [r0], #4 @ (no write combining on ARM9)
.else
cmp r3, r12, lsl #26 @ replace MD bg info into prio bit
tstne r7, #0x20<<16
streqh r8, [r0, #2]
add r0, r0, #4 @ store 32x pixels if 32X prio or MD bg
+.endif
.endif
b 2b @ loop_inner
moveq lr, r7
orr r12,r12,lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth
str r12,[r0], #4 @ (no write combining on ARM9)
+.else
+.if \do_h32
+ @ H32 bg loop: where the MD pixel is not bg, copy the dst halfword found
+ @ 4 pixels (8 bytes) further on; where it is bg, store r7 as the plain variant does
+ add r0, r0, #4
+ cmp r3, r12,lsl #26 @ MD pixel 0 has bg?
+ ldrneh r12,[r0, #4] @ r12 is dead after the cmp; lr (MD pixel 1) must stay intact
+ streqh r7, [r0, #-4]
+ strneh r12,[r0, #-4]
+ cmp r3, lr, lsl #26 @ MD pixel 1 has bg?
+ ldrneh lr, [r0, #6] @ lr may be reused now, its compare is done
+ streqh r7, [r0, #-2]
+ strneh lr, [r0, #-2]
.else
add r0, r0, #4
cmp r3, r12,lsl #26 @ MD pixel 0 has bg?
streqh r7, [r0, #-4]
cmp r3, lr, lsl #26 @ MD pixel 1 has bg?
streqh r7, [r0, #-2]
+.endif
.endif
subs r8, r8, #2
bgt 9b @ bg_loop
@ note: this may read a few bytes over the end of PicoDraw2FB and dram,
@ so those should have a bit more alloc'ed than really needed.
@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg
-.macro make_do_loop_pp name call_scan do_md
+.macro make_do_loop_pp name call_scan do_md do_h32
.global \name
\name:
stmfd sp!, {r4-r11,lr}
mov r5, #328
mov r3, r3, lsl #26 @ mdbg << 26
mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data
+ tst r2, #(2<<8)
+ addne r11,r11,#4 @ H32 offset
call_scan_prep \call_scan lr
mov r4, #0 @ line
orr r7, r7, r8, lsl #16 @ combine 2 pixels to optimize memory bandwidth
str r7, [r0], #4 @ (no write combining on ARM9)
.else
+.if \do_h32
+ cmp r3, r12, lsl #26 @ replace MD bg info into prio bit
+ orreq r7, r7, #0x20
+ cmp r3, lr, lsl #26
+ orreq r8, r8, #0x20
+
+ tst r7, #0x20
+ ldreqh r7, [r0, #8]
+ tst r8, #0x20
+ ldreqh r8, [r0, #10]
+
+ orr r7, r7, r8, lsl #16 @ combine 2 pixels to optimize memory bandwidth
+ str r7, [r0], #4 @ (no write combining on ARM9)
+.else
cmp r3, r12, lsl #26 @ replace MD bg info into prio bit
orreq r7, r7, #0x20
cmp r3, lr, lsl #26
strneh r7, [r0, #-4]
tst r8, #0x20
strneh r8, [r0, #-2]
+.endif
.endif
b 2b @ loop_inner
moveq lr, r7
orr r12,r12,lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth
str r12,[r0], #4 @ (no write combining on ARM9)
+.else
+.if \do_h32
+ @ H32 bg loop: where the MD pixel is not bg, copy the dst halfword found
+ @ 4 pixels (8 bytes) further on; where it is bg, store r7 as the plain variant does
+ add r0, r0, #4
+ cmp r3, r12,lsl #26 @ MD pixel 0 has bg?
+ ldrneh r12,[r0, #4] @ r12 is dead after the cmp; lr (MD pixel 1) must stay intact
+ streqh r7, [r0, #-4]
+ strneh r12,[r0, #-4]
+ cmp r3, lr, lsl #26 @ MD pixel 1 has bg?
+ ldrneh lr, [r0, #6] @ lr may be reused now, its compare is done
+ streqh r7, [r0, #-2]
+ strneh lr, [r0, #-2]
.else
add r0, r0, #4
cmp r3, r12,lsl #26 @ MD pixel 0 has bg?
streqh r7, [r0, #-4]
cmp r3, lr, lsl #26 @ MD pixel 1 has bg?
streqh r7, [r0, #-2]
+.endif
.endif
subs r8, r8, #2
bgt 9b @ bg_loop
@ run length
@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg
-.macro make_do_loop_rl name call_scan do_md
+.macro make_do_loop_rl name call_scan do_md do_h32
.global \name
\name:
stmfd sp!, {r4-r11,lr}
mov r5, #328
mov r3, r3, lsl #26 @ mdbg << 26
mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data
+ tst r2, #(2<<8)
+ addne r11,r11,#4 @ H32 offset
call_scan_prep \call_scan lr
mov r4, #0 @ line
ldrneh r12,[r9, r7] @ t = palmd[*pmd]
streqh lr, [r0], #2
strneh r12,[r0], #2 @ *dst++ = t
+.else
+.if \do_h32
+ ldrneh lr, [r0, #8]
+ strh lr, [r0], #2
.else
streqh lr, [r0]
add r0, r0, #2
+.endif
.endif
subs r8, r8, #0x100
bge 3b @ loop_innermost
.endm
-make_do_loop_dc do_loop_dc, 0, 0
-make_do_loop_dc do_loop_dc_md, 0, 1
-make_do_loop_dc do_loop_dc_scan, 1, 0
-make_do_loop_dc do_loop_dc_scan_md, 1, 1
+make_do_loop_dc do_loop_dc, 0, 0, 0 @ args: name, call_scan, do_md, do_h32
+make_do_loop_dc do_loop_dc_h32, 0, 0, 1
+make_do_loop_dc do_loop_dc_md, 0, 1, 0
+make_do_loop_dc do_loop_dc_scan, 1, 0, 0
+make_do_loop_dc do_loop_dc_scan_h32,1, 0, 1
+make_do_loop_dc do_loop_dc_scan_md, 1, 1, 0
.pool
-make_do_loop_pp do_loop_pp, 0, 0
-make_do_loop_pp do_loop_pp_md, 0, 1
-make_do_loop_pp do_loop_pp_scan, 1, 0
-make_do_loop_pp do_loop_pp_scan_md, 1, 1
+make_do_loop_pp do_loop_pp, 0, 0, 0 @ args: name, call_scan, do_md, do_h32
+make_do_loop_pp do_loop_pp_h32, 0, 0, 1
+make_do_loop_pp do_loop_pp_md, 0, 1, 0
+make_do_loop_pp do_loop_pp_scan, 1, 0, 0
+make_do_loop_pp do_loop_pp_scan_h32,1, 0, 1
+make_do_loop_pp do_loop_pp_scan_md, 1, 1, 0
.pool
-make_do_loop_rl do_loop_rl, 0, 0
-make_do_loop_rl do_loop_rl_md, 0, 1
-make_do_loop_rl do_loop_rl_scan, 1, 0
-make_do_loop_rl do_loop_rl_scan_md, 1, 1
+make_do_loop_rl do_loop_rl, 0, 0, 0 @ args: name, call_scan, do_md, do_h32
+make_do_loop_rl do_loop_rl_h32, 0, 0, 1
+make_do_loop_rl do_loop_rl_md, 0, 1, 0
+make_do_loop_rl do_loop_rl_scan, 1, 0, 0
+make_do_loop_rl do_loop_rl_scan_h32,1, 0, 1
+make_do_loop_rl do_loop_rl_scan_md, 1, 1, 0
.pool
@ vim:filetype=armasm