+/*
+ * Color-keyed 16bpp row copy.
+ * Copies w pixels from src to dst, leaving the destination untouched
+ * wherever the source pixel equals the transparent color trc.
+ * Both addresses are rounded down to an even address before use.
+ */
+static void blt_tr(void *dst, void *src, u32 trc, int w)
+{
+  u16 *out = (u16 *)((long)dst & ~1);
+  const u16 *in = (const u16 *)((long)src & ~1);
+  int x;
+
+  // XXX: optimize
+  for (x = 0; x < w; x++) {
+    u16 pix = in[x];
+
+    if (pix != trc)
+      out[x] = pix;
+  }
+}
+
+/*
+ * Debug helper: print 16 consecutive u32 words, starting at
+ * blitter.dstctrl, as zero-padded hex, four words per output line.
+ */
+#define dump_blitter() \
+{ \
+  u32 *reg = &blitter.dstctrl; \
+  int row, col; \
+  for (row = 0; row < 4; row++) { \
+    for (col = 0; col < 4; col++, reg++) \
+      printf("%08x ", *reg); \
+    printf("\n"); \
+  } \
+}
+
+/*
+ * Translate addr through the upper_mem block list.
+ *
+ * The list is walked in order and the first block whose
+ * [addr, addr + size) range contains the address is used. On a hit,
+ * *mem_end is set to one past the last byte of that block's memory and
+ * a pointer to the byte corresponding to addr is returned. On a miss,
+ * NULL is returned and *mem_end is left untouched.
+ */
+static void *uppermem_lookup(u32 addr, u8 **mem_end)
+{
+  struct uppermem_block *blk = upper_mem;
+
+  while (blk != NULL) {
+    if (addr >= blk->addr && addr < blk->addr + blk->size) {
+      u8 *base = (u8 *)blk->mem;
+
+      *mem_end = base + blk->size;
+      return base + (addr - blk->addr);
+    }
+    blk = blk->next;
+  }
+
+  return NULL;
+}
+
+/*
+ * Emulate the blit currently programmed into the `blitter` registers.
+ *
+ * Width and height come from blitter.size, the strides from
+ * srcstride/dststride, and the pixel depth (8 or 16 bpp) from bit 0x20
+ * of dstctrl. Source and destination addresses are translated with
+ * uppermem_lookup(); if stride * h would run past the end of the
+ * containing block, the problem is logged and h is clamped.
+ *
+ * With SRCCTRL_SRCENB set the operation is a copy (color-keyed when the
+ * depth is 16bpp and CTRL_TRANSPARENCYENB is set); otherwise it is
+ * treated as a solid fill with patbackcolor. A large enough blit that
+ * lands inside the 320x240x16 area at mmsp2.mlc_stl_adr signals fb_cond
+ * once the pixels have been written.
+ *
+ * Blits that cannot be handled (source not flagged SRCCTRL_INVIDEO, or
+ * an address that does not translate) are logged together with a dump
+ * of the blitter registers, and nothing is drawn.
+ */
+static void blitter_do(void)
+{
+  u8 *dst, *dste, *src = NULL, *srce = NULL;
+  int w, h, sstrd, dstrd;
+  int to_screen = 0;
+  u32 bpp, addr;
+
+  w = blitter.size & 0x7ff;
+  h = (blitter.size >> 16) & 0x7ff;
+  sstrd = blitter.srcstride;
+  dstrd = blitter.dststride;
+
+  // XXX: need to confirm this..
+  addr = (blitter.dstaddr & ~3) | ((blitter.dstctrl & 0x1f) >> 3);
+
+  // use dst bpp.. How does it do blits with different src bpp?
+  bpp = (blitter.dstctrl & 0x20) ? 16 : 8;
+
+  // maybe the screen?
+  if (((w == 320 && h == 240) || // blit whole screen
+       (w * h >= 320*240/2)) &&  // ..or at least half of the area
+       mmsp2.mlc_stl_adr <= addr && addr < mmsp2.mlc_stl_adr + 320*240*2)
+    to_screen = 1;
+
+  // translated here, but only checked after the source has been
+  // handled, so that the bad_blit report can show both pointers
+  dst = uppermem_lookup(addr, &dste);
+
+  // XXX: assume fill if no SRCENB, but it could be pattern blit..
+  if (blitter.srcctrl & SRCCTRL_SRCENB) {
+    if (!(blitter.srcctrl & SRCCTRL_INVIDEO))
+      goto bad_blit;
+
+    addr = (blitter.srcaddr & ~3) | ((blitter.srcctrl & 0x1f) >> 3);
+    src = uppermem_lookup(addr, &srce);
+    if (src == NULL)
+      goto bad_blit;
+
+    // NOTE(review): only stride * h is compared against the block end;
+    // a row wider than the stride is not accounted for - confirm
+    if (src + sstrd * h > srce) {
+      err("blit %08x->%08x %dx%d did not fit src\n",
+        blitter.srcaddr, blitter.dstaddr, w, h);
+      h = (srce - src) / sstrd;
+    }
+  }
+
+  if (dst == NULL)
+    goto bad_blit;
+
+  if (dst + dstrd * h > dste) {
+    err("blit %08x->%08x %dx%d did not fit dst\n",
+      blitter.srcaddr, blitter.dstaddr, w, h);
+    h = (dste - dst) / dstrd;
+  }
+
+  if (src != NULL) {
+    // copy
+    if (bpp == 16 && (blitter.ctrl & CTRL_TRANSPARENCYENB)) {
+      // transparent color is kept in the upper half of ctrl
+      u32 trc = blitter.ctrl >> 16;
+      for (; h > 0; h--, dst += dstrd, src += sstrd)
+        blt_tr(dst, src, trc, w);
+    }
+    else {
+      // source and destination are both looked up in upper memory and
+      // may overlap, which memcpy() does not permit - use memmove()
+      for (; h > 0; h--, dst += dstrd, src += sstrd)
+        memmove(dst, src, w * bpp / 8);
+    }
+  }
+  else {
+    // fill. Assume the pattern is cleared and bg color is used
+    u32 bgc = blitter.patbackcolor & 0xffff;
+    if (bpp == 16) {
+      for (; h > 0; h--, dst += dstrd)
+        memset16(dst, bgc, w);
+    }
+    else {
+      for (; h > 0; h--, dst += dstrd)
+        memset(dst, bgc, w); // bgc?
+    }
+  }
+
+  if (to_screen)
+    pthread_cond_signal(&fb_cond);
+  return;
+
+bad_blit:
+  // %p requires void * arguments
+  err("blit %08x->%08x %dx%d translated to %p->%p\n",
+    blitter.srcaddr, blitter.dstaddr, w, h, (void *)src, (void *)dst);
+  dump_blitter();
+}
+
+// TODO: hw scaler stuff
+/*
+ * Convert one 320x240 frame at src (bpp bits per pixel) into the 16bpp
+ * host_screen buffer, then flip and store the buffer returned by
+ * host_video_flip() as the new host_screen.
+ *
+ * For bpp <= 8 the pixels are looked up in mmsp2.host_pal; that table
+ * is rebuilt first from the 32-bit entries in mmsp2.mlc_stl_pallt_d32
+ * whenever mmsp2.dirty_pal is set. 16bpp rows are copied as they are.
+ * 24bpp (and any other depth) draws nothing, but the flip still happens.
+ */
+static void mlc_flip(u8 *src, int bpp)
+{
+  u16 *out = host_screen;
+  u16 *pal16 = mmsp2.host_pal;
+  int y, x;
+
+  if (bpp <= 8 && mmsp2.dirty_pal) {
+    u32 *pal32 = mmsp2.mlc_stl_pallt_d32;
+
+    // pack bits 23-19, 15-10 and 7-3 of each entry into 5-6-5
+    for (x = 0; x < 256; x++) {
+      u32 c = pal32[x];
+
+      pal16[x] = ((c >> 8) & 0xf800) | ((c >> 5) & 0x07e0) | ((c >> 3) & 0x001f);
+    }
+    mmsp2.dirty_pal = 0;
+  }
+
+  switch (bpp) {
+  case 4:
+    // two pixels per source byte, high nibble first
+    for (y = 0; y < 240; y++) {
+      for (x = 0; x < 320 / 2; x++) {
+        u8 pair = *src++;
+
+        *out++ = pal16[pair >> 4];
+        *out++ = pal16[pair & 0x0f];
+      }
+      // skip whatever is left of the host line (host_stride is in bytes)
+      out += host_stride / 2 - 320;
+    }
+    break;
+
+  case 8:
+    // one pixel per source byte, four pixels per iteration
+    for (y = 0; y < 240; y++) {
+      for (x = 0; x < 320; x += 4, src += 4, out += 4) {
+        out[0] = pal16[src[0]];
+        out[1] = pal16[src[1]];
+        out[2] = pal16[src[2]];
+        out[3] = pal16[src[3]];
+      }
+      out += host_stride / 2 - 320;
+    }
+    break;
+
+  case 16:
+    // same pixel format on both sides, only the line pitch differs
+    for (y = 0; y < 240; y++) {
+      memcpy(out, src, 320*2);
+      out += host_stride / 2;
+      src += 320*2;
+    }
+    break;
+
+  case 24:
+    // TODO
+    break;
+  }
+
+  host_screen = host_video_flip();
+}
+
+/*
+ * Add ns nanoseconds to the struct timespec lvalue ts, carrying whole
+ * seconds from tv_nsec into tv_sec until tv_nsec is below one second.
+ * ns is expected to be non-negative.
+ *
+ * Both arguments are parenthesized so that expressions such as *p can
+ * be passed. ts is evaluated several times and must therefore be free
+ * of side effects. The expansion stays a plain brace block so existing
+ * call sites keep working unchanged.
+ */
+#define ts_add_nsec(ts, ns) { \
+  (ts).tv_nsec += (ns); \
+  while ((ts).tv_nsec >= 1000000000) { \
+    (ts).tv_sec++; \
+    (ts).tv_nsec -= 1000000000; \
+  } \
+}
+
+static void *fb_sync_thread(void *arg)