From: kub Date: Mon, 22 Feb 2021 21:25:03 +0000 (+0100) Subject: add big endian platform support X-Git-Tag: v2.00~599 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=57c5a5e5059258d03526e1407f32fb6a0cd9c2e2;p=picodrive.git add big endian platform support --- diff --git a/cpu/cz80/cz80.h b/cpu/cz80/cz80.h index f3f79b39..4404e72e 100644 --- a/cpu/cz80/cz80.h +++ b/cpu/cz80/cz80.h @@ -21,36 +21,38 @@ extern "C" { #endif +#include + /******************************/ /* Compiler dependant defines */ /******************************/ #ifndef UINT8 -#define UINT8 unsigned char +#define UINT8 u8 #endif #ifndef INT8 -#define INT8 signed char +#define INT8 s8 #endif #ifndef UINT16 -#define UINT16 unsigned short +#define UINT16 u16 #endif #ifndef INT16 -#define INT16 signed short +#define INT16 s16 #endif #ifndef UINT32 -#define UINT32 unsigned int +#define UINT32 u32 #endif #ifndef INT32 -#define INT32 signed int +#define INT32 s32 #endif #ifndef FPTR -#define FPTR uintptr_t +#define FPTR uptr #endif /*************************************/ @@ -62,16 +64,16 @@ extern "C" { #define CZ80_FETCH_SFT (16 - CZ80_FETCH_BITS) #define CZ80_FETCH_BANK (1 << CZ80_FETCH_BITS) -#define PICODRIVE_HACKS 1 -#define CZ80_LITTLE_ENDIAN 1 +#define PICODRIVE_HACKS 1 +#define CZ80_LITTLE_ENDIAN CPU_IS_LE #define CZ80_USE_JUMPTABLE 1 -#define CZ80_BIG_FLAGS_ARRAY 1 +#define CZ80_BIG_FLAGS_ARRAY 1 //#ifdef BUILD_CPS1PSP //#define CZ80_ENCRYPTED_ROM 1 //#else #define CZ80_ENCRYPTED_ROM 0 //#endif -#define CZ80_EMULATE_R_EXACTLY 1 +#define CZ80_EMULATE_R_EXACTLY 1 #define zR8(A) (*CPU->pzR8[A]) #define zR16(A) (CPU->pzR16[A]->W) diff --git a/cpu/cz80/cz80macro.h b/cpu/cz80/cz80macro.h index 36fdacb7..5cabba3b 100644 --- a/cpu/cz80/cz80macro.h +++ b/cpu/cz80/cz80macro.h @@ -48,11 +48,7 @@ #define READ_OP() GET_OP(); PC++ #define READ_ARG() (*(UINT8 *)PC++) -#if CZ80_LITTLE_ENDIAN #define READ_ARG16() (*(UINT8 *)PC | (*(UINT8 *)(PC + 1) << 8)); PC += 2 -#else -#define 
READ_ARG16() (*(UINT8 *)(PC + 1) | (*(UINT8 *)PC << 8)); PC += 2 -#endif //#ifndef BUILD_CPS1PSP //#define READ_MEM8(A) memory_region_cpu2[(A)] @@ -63,11 +59,7 @@ #define READ_MEM8(A) CPU->Read_Byte(A) #endif //#endif -#if CZ80_LITTLE_ENDIAN #define READ_MEM16(A) (READ_MEM8(A) | (READ_MEM8((A) + 1) << 8)) -#else -#define READ_MEM16(A) ((READ_MEM8(A) << 8) | READ_MEM8((A) + 1)) -#endif #if PICODRIVE_HACKS #define WRITE_MEM8(A, D) { \ @@ -82,11 +74,7 @@ #else #define WRITE_MEM8(A, D) CPU->Write_Byte(A, D); #endif -#if CZ80_LITTLE_ENDIAN #define WRITE_MEM16(A, D) { WRITE_MEM8(A, D); WRITE_MEM8((A) + 1, (D) >> 8); } -#else -#define WRITE_MEM16(A, D) { WRITE_MEM8((A) + 1, D); WRITE_MEM8(A, (D) >> 8); } -#endif #define PUSH_16(A) { UINT32 sp; zSP -= 2; sp = zSP; WRITE_MEM16(sp, A); } #define POP_16(A) { UINT32 sp; sp = zSP; A = READ_MEM16(sp); zSP = sp + 2; } diff --git a/cpu/fame/fame.h b/cpu/fame/fame.h index 310efe56..ee1140b8 100644 --- a/cpu/fame/fame.h +++ b/cpu/fame/fame.h @@ -98,44 +98,7 @@ extern "C" { /* Data definition */ /*******************/ -#include -/* -#ifdef u8 -#undef u8 -#endif - -#ifdef s8 -#undef s8 -#endif - -#ifdef u16 -#undef u16 -#endif - -#ifdef s16 -#undef s16 -#endif - -#ifdef u32 -#undef u32 -#endif - -#ifdef s32 -#undef s32 -#endif - -#ifdef uptr -#undef uptr -#endif - -#define u8 unsigned char -#define s8 signed char -#define u16 unsigned short -#define s16 signed short -#define u32 unsigned int -#define s32 signed int -#define uptr uintptr_t -*/ +#include /* typedef unsigned char u8; @@ -148,10 +111,10 @@ typedef signed int s32; typedef union { - u8 B; - s8 SB; - u16 W; - s16 SW; + u8 B[4]; + s8 SB[4]; + u16 W[2]; + s16 SW[2]; u32 D; s32 SD; } famec_union32; diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 7167629f..9e832bc4 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -183,19 +183,22 @@ // internals core macros ///////////////////////// +#define XB MEM_LE4(0) +#define XW MEM_LE2(0) + #define DREG(X) (ctx->dreg[(X)].D) 
#define DREGu32(X) (ctx->dreg[(X)].D) #define DREGs32(X) (ctx->dreg[(X)].SD) -#define DREGu16(X) (ctx->dreg[(X)].W) -#define DREGs16(X) (ctx->dreg[(X)].SW) -#define DREGu8(X) (ctx->dreg[(X)].B) -#define DREGs8(X) (ctx->dreg[(X)].SB) +#define DREGu16(X) (ctx->dreg[(X)].W[XW]) +#define DREGs16(X) (ctx->dreg[(X)].SW[XW]) +#define DREGu8(X) (ctx->dreg[(X)].B[XB]) +#define DREGs8(X) (ctx->dreg[(X)].SB[XB]) #define AREG(X) (ctx->areg[(X)].D) #define AREGu32(X) (ctx->areg[(X)].D) #define AREGs32(X) (ctx->areg[(X)].SD) -#define AREGu16(X) (ctx->areg[(X)].W) -#define AREGs16(X) (ctx->areg[(X)].SW) +#define AREGu16(X) (ctx->areg[(X)].W[XW]) +#define AREGs16(X) (ctx->areg[(X)].SW[XW]) #define ASP (ctx->asp) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 84ac12b5..f658103b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2568,6 +2568,28 @@ static void rcache_init(void) // --------------------------------------------------------------- +// swap 32 bit value read from mem in generated code (same as CPU_BE2) +static void emit_le_swap(int cond, int r) +{ +#if CPU_IS_LE + if (cond == -1) + emith_ror(r, r, 16); + else + emith_ror_c(cond, r, r, 16); +#endif +} + +// fix memory byte ptr in generated code (same as MEM_BE2) +static void emit_le_ptr8(int cond, int r) +{ +#if CPU_IS_LE + if (cond == -1) + emith_eor_r_imm_ptr(r, 1); + else + emith_eor_r_imm_ptr_c(cond, r, 1); +#endif +} + // NB may return either REG or TEMP static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs) { @@ -2608,14 +2630,16 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, u32 *offs) // if r is in rcache or needed soon anyway, and offs is relative to region, // and address translation fits in add_ptr_imm (s32), then use rcached const - if (la == (s32)la && !(*offs & ~mask) && rcache_is_cached(r)) { - u32 odd = a & 1; // need to fix odd address for correct byte addressing - la -= (s32)((a & ~mask) - *offs - odd); // diff between reg and memory 
+ if (la == (s32)la && !(((a & mask) + *offs) & ~mask) && rcache_is_cached(r)) { +#if CPU_IS_LE // need to fix odd address for correct byte addressing + if (a & 1) *offs += (*offs&1) ? 2 : -2; +#endif + la -= (s32)((a & ~mask) - *offs); // diff between reg and memory hr = hr2 = rcache_get_reg(r, rmode, NULL); if ((s32)a < 0) emith_uext_ptr(hr2); - if ((la & ~omask) - odd) { + if (la & ~omask) { hr = rcache_get_tmp(); - emith_add_r_r_ptr_imm(hr, hr2, (la & ~omask) - odd); + emith_add_r_r_ptr_imm(hr, hr2, la & ~omask); rcache_free(hr2); } *offs = (la & omask); @@ -2792,9 +2816,9 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); switch (size & MF_SIZEMASK) { - case 0: emith_read8s_r_r_offs(hr2, hr, offs ^ 1); break; // 8 - case 1: emith_read16s_r_r_offs(hr2, hr, offs); break; // 16 - case 2: emith_read_r_r_offs(hr2, hr, offs); emith_ror(hr2, hr2, 16); break; + case 0: emith_read8s_r_r_offs(hr2, hr, MEM_BE2(offs)); break; // 8 + case 1: emith_read16s_r_r_offs(hr2, hr, offs); break; // 16 + case 2: emith_read_r_r_offs(hr2, hr, offs); emit_le_swap(-1, hr2); break; } rcache_free(hr); if (size & MF_POSTINCR) @@ -5174,7 +5198,7 @@ static void sh2_generate_utils(void) emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); - emith_eor_r_imm_ptr_c(DCOND_CC, arg0, 1); + emit_le_ptr8(DCOND_CC, arg0); emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); @@ -5204,7 +5228,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_read_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); - emith_ror_c(DCOND_CC, RET_REG, RET_REG, 16); + emit_le_swap(DCOND_CC, RET_REG); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); @@ -5221,7 +5245,7 @@ static void sh2_generate_utils(void) emith_abijump_reg_c(DCOND_CS, arg2); 
EMITH_SJMP_END(DCOND_CC); emith_and_r_r_r(arg1, arg0, arg3); - emith_eor_r_imm_ptr(arg1, 1); + emit_le_ptr8(-1, arg1); emith_read8s_r_r_r(arg1, arg2, arg1); emith_push_ret(arg1); emith_move_r_r_ptr(arg2, CONTEXT_REG); @@ -5257,7 +5281,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_END(DCOND_CC); emith_and_r_r_r(arg1, arg0, arg3); emith_read_r_r_r(arg1, arg2, arg1); - emith_ror(arg1, arg1, 16); + emit_le_swap(-1, arg1); emith_push_ret(arg1); emith_move_r_r_ptr(arg2, CONTEXT_REG); emith_abicall(p32x_sh2_poll_memory32); diff --git a/pico/32x/32x.c b/pico/32x/32x.c index b91310a9..bf3c9ec0 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -123,7 +123,6 @@ void Pico32xStartup(void) emu_32x_startup(); } -#define HWSWAP(x) (((x) << 16) | ((x) >> 16)) void p32x_reset_sh2s(void) { elprintf(EL_32X, "sh2 reset"); @@ -143,9 +142,9 @@ void p32x_reset_sh2s(void) unsigned int vbr; // initial data - idl_src = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d4)) & ~0xf0000000; - idl_dst = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d8)) & ~0xf0000000; - idl_size= HWSWAP(*(unsigned int *)(Pico.rom + 0x3dc)); + idl_src = CPU_BE2(*(unsigned int *)(Pico.rom + 0x3d4)) & ~0xf0000000; + idl_dst = CPU_BE2(*(unsigned int *)(Pico.rom + 0x3d8)) & ~0xf0000000; + idl_size= CPU_BE2(*(unsigned int *)(Pico.rom + 0x3dc)); if (idl_size > Pico.romsize || idl_src + idl_size > Pico.romsize || idl_size > 0x40000 || idl_dst + idl_size > 0x40000 || (idl_src & 3) || (idl_dst & 3)) { elprintf(EL_STATUS|EL_ANOMALY, "32x: invalid initial data ptrs: %06x -> %06x, %06x", @@ -155,7 +154,7 @@ void p32x_reset_sh2s(void) memcpy(Pico32xMem->sdram + idl_dst, Pico.rom + idl_src, idl_size); // VBR - vbr = HWSWAP(*(unsigned int *)(Pico.rom + 0x3e8)); + vbr = CPU_BE2(*(unsigned int *)(Pico.rom + 0x3e8)); sh2_set_vbr(0, vbr); // checksum and M_OK @@ -169,7 +168,7 @@ void p32x_reset_sh2s(void) unsigned int vbr; // GBR/VBR - vbr = HWSWAP(*(unsigned int *)(Pico.rom + 0x3ec)); + vbr = CPU_BE2(*(unsigned int *)(Pico.rom + 0x3ec)); 
sh2_set_gbr(1, 0x20004000); sh2_set_vbr(1, vbr); // program will set S_OK diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 0277db2c..f5638b96 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -81,11 +81,11 @@ static void convert_pal555(int invert_prio) int i = 320; \ while (i > 0) { \ for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \ - t = pal[*(unsigned char *)((uintptr_t)(p32x++) ^ 1)]; \ + t = pal[*(unsigned char *)(MEM_BE2((uintptr_t)(p32x++)))]; \ *pd = t; \ } \ for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \ - t = pal[*(unsigned char *)((uintptr_t)(p32x++) ^ 1)]; \ + t = pal[*(unsigned char *)(MEM_BE2((uintptr_t)(p32x++)))]; \ if (t & PXPRIO) \ *pd = t; \ else \ diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 931e432e..fe9ae57d 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -56,7 +56,7 @@ static void (*m68k_write8_io)(u32 a, u32 d); static void (*m68k_write16_io)(u32 a, u32 d); // addressing byte in 16bit reg -#define REG8IN16(ptr, offs) ((u8 *)ptr)[(offs) ^ 1] +#define REG8IN16(ptr, offs) ((u8 *)ptr)[MEM_BE2(offs)] // poll detection #define POLL_THRESHOLD 5 @@ -1081,7 +1081,7 @@ static void PicoWrite8_32x_on(u32 a, u32 d) // TODO: verify if ((a & 0xfe00) == 0x5200) { // a15200 elprintf(EL_32X|EL_ANOMALY, "m68k 32x PAL w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); - ((u8 *)Pico32xMem->pal)[(a & 0x1ff) ^ 1] = d; + ((u8 *)Pico32xMem->pal)[MEM_BE2(a & 0x1ff)] = d; Pico32x.dirty_pal = 1; return; } @@ -1173,7 +1173,7 @@ u32 PicoRead8_32x(u32 a) if (PicoIn.opt & POPT_EN_32X) { if ((a & 0xffc0) == 0x5100) { // a15100 // regs are always readable - d = ((u8 *)Pico32x.regs)[(a & 0x3f) ^ 1]; + d = ((u8 *)Pico32x.regs)[MEM_BE2(a & 0x3f)]; goto out; } @@ -1236,7 +1236,7 @@ void PicoWrite8_32x(u32 a, u32 d) // allow only COMM for now if ((a & 0x30) == 0x20) { u8 *r8 = (u8 *)r; - r8[a ^ 1] = d; + r8[MEM_BE2(a)] = d; } return; } @@ -1275,7 +1275,7 @@ void PicoWrite16_32x(u32 a, u32 d) #define sh2_write8_dramN(p, a, d) \ if 
((d & 0xff) != 0) { \ u8 *dram = (u8 *)p; \ - dram[(a & 0x1ffff) ^ 1] = d; \ + dram[MEM_BE2(a & 0x1ffff)] = d; \ } static void m68k_write8_dram0_ow(u32 a, u32 d) @@ -1315,7 +1315,7 @@ static void m68k_write16_dram1_ow(u32 a, u32 d) static void PicoWrite8_hint(u32 a, u32 d) { if ((a & 0xfffc) == 0x0070) { - Pico32xMem->m68k_rom[a ^ 1] = d; + Pico32xMem->m68k_rom[MEM_BE2(a)] = d; return; } @@ -1468,9 +1468,9 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) // TODO: mirroring? if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) - d = Pico32xMem->sh2_rom_m.b[a ^ 1]; + d = Pico32xMem->sh2_rom_m.b[MEM_BE2(a)]; else if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) - d = Pico32xMem->sh2_rom_s.b[a ^ 1]; + d = Pico32xMem->sh2_rom_s.b[MEM_BE2(a)]; else d = sh2_read8_unmapped(a, sh2); goto out; @@ -1493,7 +1493,7 @@ static u32 REGPARM(2) sh2_read8_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; s8 *p = sh2->p_rom; - return p[(bank + (a & 0x7ffff)) ^ 1]; + return p[MEM_BE2(bank + (a & 0x7ffff))]; } // read16 @@ -1569,7 +1569,7 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; u32 *p = sh2->p_rom; u32 d = p[(bank + (a & 0x7fffc)) / 4]; - return (d << 16) | (d >> 16); + return CPU_BE2(d); } // writes @@ -1644,7 +1644,7 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) if ((a & 0x3fe00) == 0x4200) { sh2->poll_cnt = 0; - ((u8 *)Pico32xMem->pal)[(a & 0x1ff) ^ 1] = d; + ((u8 *)Pico32xMem->pal)[MEM_BE2(a & 0x1ff)] = d; Pico32x.dirty_pal = 1; goto out; } @@ -1667,7 +1667,7 @@ static void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) { - u32 a1 = (a & 0x3ffff) ^ 1; + u32 a1 = MEM_BE2(a & 0x3ffff); ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; @@ -1679,7 +1679,7 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_da(u32 a, u32 d, 
SH2 *sh2) { - u32 a1 = (a & 0xfff) ^ 1; + u32 a1 = MEM_BE2(a & 0xfff); sh2->data_array[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; @@ -1805,10 +1805,10 @@ static void REGPARM(3) sh2_write32_cs0(u32 a, u32 d, SH2 *sh2) #define sh2_write32_dramN(p, a, d) \ u32 *pd = &((u32 *)p)[(a & 0x1ffff) / 4]; \ if (!(a & 0x20000)) { \ - *pd = (d << 16) | (d >> 16); \ + *pd = CPU_BE2(d); \ } else { \ /* overwrite */ \ - u32 v = *pd, m = 0; d = (d << 16) | (d >> 16) ; \ + u32 v = *pd, m = 0; d = CPU_BE2(d); \ if (!(d & 0x000000ff)) m |= 0x000000ff; \ if (!(d & 0x0000ff00)) m |= 0x0000ff00; \ if (!(d & 0x00ff0000)) m |= 0x00ff0000; \ @@ -1829,7 +1829,7 @@ static void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0x3fffc; - *(u32 *)((char*)sh2->p_sdram + a1) = (d << 16) | (d >> 16); + *(u32 *)((char*)sh2->p_sdram + a1) = CPU_BE2(d); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; @@ -1842,7 +1842,7 @@ static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) { u32 a1 = a & 0xffc; - *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); + *((u32 *)sh2->data_array + a1/4) = CPU_BE2(d); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; @@ -1876,7 +1876,7 @@ u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; if (!map_flag_set(p)) - return *(s8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); + return *(s8 *)((p << 1) + MEM_BE2(a & sh2_map->mask)); else return ((sh2_read_handler *)(p << 1))(a, sh2); } @@ -1903,7 +1903,7 @@ u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) p = sh2_map->addr; if (!map_flag_set(p)) { u32 *pd = (u32 *)((p << 1) + (a & sh2_map->mask)); - return (*pd << 16) | (*pd >> 16); + return CPU_BE2(*pd); } else return ((sh2_read_handler *)(p << 1))(a, sh2); } @@ -1979,7 +1979,7 @@ int p32x_sh2_memcpy(u32 dst, 
u32 src, int count, int size, SH2 *sh2) // align dst to halfword if (dst & 1) { - p32x_sh2_write8(dst, *(u8 *)((uptr)ps ^ 1), sh2); + p32x_sh2_write8(dst, *(u8 *)MEM_BE2((uptr)ps), sh2); ps++, dst++, len --; } @@ -1990,7 +1990,11 @@ int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2) u16 dl, dh = *sp++; for (i = 0; i < (len & ~1); i += 2, dst += 2, sp++) { dl = dh, dh = *sp; +#if CPU_IS_LE p32x_sh2_write16(dst, (dh >> 8) | (dl << 8), sh2); +#else + p32x_sh2_write16(dst, (dl >> 8) | (dh << 8), sh2); +#endif } if (len & 1) p32x_sh2_write8(dst, dh, sh2); @@ -2014,7 +2018,7 @@ int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2) u32 d; for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) { d = *(u32 *)sp; - p32x_sh2_write32(dst, (d << 16) | (d >> 16), sh2); + p32x_sh2_write32(dst, CPU_BE2(d), sh2); } } if (len & 2) { @@ -2022,7 +2026,7 @@ int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2) dst += 2; } if (len & 1) - p32x_sh2_write8(dst, *sp >> 8, sh2); + p32x_sh2_write8(dst, ((u8 *)sp)[MEM_BE2(0)], sh2); } return count; @@ -2137,7 +2141,6 @@ static const u16 ssh2_code[] = { 0x2400, 0x0018, // 23c _start_cd }; -#define HWSWAP(x) (((u16)(x) << 16) | ((x) >> 16)) static void get_bios(void) { u16 *ps; @@ -2167,7 +2170,7 @@ static void get_bios(void) ps = (u16 *)Pico32xMem->m68k_rom; pl = (u32 *)ps; for (i = 1; i < 0xc0/4; i++) - pl[i] = HWSWAP(0x880200 + (i - 1) * 6); + pl[i] = CPU_BE2(0x880200 + (i - 1) * 6); pl[0x70/4] = 0; // fill with nops @@ -2197,12 +2200,12 @@ static void get_bios(void) // fill exception vector table to our trap address for (i = 0; i < 128; i++) - pl[i] = HWSWAP(0x200); + pl[i] = CPU_BE2(0x200); // start - pl[0] = pl[2] = HWSWAP(0x204); + pl[0] = pl[2] = CPU_BE2(0x204); // reset SP - pl[1] = pl[3] = HWSWAP(0x6040000); + pl[1] = pl[3] = CPU_BE2(0x6040000); // startup code memcpy(&Pico32xMem->sh2_rom_m.b[0x200], msh2_code, sizeof(msh2_code)); @@ -2218,12 +2221,12 @@ static void get_bios(void) // fill 
exception vector table to our trap address for (i = 0; i < 128; i++) - pl[i] = HWSWAP(0x200); + pl[i] = CPU_BE2(0x200); // start - pl[0] = pl[2] = HWSWAP(0x204); + pl[0] = pl[2] = CPU_BE2(0x204); // reset SP - pl[1] = pl[3] = HWSWAP(0x603f800); + pl[1] = pl[3] = CPU_BE2(0x603f800); // startup code memcpy(&Pico32xMem->sh2_rom_s.b[0x200], ssh2_code, sizeof(ssh2_code)); diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index a355f759..ae9474d2 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -290,7 +290,7 @@ u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) u32 d; a &= 0x1fe; - d = r[(a / 2) ^ 1]; + d = r[MEM_BE2(a / 2)]; elprintf_sh2(sh2, EL_32XP, "peri r16 [%08x] %04x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); @@ -420,7 +420,7 @@ void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2) return; } - r[(a / 2) ^ 1] = d; + r[MEM_BE2(a / 2)] = d; if ((a & 0x1c0) == 0x140) p32x_sh2_poll_event(sh2, SH2_STATE_CPOLL, SekCyclesDone()); } diff --git a/pico/cart.c b/pico/cart.c index 17350046..9d35c04c 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -184,6 +184,9 @@ zip_failed: if (fread(&cso->header, 1, sizeof(cso->header), f) != sizeof(cso->header)) goto cso_failed; + cso->header.block_size = CPU_LE4(cso->header.block_size); + cso->header.total_bytes = CPU_LE4(cso->header.total_bytes); + cso->header.total_bytes_high = CPU_LE4(cso->header.total_bytes_high); if (strncmp(cso->header.magic, "CISO", 4) != 0) { elprintf(EL_STATUS, "cso: bad header"); @@ -453,6 +456,7 @@ int pm_close(pm_file *fp) // byteswap, data needs to be int aligned, src can match dst void Byteswap(void *dst, const void *src, int len) { +#if CPU_IS_LE const unsigned int *ps = src; unsigned int *pd = dst; int i, m; @@ -465,14 +469,15 @@ void Byteswap(void *dst, const void *src, int len) unsigned int t = ps[i]; pd[i] = ((t & m) << 8) | ((t & ~m) >> 8); } +#endif } // Interleve a 16k block and byteswap static int InterleveBlock(unsigned char *dest,unsigned char *src) { int i=0; - for (i=0;i<0x2000;i++) 
dest[(i<<1) ]=src[ i]; // Odd - for (i=0;i<0x2000;i++) dest[(i<<1)+1]=src[0x2000+i]; // Even + for (i=0;i<0x2000;i++) dest[(i<<1)+MEM_BE2(1)]=src[ i]; // Odd + for (i=0;i<0x2000;i++) dest[(i<<1)+MEM_BE2(0)]=src[0x2000+i]; // Even return 0; } @@ -615,7 +620,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ // This will hang the emu, but will prevent nasty crashes. // note: 4 bytes are padded to every ROM if (rom != NULL) - *(unsigned long *)(rom+romsize) = 0xFFFE4EFA; // 4EFA FFFE byteswapped + *(unsigned long *)(rom+romsize) = CPU_BE2(0x4EFAFFFE); Pico.rom=rom; Pico.romsize=romsize; @@ -714,7 +719,7 @@ static int rom_strcmp(int rom_offset, const char *s1) if (rom_offset + len > Pico.romsize) return 0; for (i = 0; i < len; i++) - if (s1[i] != s_rom[(i + rom_offset) ^ 1]) + if (s1[i] != s_rom[MEM_BE2(i + rom_offset)]) return 1; return 0; } diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index 2fed8220..3b8e8d43 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -395,7 +395,7 @@ static u32 carthw_pier_prot_read8(u32 a) } elprintf(EL_UIO, "pier r8 [%06x] @%06x", a, SekPc); - return Pico.rom[(a & 0x7fff) ^ 1]; + return Pico.rom[MEM_BE2(a & 0x7fff)]; } static void carthw_pier_mem_setup(void) diff --git a/pico/cd/genplus_macros.h b/pico/cd/genplus_macros.h index 85da416b..a665e5c2 100644 --- a/pico/cd/genplus_macros.h +++ b/pico/cd/genplus_macros.h @@ -12,8 +12,8 @@ #define int16 s16 #define int32 s32 -#define READ_BYTE(BASE, ADDR) (BASE)[(ADDR)^1] -#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[(ADDR)^1] = (VAL) +#define READ_BYTE(BASE, ADDR) (BASE)[MEM_BE2(ADDR)] +#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[MEM_BE2(ADDR)] = (VAL) #define load_param(param, size) \ memcpy(param, &state[bufferptr], size); \ diff --git a/pico/cd/memory.c b/pico/cd/memory.c index e8a025c1..7cf9ffd5 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -560,13 +560,13 @@ write_comm: static u32 PicoReadM68k8_cell0(u32 a) { a = (a&3) | 
(cell_map(a >> 2) << 2); // cell arranged - return Pico_mcd->word_ram1M[0][a ^ 1]; + return Pico_mcd->word_ram1M[0][MEM_BE2(a)]; } static u32 PicoReadM68k8_cell1(u32 a) { a = (a&3) | (cell_map(a >> 2) << 2); - return Pico_mcd->word_ram1M[1][a ^ 1]; + return Pico_mcd->word_ram1M[1][MEM_BE2(a)]; } static u32 PicoReadM68k16_cell0(u32 a) @@ -584,13 +584,13 @@ static u32 PicoReadM68k16_cell1(u32 a) static void PicoWriteM68k8_cell0(u32 a, u32 d) { a = (a&3) | (cell_map(a >> 2) << 2); - Pico_mcd->word_ram1M[0][a ^ 1] = d; + Pico_mcd->word_ram1M[0][MEM_BE2(a)] = d; } static void PicoWriteM68k8_cell1(u32 a, u32 d) { a = (a&3) | (cell_map(a >> 2) << 2); - Pico_mcd->word_ram1M[1][a ^ 1] = d; + Pico_mcd->word_ram1M[1][MEM_BE2(a)] = d; } static void PicoWriteM68k16_cell0(u32 a, u32 d) @@ -754,7 +754,7 @@ static void s68k_unmapped_write16(u32 a, u32 d) static void PicoWriteS68k8_prgwp(u32 a, u32 d) { if (a >= (Pico_mcd->s68k_regs[2] << 9)) - Pico_mcd->prg_ram[a ^ 1] = d; + Pico_mcd->prg_ram[MEM_BE2(a)] = d; } static void PicoWriteS68k16_prgwp(u32 a, u32 d) @@ -768,7 +768,7 @@ static void PicoWriteS68k16_prgwp(u32 a, u32 d) // decode (080000 - 0bffff, in 1M mode) static u32 PicoReadS68k8_dec0(u32 a) { - u32 d = Pico_mcd->word_ram1M[0][((a >> 1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[0][MEM_BE2(a >> 1) & 0x1ffff]; if (a & 1) d &= 0x0f; else @@ -778,7 +778,7 @@ static u32 PicoReadS68k8_dec0(u32 a) static u32 PicoReadS68k8_dec1(u32 a) { - u32 d = Pico_mcd->word_ram1M[1][((a >> 1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[1][MEM_BE2(a >> 1) & 0x1ffff]; if (a & 1) d &= 0x0f; else @@ -788,7 +788,7 @@ static u32 PicoReadS68k8_dec1(u32 a) static u32 PicoReadS68k16_dec0(u32 a) { - u32 d = Pico_mcd->word_ram1M[0][((a >> 1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[0][MEM_BE2(a >> 1) & 0x1ffff]; d |= d << 4; d &= ~0xf0; return d; @@ -796,7 +796,7 @@ static u32 PicoReadS68k16_dec0(u32 a) static u32 PicoReadS68k16_dec1(u32 a) { - u32 d = Pico_mcd->word_ram1M[1][((a >> 1) 
^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[1][MEM_BE2(a >> 1) & 0x1ffff]; d |= d << 4; d &= ~0xf0; return d; @@ -806,7 +806,7 @@ static u32 PicoReadS68k16_dec1(u32 a) #define mk_decode_w8(bank) \ static void PicoWriteS68k8_dec_m0b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ if (!(a & 1)) \ *pd = (*pd & 0x0f) | (d << 4); \ @@ -816,7 +816,7 @@ static void PicoWriteS68k8_dec_m0b##bank(u32 a, u32 d) \ \ static void PicoWriteS68k8_dec_m1b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ u8 mask = (a & 1) ? 0x0f : 0xf0; \ \ if (!(*pd & mask) && (d & 0x0f)) /* underwrite */ \ @@ -835,7 +835,7 @@ mk_decode_w8(1) #define mk_decode_w16(bank) \ static void PicoWriteS68k16_dec_m0b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ d &= 0x0f0f; \ *pd = d | (d >> 4); \ @@ -843,7 +843,7 @@ static void PicoWriteS68k16_dec_m0b##bank(u32 a, u32 d) \ \ static void PicoWriteS68k16_dec_m1b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ d &= 0x0f0f; /* underwrite */ \ if (!(*pd & 0xf0)) *pd |= d >> 4; \ @@ -852,7 +852,7 @@ static void PicoWriteS68k16_dec_m1b##bank(u32 a, u32 d) \ \ static void PicoWriteS68k16_dec_m2b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ d &= 0x0f0f; /* overwrite */ \ d |= d >> 4; \ diff --git a/pico/draw.c b/pico/draw.c index 513bd665..a2e82c06 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -331,7 +331,7 @@ static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) pal = 
((code>>9)&0x30) | sh; // shadow - pack = *(u32 *)(PicoMem.vram + addr); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); if (!pack) blank = code; } @@ -420,7 +420,7 @@ static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) } pack = (code & 0x1000 ? ty^0xe : ty); // Y-flip - pack = *(u32 *)(PicoMem.vram + addr+pack); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr+pack)); if (!pack) blank = code; @@ -480,7 +480,7 @@ void DrawStripInterlace(struct TileStrip *ts, int plane_sh) pal = ((code>>9)&0x30) | sh; // shadow - pack = *(u32 *)(PicoMem.vram + addr); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); if (!pack) blank = code; } @@ -630,7 +630,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - pack = *(u32 *)(PicoMem.vram + addr); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); if (!pack) { blank = code; continue; @@ -672,7 +672,7 @@ static void DrawWindow(int tstart, int tend, int prio, int sh, addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - pack = *(u32 *)(PicoMem.vram + addr); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); if (!pack) { blank = code; continue; @@ -851,7 +851,7 @@ static void DrawSprite(u32 *sprite, int sh, int w) if(sx<=0) continue; if(sx>=328) break; // Offscreen - pack = *(u32 *)(PicoMem.vram + (tile & 0x7fff)); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); fTileFunc(pd + sx, pack, pal); } } @@ -867,7 +867,7 @@ static void DrawSpriteInterlace(u32 *sprite) int sx, sy; // parse the sprite data - sy=sprite[0]; + sy=CPU_LE2(sprite[0]); height=sy>>24; sy=(sy&0x3ff)-0x100; // Y width=(height>>2)&3; height&=3; @@ -875,7 +875,7 @@ static void DrawSpriteInterlace(u32 *sprite) row=(Pico.est.DrawScanline<<1)-sy; // Row of the sprite we are on - code=sprite[1]; + code=CPU_LE2(sprite[1]); sx=((code>>16)&0x1ff)-0x78; // X if (code&0x1000) row^=(16<=328) break; // Offscreen - pack = *(u32 
*)(PicoMem.vram + (tile & 0x7fff)); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); if (code & 0x0800) TileFlip(pd + sx, pack, pal); else TileNorm(pd + sx, pack, pal); } @@ -923,8 +923,8 @@ static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) sprite=(u32 *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info - code = sprite[0]; - sx = sprite[1]; + code = CPU_LE2(sprite[0]); + sx = CPU_LE2(sprite[1]); if(((sx>>15)&1) != pri) goto nextsprite; // wrong priority sprite // check if it is on this line @@ -1020,7 +1020,7 @@ static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) if(sx<=0) continue; if(sx>=328) break; // Offscreen - pack = *(u32 *)(PicoMem.vram + (tile & 0x7fff)); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); fTileFunc(pd + sx, pack, pal); } } @@ -1089,7 +1089,7 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) if(sx>=328) break; // Offscreen - pack = *(u32 *)(PicoMem.vram + (tile & 0x7fff)); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); m |= mp[1] << 8; // next mask byte // shift mask bits to bits 8-15 for easier load/store handling @@ -1132,7 +1132,7 @@ static void DrawStripForced(struct TileStrip *ts, int cellskip) pal = (code>>9)&0x30; } - pack = *(u32 *)(PicoMem.vram + addr); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); else TileNorm_and(pd + dx, pack, pal); @@ -1196,7 +1196,7 @@ static void DrawStripVSRamForced(struct TileStrip *ts, int plane_sh, int cellski } pack = code & 0x1000 ? 
ty^0xe : ty; // Y-flip - pack = *(u32 *)(PicoMem.vram + addr+pack); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr+pack)); if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); else TileNorm_and(pd + dx, pack, pal); @@ -1230,7 +1230,7 @@ void DrawStripInterlaceForced(struct TileStrip *ts) pal = (code>>9)&0x30; // shadow - pack = *(u32 *)(PicoMem.vram + addr); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); } if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); @@ -1363,11 +1363,11 @@ static void DrawSpritesForced(unsigned char *sprited) mp = mb+(sx>>3); for (m = *mp; width; width--, sx+=8, tile+=delta, *mp++ = m, m >>= 8) { - unsigned int pack; + u32 pack; if(sx>=328) break; // Offscreen - pack = *(u32 *)(PicoMem.vram + (tile & 0x7fff)); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); m |= mp[1] << 8; // next mask byte // shift mask bits to bits 8-15 for easier load/store handling @@ -1428,13 +1428,13 @@ static NOINLINE void PrepareSprites(int max_lines) // parse sprite info. 
the 1st half comes from the VDPs internal cache, // the 2nd half is read from VRAM - code = VdpSATCache[link]; // normally but not always equal to sprite[0] + code = CPU_LE2(VdpSATCache[link]); // normally same as sprite[0] sy = (code&0x1ff)-0x80; hv = (code>>24)&0xf; height = (hv&3)+1; width = (hv>>2)+1; - code2 = sprite[1]; + code2 = CPU_LE2(sprite[1]); sx = (code2>>16)&0x1ff; sx -= 0x78; // Get X coordinate + 8 diff --git a/pico/memory.c b/pico/memory.c index 96e9fd3f..72b7a909 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -438,7 +438,7 @@ static u32 PicoRead8_sram(u32 a) // XXX: this is banking unfriendly if (a < Pico.romsize) - return Pico.rom[a ^ 1]; + return Pico.rom[MEM_BE2(a)]; return m68k_unmapped_read8(a); } diff --git a/pico/memory.h b/pico/memory.h index eba23471..37439a79 100644 --- a/pico/memory.h +++ b/pico/memory.h @@ -63,7 +63,7 @@ u32 name(u32 a) \ if (map_flag_set(v)) \ return ((cpu68k_read_f *)(v << 1))(a); \ else \ - return *(u8 *)((v << 1) + (a ^ 1)); \ + return *(u8 *)((v << 1) + MEM_BE2(a)); \ } #define MAKE_68K_READ16(name, map) \ @@ -106,7 +106,7 @@ void name(u32 a, u8 d) \ if (map_flag_set(v)) \ ((cpu68k_write_f *)(v << 1))(a, d); \ else \ - *(u8 *)((v << 1) + (a ^ 1)) = d; \ + *(u8 *)((v << 1) + MEM_BE2(a)) = d; \ } #define MAKE_68K_WRITE16(name, map) \ diff --git a/pico/mode4.c b/pico/mode4.c index 9cbc1d40..00a4aef9 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -123,7 +123,7 @@ static void draw_sprites(int scanline) for (i = s = 0; i < 64; i++) { int y; - y = sat[i] + 1; + y = sat[MEM_LE2(i)] + 1; if (y == 0xd1) break; if (y + h <= scanline || scanline < y) @@ -133,8 +133,8 @@ static void draw_sprites(int scanline) break; } - sprites_x[s] = xoff + sat[0x80 + i*2]; - sprites_addr[s] = sprite_base + ((sat[0x80 + i*2 + 1] & addr_mask) << (5-1)) + + sprites_x[s] = xoff + sat[MEM_LE2(0x80 + i*2)]; + sprites_addr[s] = sprite_base + ((sat[MEM_LE2(0x80 + i*2 + 1)] & addr_mask) << (5-1)) + ((scanline - y) << (2-1)); s++; } @@ -145,7 
+145,7 @@ static void draw_sprites(int scanline) // now draw all sprites backwards for (--s; s >= 0; s--) { - pack = *(u32 *)(PicoMem.vram + sprites_addr[s]); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + sprites_addr[s])); TileNormM4(sprites_x[s], pack, 0x10); } } @@ -176,7 +176,7 @@ static void draw_strip_low(const unsigned short *nametab, int dx, int cells, int pal = (code>>7) & 0x10; } - pack = *(u32 *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); /* Get 4 bitplanes / 8 pixels */ if (pack == 0) TileBGM4(dx, pal); else if (code & 0x0200) TileFlipM4Low(dx, pack, pal); else TileNormM4Low(dx, pack, pal); @@ -211,7 +211,7 @@ static void draw_strip_high(const unsigned short *nametab, int dx, int cells, in pal = (code>>7) & 0x10; } - pack = *(u32 *)(PicoMem.vram + addr); /* Get 4 bitplanes / 8 pixels */ + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); /* Get 4 bitplanes / 8 pixels */ if (pack == 0) { blank = code; continue; diff --git a/pico/pico_int.h b/pico/pico_int.h index ccbf6bc0..3cf5858c 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -569,8 +569,8 @@ typedef struct #define P32XI_CMD (1 << 8/2) #define P32XI_PWM (1 << 6/2) -// peripheral reg access -#define PREG8(regs,offs) ((unsigned char *)regs)[offs ^ 3] +// peripheral reg access (32 bit regs) +#define PREG8(regs,offs) ((unsigned char *)regs)[MEM_BE4(offs)] #define DMAC_FIFO_LEN (4*2) #define PWM_BUFF_LEN 1024 // in one channel samples diff --git a/pico/pico_port.h b/pico/pico_port.h index af9ce853..ac8f0ccd 100644 --- a/pico/pico_port.h +++ b/pico/pico_port.h @@ -6,6 +6,7 @@ #if !(defined(_MSC_VER) && _MSC_VER < 1800) #include #endif +#include "pico_types.h" #if defined(__GNUC__) && defined(__i386__) #define REGPARM(x) __attribute__((regparm(x))) @@ -32,4 +33,46 @@ #define strdup _strdup #endif + +// There's no standard way to determine endianess at compile time. +// Do not bother with mixed endian platforms, no one will ever compile on that. 
+#if defined __BYTE_ORDER__
+#define CPU_IS_LE (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#elif defined __BYTE_ORDER // guard must test the macro that is compared below
+#define CPU_IS_LE (__BYTE_ORDER == __LITTLE_ENDIAN)
+#elif defined __BIG_ENDIAN__ || defined _M_PPC // Windows on PPC was big endian
+#define CPU_IS_LE 0
+#elif defined __LITTLE_ENDIAN__ || defined _WIN32 // all other Windows is LE
+#define CPU_IS_LE 1
+#else
+#warning "can't detect byte order, assume little endian"
+#define CPU_IS_LE 1
+#endif
+
+#if CPU_IS_LE
+// address/offset operations (little endian host: BE-ordered items need xor)
+#define MEM_BE2(a) ((a)^1) // addr/offs of u8 in u16, or u16 in u32
+#define MEM_BE4(a) ((a)^3) // addr/offs of u8 in u32
+#define MEM_LE2(a) (a)
+#define MEM_LE4(a) (a)
+// swapping; shifts are done on (u32)(v) so signed operands can't sign-extend
+#define CPU_BE2(v) (((u32)(v)<<16)|((u32)(v)>>16)) // swap of 2*u16 in u32
+#define CPU_BE4(v) (((u32)(v)>>24)|(((u32)(v)>>8)&0x00ff00)| \
+ (((u32)(v)<<8)&0xff0000)|((u32)(v)<<24)) // swap of 4*u8 in u32
+#define CPU_LE2(v) (v) // no-op on a little endian host
+#define CPU_LE4(v) (v) // no-op on a little endian host
+#else
+// address/offset operations (big endian host: LE-ordered items need xor)
+#define MEM_BE2(a) (a)
+#define MEM_BE4(a) (a)
+#define MEM_LE2(a) ((a)^1) // addr/offs of u8 in u16, or u16 in u32
+#define MEM_LE4(a) ((a)^3) // addr/offs of u8 in u32
+// swapping; shifts are done on (u32)(v) so signed operands can't sign-extend
+#define CPU_BE2(v) (v) // no-op on a big endian host
+#define CPU_BE4(v) (v) // no-op on a big endian host
+#define CPU_LE2(v) (((u32)(v)<<16)|((u32)(v)>>16)) // swap of 2*u16 in u32
+#define CPU_LE4(v) (((u32)(v)>>24)|(((u32)(v)>>8)&0x00ff00)| \
+ (((u32)(v)<<8)&0xff0000)|((u32)(v)<<24)) // swap of 4*u8 in u32
+#endif
+
 #endif // PICO_PORT_INCLUDED
diff --git a/pico/sms.c b/pico/sms.c
index d4b412b3..57181a35 100644
--- a/pico/sms.c
+++ b/pico/sms.c
@@ -28,7 +28,7 @@ static unsigned char vdp_data_read(void)
   struct PicoVideo *pv = &Pico.video;
   unsigned char d;

-  d = PicoMem.vramb[pv->addr];
+  d = PicoMem.vramb[MEM_LE2(pv->addr)];
   pv->addr = (pv->addr + 1) & 0x3fff;
   pv->pending = 0;
   return d;
@@ -56,7 +56,7 @@ static void vdp_data_write(unsigned char d)
     if (PicoMem.cram[pv->addr & 0x1f] != d) Pico.m.dirtyPal = 1;
     PicoMem.cram[pv->addr & 0x1f] = d;
   } else {
-    PicoMem.vramb[pv->addr] = d;
+    PicoMem.vramb[MEM_LE2(pv->addr)] = d;
   }
   pv->addr = (pv->addr + 1) & 0x3fff;

diff --git a/platform/common/plat_sdl.c
b/platform/common/plat_sdl.c index 01958dc7..96671f37 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -19,7 +19,7 @@ #include "plat_sdl.h" #include "version.h" -#include +#include static void *shadow_fb; @@ -73,7 +73,11 @@ void bgr_to_uyvy_init(void) int r = (i >> 11) & 0x1f, g = (i >> 6) & 0x1f, b = (i >> 0) & 0x1f; int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16; yuv_uyvy[i].y = yuv_y[y]; +#if CPU_IS_LE yuv_uyvy[i].vyu = (yuv_v[r-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[b-y + 32]; +#else + yuv_uyvy[i].vyu = (yuv_v[b-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[r-y + 32]; +#endif } } @@ -87,17 +91,29 @@ void rgb565_to_uyvy(void *d, const void *s, int pixels, int x2) { struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; +#if CPU_IS_LE dst[0] = (uyvy0->y << 24) | uyvy0->vyu; dst[1] = (uyvy1->y << 24) | uyvy1->vyu; dst[2] = (uyvy2->y << 24) | uyvy2->vyu; dst[3] = (uyvy3->y << 24) | uyvy3->vyu; +#else + dst[0] = uyvy0->y | (uyvy0->vyu << 8); + dst[1] = uyvy1->y | (uyvy1->vyu << 8); + dst[2] = uyvy2->y | (uyvy2->vyu << 8); + dst[3] = uyvy3->y | (uyvy3->vyu << 8); +#endif } else for (; pixels > 0; src += 4, dst += 2, pixels -= 4) { struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; +#if CPU_IS_LE dst[0] = (uyvy1->y << 24) | uyvy0->vyu; dst[1] = (uyvy3->y << 24) | uyvy2->vyu; +#else + dst[0] = uyvy1->y | (uyvy0->vyu << 8); + dst[1] = uyvy3->y | (uyvy2->vyu << 8); +#endif } }