From 8ac9ab7fcb43d33952f5293720b868e7acbb62b4 Mon Sep 17 00:00:00 2001 From: kub Date: Wed, 8 Jan 2020 00:49:13 +0100 Subject: [PATCH] audio: added SSG-EG to YM2612, plus some timing changes for SN76496+YM2612 --- Makefile | 2 +- cpu/drc/emit_arm64.c | 2 +- pico/memory.c | 6 +- pico/pico.h | 2 +- pico/pico_cmn.c | 29 +-- pico/pico_int.h | 7 +- pico/sms.c | 8 +- pico/sound/mix.c | 7 +- pico/sound/mix_arm.S | 6 +- pico/sound/sound.c | 150 ++++++-------- pico/sound/ym2612.c | 367 ++++++++++++++++++++++++----------- pico/sound/ym2612.h | 22 ++- pico/sound/ym2612_arm.S | 420 +++++++++++++++++++++------------------- 13 files changed, 572 insertions(+), 456 deletions(-) diff --git a/Makefile b/Makefile index 49116ce0..053e1606 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ endif ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) # very small caches, avoid optimization options making the binary much bigger -CFLAGS += -finline-limit=43 -fno-unroll-loops -fno-ipa-cp -ffast-math +CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -ffast-math # this gets you about 20% better execution speed on 32bit arm/mips CFLAGS += -fno-common -fno-stack-protector -fno-guess-branch-probability -fno-caller-saves -fno-tree-loop-if-convert -fno-regmove endif diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 2e873161..f4645bc1 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1393,7 +1393,7 @@ static void emith_sync_t(int sr) else if (tcond >= 0) { int tmp = rcache_get_tmp(); EMIT(A64_CSET(tcond, tmp)); - EMIT(A64_BFI_IMM(sr, tmp, 0, 1)); // assumes SR.T = bit 0 + EMIT(A64_BFI_IMM(sr, tmp, __builtin_ffs(T)-1, 1)); rcache_free_tmp(tmp); } tcond = -1; diff --git a/pico/memory.c b/pico/memory.c index cc82f789..9fe3a085 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -546,7 +546,7 @@ static void PicoWrite8_z80(u32 a, u32 d) } if ((a & 0x6000) == 0x4000) { // FM Sound if (PicoIn.opt & POPT_EN_FM) - Pico.m.status |= ym2612_write_local(a & 3, d & 0xff, 0) & 1; + ym2612_write_local(a & 3, d & 0xff, 0); return; } // TODO: probably other VDP access too? Maybe more mirrors? @@ -1059,6 +1059,8 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) break; } + int scanline = get_scanline(is_from_z80); + PsndDoFM(scanline); #ifdef __GP2X__ if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); @@ -1224,7 +1226,7 @@ static unsigned char z80_md_bank_read(unsigned short a) static void z80_md_ym2612_write(unsigned int a, unsigned char data) { if (PicoIn.opt & POPT_EN_FM) - Pico.m.status |= ym2612_write_local(a, data, 1) & 1; + ym2612_write_local(a, data, 1); } static void z80_md_vdp_br_write(unsigned int a, unsigned char data) diff --git a/pico/pico.h b/pico/pico.h index a9359a18..daf5dfdf 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -70,7 +70,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_EN_DRC (1<<17) #define POPT_DIS_SPRITE_LIM (1<<18) #define POPT_DIS_IDLE_DET (1<<19) -#define POPT_EN_32X (1<<20) +#define POPT_EN_32X (1<<20) // x0 0000 #define POPT_EN_PWM (1<<21) #define POPT_PWM_IRQ_OPT (1<<22) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 1f89da90..5fa0b16f 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -88,7 +88,6 @@ static void do_timing_hacks_vb(void) static int PicoFrameHints(void) { struct PicoVideo *pv = &Pico.video; - int line_sample = Pico.m.pal ? 68 : 93; int vdp_slots = (Pico.video.reg[12] & 1) ? 18 : 16; int lines, y, lines_vis, skip; int vcnt_wrap, vcnt_adj; @@ -150,23 +149,6 @@ static int PicoFrameHints(void) } } - // get samples from sound chips - if ((y == 224 || y == line_sample) && PicoIn.sndOut) - { - cycles = SekCyclesDone(); - - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) - PicoSyncZ80(cycles); -#ifdef PICO_CD - if (PicoIn.AHW & PAHW_MCD) - pcd_sync_s68k(cycles, 0); -#endif -#ifdef PICO_32X - p32x_sync_sh2s(cycles); -#endif - PsndGetSamples(y); - } - // Run scanline: Pico.t.m68c_line_start = Pico.t.m68c_aim; do_timing_hacks_as(pv, vdp_slots); @@ -238,10 +220,6 @@ static int PicoFrameHints(void) p32x_start_blank(); #endif - // get samples from sound chips - if (y == 224 && PicoIn.sndOut) - PsndGetSamples(y); - // Run scanline: CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); @@ -298,7 +276,7 @@ static int PicoFrameHints(void) pv->status |= ((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking // last scanline - Pico.m.scanline = y; + Pico.m.scanline = y++; pv->v_counter = 0xff; pv->lwrite_cnt = 0; @@ -337,6 +315,11 @@ static int PicoFrameHints(void) #ifdef PICO_32X p32x_sync_sh2s(cycles); #endif + + // get samples from sound chips + if (PicoIn.sndOut) + PsndGetSamples(y); + timers_cycle(); pv->hint_cnt = hint; diff --git a/pico/pico_int.h b/pico/pico_int.h index 0fc458ef..d3da72ce 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -336,7 +336,7 @@ struct PicoMisc unsigned char eeprom_cycle; // EEPROM cycle number unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; - unsigned char status; // rapid_ym2612, multi_ym_updates + unsigned char pad1; // was ym2612 status unsigned short dma_xfers; // 18 unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det @@ -433,6 +433,8 @@ struct PicoSound int len_e_cnt; short dac_line; short psg_line; + unsigned int fm_mult; // samples per line in Q16 + unsigned int fm_pos; // last FM position in Q16 }; // run tools/mkoffsets pico/pico_int_offs.h if you change these @@ -872,9 +874,10 @@ PICO_INTERNAL void PsndReset(void); PICO_INTERNAL void PsndStartFrame(void); PICO_INTERNAL void PsndDoDAC(int line_to); PICO_INTERNAL void PsndDoPSG(int line_to); +PICO_INTERNAL void PsndDoFM(int line_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); -PICO_INTERNAL void PsndGetSamplesMS(void); +PICO_INTERNAL void PsndGetSamplesMS(int y); // sms.c #ifndef NO_SMS diff --git a/pico/sms.c b/pico/sms.c index 2800e209..b016f197 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -320,16 +320,12 @@ void PicoFrameMS(void) } } - // 224 because of how it's done for MD... - if (y == 224 && PicoIn.sndOut) - PsndGetSamplesMS(); - cycles_aim += cycles_line; cycles_done += z80_run((cycles_aim - cycles_done) >> 8) << 8; } - if (PicoIn.sndOut && Pico.snd.psg_line < lines) - PsndDoPSG(lines - 1); + if (PicoIn.sndOut) + PsndGetSamplesMS(lines); } void PicoFrameDrawOnlyMS(void) diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 242cb375..4b4bbdd8 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -12,16 +12,15 @@ #define MINOUT (-32768) /* limitter */ -#define Limit16(val) { \ - val -= (val >> 2); \ - if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT); \ -} +#define Limit16(val) \ + if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT) int mix_32_to_16l_level; static struct iir2 { // 2-pole IIR int x[2]; // sample buffer int y[2]; // filter intermediates + int i; } lfi2, rfi2; // NB ">>" rounds to -infinity, "/" to 0. To compensate the effect possibly use diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S index bb7388d6..104b3065 100644 --- a/pico/sound/mix_arm.S +++ b/pico/sound/mix_arm.S @@ -400,6 +400,8 @@ m32_16l_st_l_no_unal2: ldmfd sp!, {r4-r11,lr} bx lr +#endif /* __GP2X__ */ + .global mix_reset @ void mix_reset: ldr r0, =filter @@ -409,11 +411,7 @@ mix_reset: bx lr .data - DCfilt r4, r10 - DCfilt r5, r11 filter: .ds 8 -#endif /* __GP2X__ */ - @ vim:filetype=armasm diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 30d4a072..f4cd4241 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -32,52 +32,17 @@ extern int *sn76496_regs; static void dac_recalculate(void) { int lines = Pico.m.pal ? 313 : 262; - int mid = Pico.m.pal ? 68 : 93; - int i, dac_cnt, pos, len; + int i, pos; - if (Pico.snd.len <= lines) - { - // shrinking algo - dac_cnt = -Pico.snd.len; - len=1; pos=0; - dac_info[225] = 1; - - for(i=226; i != 225; i++) - { - if (i >= lines) i = 0; - if(dac_cnt < 0) { - pos++; - dac_cnt += lines; - } - dac_cnt -= Pico.snd.len; - dac_info[i] = pos; - } - } - else + pos = 0; // Q16 + + for(i = 0; i <= lines; i++) { - // stretching - dac_cnt = Pico.snd.len; - pos=0; - for(i = 225; i != 224; i++) - { - if (i >= lines) i = 0; - len=0; - while(dac_cnt >= 0) { - dac_cnt -= lines; - len++; - } - if (i == mid) // midpoint - while(pos+len < Pico.snd.len/2) { - dac_cnt -= lines; - len++; - } - dac_cnt += Pico.snd.len; - pos += len; - dac_info[i] = pos; - } + dac_info[i] = ((pos+(1<<15)) >> 16); // round to nearest + pos += Pico.snd.fm_mult; } - for (i = lines; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) - dac_info[i] = dac_info[0]; + for (i = lines+1; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) + dac_info[i] = dac_info[i-1]; } @@ -95,6 +60,7 @@ void PsndRerate(int preserve_state) { void *state = NULL; int target_fps = Pico.m.pal ? 50 : 60; + int target_lines = Pico.m.pal ? 313 : 262; if (preserve_state) { state = malloc(0x204); @@ -121,6 +87,9 @@ void PsndRerate(int preserve_state) Pico.snd.len_e_add = ((PicoIn.sndRate - Pico.snd.len * target_fps) << 16) / target_fps; Pico.snd.len_e_cnt = 0; + // samples per line + Pico.snd.fm_mult = 65536.0 * PicoIn.sndRate / (target_fps*target_lines); + // recalculate dac info dac_recalculate(); @@ -149,8 +118,7 @@ PICO_INTERNAL void PsndStartFrame(void) } Pico.snd.dac_line = Pico.snd.psg_line = 0; - Pico.m.status &= ~1; - dac_info[224] = Pico.snd.len_use; + Pico.snd.fm_pos = 0; } PICO_INTERNAL void PsndDoDAC(int line_to) @@ -159,9 +127,6 @@ PICO_INTERNAL void PsndDoDAC(int line_to) int dout = ym2612.dacout; int line_from = Pico.snd.dac_line; - if (line_to >= 313) - line_to = 312; - pos = dac_info[line_from]; pos1 = dac_info[line_to + 1]; len = pos1 - pos; @@ -188,14 +153,9 @@ PICO_INTERNAL void PsndDoPSG(int line_to) int pos, pos1, len; int stereo = 0; - if (line_to >= 313) - line_to = 312; - pos = dac_info[line_from]; pos1 = dac_info[line_to + 1]; len = pos1 - pos; - //elprintf(EL_STATUS, "%3d %3d %3d %3d %3d", - // pos, pos1, len, line_from, line_to); if (len <= 0) return; @@ -211,6 +171,34 @@ PICO_INTERNAL void PsndDoPSG(int line_to) SN76496Update(PicoIn.sndOut + pos, len, stereo); } +PICO_INTERNAL void PsndDoFM(int line_to) +{ + int pos, len; + int stereo = 0; + + // Q16, number of samples to fill in buffer + len = ((line_to-1) * Pico.snd.fm_mult) - Pico.snd.fm_pos; + + // don't do this too often (no more than 256 per sec) + if (len >> 16 <= PicoIn.sndRate >> 9) + return; + + // update position and calculate buffer offset and length + pos = Pico.snd.fm_pos >> 16; + Pico.snd.fm_pos += len; + len = (Pico.snd.fm_pos >> 16) - pos; + + // fill buffer + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + if (PicoIn.opt & POPT_EN_FM) + YM2612UpdateOne(PsndBuffer + pos, len, stereo, 1); + else + memset32(PsndBuffer + pos, 0, len<> 3; + int fmlen = (Pico.snd.fm_pos >> 16) - offset; offset <<= stereo; + buf32 = PsndBuffer+offset; pprof_start(sound); @@ -288,14 +277,15 @@ static int PsndRender(int offset, int length) return length; } - // Add in the stereo FM buffer - if (PicoIn.opt & POPT_EN_FM) { - buf32_updated = YM2612UpdateOne(buf32, length, stereo, 1); - } else - memset32(buf32, 0, length< 0) { + int *fmbuf = buf32 + (fmlen << stereo); + if (PicoIn.opt & POPT_EN_FM) + YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); + else + memset32(fmbuf, 0, (length-fmlen)< max ) val = max; \ - else if ( val < min ) val = min; \ -} - - /* TL_TAB_LEN is calculated as: * 13 - sinus amplitude bits (Y axis) * 2 - sinus sign bit (Y axis) @@ -289,8 +281,8 @@ O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), /* rates 00-11 */ -O(18),O(18),O( 0),O( 0), -O( 0),O( 0),O( 2),O( 2), +O(18),O(18),O( 2),O( 3), +O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), @@ -554,6 +546,13 @@ INLINE void set_timers( int v ) ym2612.OPN.ST.status &= ~1; } +INLINE void recalc_volout(FM_SLOT *SLOT) +{ + INT16 vol_out = SLOT->volume; + if ((SLOT->ssg&0x0c) == 0x0c) + vol_out = (0x200 - SLOT->volume) & MAX_ATT_INDEX; + SLOT->vol_out = vol_out + SLOT->tl; +} INLINE void FM_KEYON(int c , int s ) { @@ -562,13 +561,15 @@ INLINE void FM_KEYON(int c , int s ) { SLOT->key = 1; SLOT->phase = 0; /* restart Phase Generator */ + SLOT->ssg ^= SLOT->ssgn; + SLOT->ssgn = 0; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; if (SLOT->ar + SLOT->ksr < 32+62) { - SLOT->state = (SLOT->volume > MIN_ATT_INDEX) ? EG_ATT : - ((SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC); + if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; } else { SLOT->volume = MIN_ATT_INDEX; - SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; } + recalc_volout(SLOT); ym2612.slot_mask |= (1<key ) { SLOT->key = 0; - if (SLOT->state>EG_REL) + if (SLOT->state>EG_REL) { SLOT->state = EG_REL;/* phase -> Release */ + if (SLOT->ssg&0x08) { + if (SLOT->ssg&0x04) + SLOT->volume = (0x200 - SLOT->volume); + if (SLOT->volume >= 0x200) { + SLOT->volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + } + } + SLOT->vol_out = SLOT->volume + SLOT->tl; } } @@ -597,12 +608,15 @@ INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v) INLINE void set_tl(FM_SLOT *SLOT, int v) { SLOT->tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */ + if (SLOT->state > EG_REL) + recalc_volout(SLOT); } /* set attack rate & key scale */ INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) { UINT8 old_KSR = SLOT->KSR; + int eg_sh_ar, eg_sel_ar; SLOT->ar = (v&0x1f) ? 32 + ((v&0x1f)<<1) : 0; @@ -611,24 +625,20 @@ INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) { CH->SLOT[SLOT1].Incr=-1; } + + /* refresh Attack rate */ + if ((SLOT->ar + SLOT->ksr) < 32+62) + { + eg_sh_ar = eg_rate_shift [SLOT->ar + SLOT->ksr ]; + eg_sel_ar = eg_rate_select[SLOT->ar + SLOT->ksr ]; + } else { - int eg_sh_ar, eg_sel_ar; - - /* refresh Attack rate */ - if ((SLOT->ar + SLOT->ksr) < 32+62) - { - eg_sh_ar = eg_rate_shift [SLOT->ar + SLOT->ksr ]; - eg_sel_ar = eg_rate_select[SLOT->ar + SLOT->ksr ]; - } - else - { - eg_sh_ar = 0; - eg_sel_ar = 18; - } - - SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); + eg_sh_ar = 0; + eg_sel_ar = 18; } + + SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); } /* set decay rate */ @@ -750,7 +760,7 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) return lfo_ampm; } -INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) +INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) { INT32 volume = SLOT->volume; UINT32 pack = SLOT->eg_pack[SLOT->state - 1]; @@ -763,44 +773,113 @@ INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3; eg_inc_val = (1 << (eg_inc_val & 7)) >> 1; - switch (SLOT->state) - { - case EG_ATT: /* attack phase */ - volume += ( ~volume * eg_inc_val ) >> 4; - if ( volume <= MIN_ATT_INDEX ) + if (SLOT->ssg&0x08) { + switch (SLOT->state) { - volume = MIN_ATT_INDEX; - SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC; - } - break; + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) + { + volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC; + } + break; - case EG_DEC: /* decay phase */ - volume += eg_inc_val; - if ( volume >= (INT32) SLOT->sl ) - SLOT->state = EG_SUS; - break; + case EG_DEC: /* decay phase */ + if (volume < 0x200) + volume += 4*eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; - case EG_SUS: /* sustain phase */ - volume += eg_inc_val; - if ( volume >= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - /* do not change SLOT->state (verified on real chip) */ + case EG_SUS: /* sustain phase */ + if (volume < 0x200) + volume += 4*eg_inc_val; + break; + + case EG_REL: /* release phase */ + if (volume < 0x200) + volume += 4*eg_inc_val; + if ( volume >= 0x200 ) + { + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + break; } - break; - case EG_REL: /* release phase */ - volume += eg_inc_val; - if ( volume >= MAX_ATT_INDEX ) + SLOT->vol_out = volume + SLOT->tl; + if ((SLOT->ssg&0x04) && (SLOT->state > EG_REL)) + SLOT->vol_out = ((0x200 - volume) & MAX_ATT_INDEX) + SLOT->tl; + } else { + switch (SLOT->state) { - volume = MAX_ATT_INDEX; - SLOT->state = EG_OFF; + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) + { + volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC; + } + break; + + case EG_DEC: /* decay phase */ + volume += eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; + + case EG_SUS: /* sustain phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + /* do not change SLOT->state (verified on real chip) */ + } + break; + + case EG_REL: /* release phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + break; } - break; - } + SLOT->vol_out = volume + SLOT->tl; + } SLOT->volume = volume; - *vol_out = SLOT->tl + volume; /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ +} + +INLINE void update_ssg_eg_phase(FM_SLOT *SLOT) +{ + if (SLOT->ssg&0x01) { + if (SLOT->ssg&0x02) { + SLOT->ssg ^= SLOT->ssgn ^ 4; + SLOT->ssgn = 4; + } + + if (SLOT->state != EG_ATT && !(SLOT->ssg&0x04)) + SLOT->volume = MAX_ATT_INDEX; + } else { + if (SLOT->ssg&0x02) { + SLOT->ssg ^= 4; + SLOT->ssgn ^= 4; + } else + SLOT->phase = 0; + + if (SLOT->state != EG_ATT) { + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; + if (SLOT->ar + SLOT->ksr < 32+62) { + if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; + } else { + SLOT->volume = MIN_ATT_INDEX; + } + } + } + recalc_volout(SLOT); } #endif @@ -846,6 +925,16 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) { int smp = 0; /* produced sample */ unsigned int eg_out, eg_out2, eg_out4; + FM_SLOT *SLOT; + + SLOT = &ct->CH->SLOT[SLOT1]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + SLOT = &ct->CH->SLOT[SLOT2]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + SLOT = &ct->CH->SLOT[SLOT3]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); + SLOT = &ct->CH->SLOT[SLOT4]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) update_ssg_eg_phase(SLOT); if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */ ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16); @@ -857,12 +946,58 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) { ct->eg_timer -= EG_TIMER_OVERFLOW; ct->eg_cnt++; - - if (ct->CH->SLOT[SLOT1].state != EG_OFF) update_eg_phase(&ct->vol_out1, &ct->CH->SLOT[SLOT1], ct->eg_cnt); - if (ct->CH->SLOT[SLOT2].state != EG_OFF) update_eg_phase(&ct->vol_out2, &ct->CH->SLOT[SLOT2], ct->eg_cnt); - if (ct->CH->SLOT[SLOT3].state != EG_OFF) update_eg_phase(&ct->vol_out3, &ct->CH->SLOT[SLOT3], ct->eg_cnt); - if (ct->CH->SLOT[SLOT4].state != EG_OFF) update_eg_phase(&ct->vol_out4, &ct->CH->SLOT[SLOT4], ct->eg_cnt); + if (ct->eg_cnt >= 4096) ct->eg_cnt = 1; + + SLOT = &ct->CH->SLOT[SLOT1]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + SLOT = &ct->CH->SLOT[SLOT2]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + SLOT = &ct->CH->SLOT[SLOT3]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + SLOT = &ct->CH->SLOT[SLOT4]; + SLOT->vol_ipol = SLOT->vol_out; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt); + } +#if 0 + UINT32 ifrac0 = ct->eg_timer / (EG_TIMER_OVERFLOW>>EG_SH); + UINT32 ifrac1 = (1<CH->SLOT[SLOT1]; + ct->vol_out1 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; + SLOT = &ct->CH->SLOT[SLOT2]; + ct->vol_out2 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; + SLOT = &ct->CH->SLOT[SLOT3]; + ct->vol_out3 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; + SLOT = &ct->CH->SLOT[SLOT4]; + ct->vol_out4 = (SLOT->vol_ipol*ifrac1 + SLOT->vol_out*ifrac0) >> EG_SH; +#else + switch (ct->eg_timer >> EG_SH) + { + case 0: + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_ipol; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_ipol; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_ipol; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_ipol; + break; + case (EG_TIMER_OVERFLOW>>EG_SH)-1: + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_out; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; + break; + default: + ct->vol_out1 = (ct->CH->SLOT[SLOT1].vol_ipol + + ct->CH->SLOT[SLOT1].vol_out) >> 1; + ct->vol_out2 = (ct->CH->SLOT[SLOT2].vol_ipol + + ct->CH->SLOT[SLOT2].vol_out) >> 1; + ct->vol_out3 = (ct->CH->SLOT[SLOT3].vol_ipol + + ct->CH->SLOT[SLOT3].vol_out) >> 1; + ct->vol_out4 = (ct->CH->SLOT[SLOT4].vol_ipol + + ct->CH->SLOT[SLOT4].vol_out) >> 1; } +#endif if (ct->pack & 4) continue; /* output disabled */ @@ -892,7 +1027,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add; } - switch( ct->CH->ALGO ) + switch( ct->algo&0x7 ) { case 0: { @@ -1086,6 +1221,33 @@ static void chan_render_finish(void) ym2612.OPN.lfo_cnt = crct.lfo_cnt; } +static UINT32 update_lfo_phase(FM_SLOT *SLOT, UINT32 block_fnum) +{ + UINT32 fnum_lfo; + INT32 lfo_fn_table_index_offset; + UINT8 blk; + UINT32 fn; + int fc,fdt; + + fnum_lfo = ((block_fnum & 0x7f0) >> 4) * 32 * 8; + lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; + if (lfo_fn_table_index_offset) /* LFO phase modulation active */ + { + block_fnum = block_fnum*2 + lfo_fn_table_index_offset; + blk = (block_fnum&0x7000) >> 12; + fn = block_fnum & 0xfff; + + /* phase increment counter */ + fc = (fn_table[fn]>>(7-blk)); + + fdt = fc + SLOT->DT[crct.CH->kcode]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + + return (fdt * SLOT->mul) >> 1; + } else + return SLOT->Incr; +} + static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l { crct.CH = &ym2612.CH[c]; @@ -1114,58 +1276,22 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s crct.phase3 = crct.CH->SLOT[SLOT3].phase; crct.phase4 = crct.CH->SLOT[SLOT4].phase; - /* current output from EG circuit (without AM from LFO) */ - crct.vol_out1 = crct.CH->SLOT[SLOT1].tl + ((UINT32)crct.CH->SLOT[SLOT1].volume); - crct.vol_out2 = crct.CH->SLOT[SLOT2].tl + ((UINT32)crct.CH->SLOT[SLOT2].volume); - crct.vol_out3 = crct.CH->SLOT[SLOT3].tl + ((UINT32)crct.CH->SLOT[SLOT3].volume); - crct.vol_out4 = crct.CH->SLOT[SLOT4].tl + ((UINT32)crct.CH->SLOT[SLOT4].volume); - crct.op1_out = crct.CH->op1_out; crct.algo = crct.CH->ALGO & 7; - if(crct.CH->pms) + if(crct.CH->pms && (ym2612.OPN.ST.mode & 0xC0) && c == 2) { + /* 3 slot mode */ + crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], ym2612.OPN.SL3.block_fnum[1]); + crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], ym2612.OPN.SL3.block_fnum[2]); + crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], ym2612.OPN.SL3.block_fnum[0]); + crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum); + } + else if(crct.CH->pms) { - /* add support for 3 slot mode */ - UINT32 block_fnum = crct.CH->block_fnum; - - UINT32 fnum_lfo = ((block_fnum & 0x7f0) >> 4) * 32 * 8; - INT32 lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; - - if (lfo_fn_table_index_offset) /* LFO phase modulation active */ - { - UINT8 blk; - UINT32 fn; - int kc,fc,fdt; - - block_fnum = block_fnum*2 + lfo_fn_table_index_offset; - blk = (block_fnum&0x7000) >> 12; - fn = block_fnum & 0xfff; - - /* keyscale code */ - kc = (blk<<2) | opn_fktable[(fn >> 7) & 0xf]; - /* phase increment counter */ - fc = (fn_table[fn]>>(7-blk)); - - fdt = fc + crct.CH->SLOT[SLOT1].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr1 = (fdt*crct.CH->SLOT[SLOT1].mul) >> 1; - fdt = fc + crct.CH->SLOT[SLOT2].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr2 = (fdt*crct.CH->SLOT[SLOT2].mul) >> 1; - fdt = fc + crct.CH->SLOT[SLOT3].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr3 = (fdt*crct.CH->SLOT[SLOT3].mul) >> 1; - fdt = fc + crct.CH->SLOT[SLOT4].DT[kc]; - if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; - crct.incr4 = (fdt*crct.CH->SLOT[SLOT4].mul) >> 1; - } - else /* LFO phase modulation = zero */ - { - crct.incr1 = crct.CH->SLOT[SLOT1].Incr; - crct.incr2 = crct.CH->SLOT[SLOT2].Incr; - crct.incr3 = crct.CH->SLOT[SLOT3].Incr; - crct.incr4 = crct.CH->SLOT[SLOT4].Incr; - } + crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], crct.CH->block_fnum); + crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], crct.CH->block_fnum); + crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], crct.CH->block_fnum); + crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum); } else /* no LFO phase modulation */ { @@ -1297,8 +1423,13 @@ static void reset_channels(FM_CH *CH) CH[c].fc = 0; for(s = 0 ; s < 4 ; s++ ) { + CH[c].SLOT[s].Incr = -1; + CH[c].SLOT[s].key = 0; + CH[c].SLOT[s].phase = 0; + CH[c].SLOT[s].ssg = CH[c].SLOT[s].ssgn = 0; CH[c].SLOT[s].state= EG_OFF; CH[c].SLOT[s].volume = MAX_ATT_INDEX; + CH[c].SLOT[s].vol_out = MAX_ATT_INDEX; } CH[c].mem_value = CH[c].op1_out = 0; } @@ -1503,8 +1634,10 @@ static int OPNWriteReg(int r, int v) break; case 0x90: /* SSG-EG */ - // removed. - ret = 0; + SLOT->ssg = v&0x0f; + SLOT->ssg ^= SLOT->ssgn; + if (SLOT->state > EG_REL) + recalc_volout(SLOT); break; case 0xa0: diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index bbe6b1a4..3a1ea7a9 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -53,6 +53,11 @@ typedef struct }; UINT32 eg_pack[4]; }; + + UINT8 ssg; /* 0x30 SSG-EG waveform */ + UINT8 ssgn; + UINT16 vol_out; /* 0x32 current output from EG (without LFO) */ + UINT16 vol_ipol; /* 0x34 interpolator memory */ } FM_SLOT; @@ -176,21 +181,22 @@ int YM2612PicoStateLoad2(int *tat, int *tbt); #else /* GP2X specific */ #include "../../platform/gp2x/940ctl.h" -#define YM2612Init(baseclock,rate) { \ +#define YM2612Init(baseclock,rate) do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612Init_940(baseclock, rate); \ else YM2612Init_(baseclock, rate); \ -} -#define YM2612ResetChip() { \ +} while (0) +#define YM2612ResetChip() do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612ResetChip_940(); \ else YM2612ResetChip_(); \ -} -#define YM2612UpdateOne(buffer,length,stereo,is_buf_empty) \ +} while (0) +#define YM2612UpdateOne(buffer,length,stereo,is_buf_empty) do { \ (PicoIn.opt&POPT_EXT_FM) ? YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ - YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); -#define YM2612PicoStateLoad() { \ + YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); \ +} while (0) +#define YM2612PicoStateLoad() do { \ if (PicoIn.opt&POPT_EXT_FM) YM2612PicoStateLoad_940(); \ else YM2612PicoStateLoad_(); \ -} +} while (0) #endif /* __GP2X__ */ diff --git a/pico/sound/ym2612_arm.S b/pico/sound/ym2612_arm.S index 9b807928..86e5f1c0 100644 --- a/pico/sound/ym2612_arm.S +++ b/pico/sound/ym2612_arm.S @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2006 + * (C) kub, 2020 added SSG-EG and simple output rate interpolation * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -18,7 +19,7 @@ .equiv SLOT2, 2 .equiv SLOT3, 1 .equiv SLOT4, 3 -.equiv SLOT_STRUCT_SIZE, 0x30 +.equiv SLOT_STRUCT_SIZE, 0x38 .equiv TL_TAB_LEN, 0x1A00 @@ -28,11 +29,11 @@ .equiv EG_REL, 1 .equiv EG_OFF, 0 -.equiv EG_SH, 16 @ 16.16 fixed point (envelope generator timing) +.equiv EG_SH, 16 @ 16.16 fixed point (envelope generator timing) .equiv EG_TIMER_OVERFLOW, (3*(1<= (INT32) SLOT->sl ) + strgeb r3, [r5,#0x17] @ state + b 10f + +4: @ EG_ATT + subs r3, r3, #1 @ eg_inc_val_shift - 1 + mvnpl r2, r0 + movpl r2, r2, lsl r3 + addpl r0, r0, r2, asr #4 + cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) + bgt 10f + ldr r2, [r5,#0x1c] + mov r0, #0 + cmp r2, #0 + movne r3, #EG_DEC + moveq r3, #EG_SUS + strb r3, [r5,#0x17] @ state + b 10f + +1: @ EG_REL + mov r2, #0x200 + cmp r0, r2 @ if ( volume >= 0x200 ) + movge r0, #1024 + subge r0, #1 + movge r3, #EG_OFF + strgeb r3, [r5,#0x17] @ state + +10: @ finish + strh r0, [r5,#0x1a] @ volume + ldrb r2, [r5,#0x30] @ ssg + ldrb r3, [r5,#0x17] @ state + cmp r2, #0x0c @ if ( ssg&0x04 && state > EG_REL ) + cmpge r3, #EG_REL+1 + rsbge r0, r0, #0x200 @ volume = (0x200-volume) & MAX_ATT + lslge r0, r0, #10 + lsrge r0, r0, #10 + +11: + ldrh r3, [r5,#0x18] @ tl + add r0, r0, r3 @ volume += tl + strh r0, [r5,#0x32] @ vol_out .if \slot == SLOT1 mov r6, r6, lsr #16 - add r0, r0, r3 orr r6, r0, r6, lsl #16 .elseif \slot == SLOT2 mov r6, r6, lsl #16 - add r0, r0, r3 mov r0, r0, lsl #16 orr r6, r0, r6, lsr #16 .elseif \slot == SLOT3 mov r7, r7, lsr #16 - add r0, r0, r3 orr r7, r0, r7, lsl #16 .elseif \slot == SLOT4 mov r7, r7, lsl #16 - add r0, r0, r3 mov r0, r0, lsl #16 orr r7, r0, r7, lsr #16 .endif @@ -137,6 +202,63 @@ 0: @ EG_OFF .endm +@ r5=slot, trashes: r0,r2,r3 +.macro update_ssg_eg + ldrh r0, [r5,#0x30] @ ssg+ssgn + ldrb r2, [r5,#0x17] @ state + ldrh r3, [r5,#0x1a] @ volume + tst r0, #0x08 @ ssg enabled? + beq 9f + cmp r2, #EG_REL @ state > EG_REL? + ble 9f + cmp r3, #0x200 @ volume >= 0x200? + blt 9f + + tst r0, #0x01 + beq 1f + + tst r0, #0x02 + eorne r0, r0, lsr #8 @ ssg ^= ssgn ^ 4 + eorne r0, r0, #0x4 + orrne r0, r0, #0x400 @ ssgn = 4 + strneh r0, [r5,#0x30] + + eor r0, r0, #0x4 @ if ( !(ssg&0x04 ) + tst r0, #0x4 + cmpne r2, #EG_ATT @ if ( state != EG_ATT ) + movne r0, #0x400 + subne r0, r0, #1 + strneh r0, [r5,#0x1a] @ volume = MAX_ATT + b 9f + +1: tst r0, #0x02 + eorne r0, r0, #0x4 @ ssg ^= 4 + eorne r0, r0, #0x400 @ ssgn ^= 4 + strneh r0, [r5,#0x30] + moveq r3, #0 + streq r3, [r5,#0x0c] @ phase = 0 + + cmp r2, #EG_ATT @ if ( state != EG_ATT ) + beq 9f + + ldr r3, [r5,#0x1c] @ sl + mov r2, #EG_SUS @ state = sl==MIN_ATT ? EG_SUS:EG_DEC + cmp r3, #0 + + ldr r0, [r5,#0x04] @ ar + ldr r3, [r5,#0x14] @ ksr + movne r2, #EG_DEC + add r0, r0, r3 + cmp r0, #32+62 @ if ( ar+ksr >= 32+62 ) + ldrlt r0, [r5,#0x1a] + movge r0, #0 + strgeh r0, [r5,#0x1a] @ volume = MIN_ATT + + cmp r0, #0 + movgt r2, #EG_ATT + strb r2, [r5,#0x17] @ state +9: +.endm @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt, r3=scratch .macro advance_lfo_m @@ -532,187 +654,6 @@ .endm -/* -.global update_eg_phase @ FM_SLOT *SLOT, UINT32 eg_cnt - -update_eg_phase: - stmfd sp!, {r5,r6} - mov r5, r0 @ slot - ldrh r3, [r5,#0x18] @ tl - ldrh r6, [r5,#0x1a] @ volume - add r6, r6, r3 - update_eg_phase_slot SLOT1 - mov r0, r6 - ldmfd sp!, {r5,r6} - bx lr -.pool - - -.global advance_lfo @ int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt - -advance_lfo: - mov r12, r0, lsl #16 - advance_lfo_m - mov r0, r12, lsr #16 - bx lr -.pool - - -.global upd_algo0 @ chan_rend_context *c -upd_algo0: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo0_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo1 @ chan_rend_context *c -upd_algo1: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo1_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo2 @ chan_rend_context *c -upd_algo2: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo2_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo3 @ chan_rend_context *c -upd_algo3: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo3_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo4 @ chan_rend_context *c -upd_algo4: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo4_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo5 @ chan_rend_context *c -upd_algo5: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo5_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo6 @ chan_rend_context *c -upd_algo6: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo6_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo7 @ chan_rend_context *c -upd_algo7: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo7_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_slot1 @ chan_rend_context *c -upd_slot1: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - PIC_LDR(r3, ip, ym_sin_tab) - PIC_LDR(r5, ip, ym_tl_tab) - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_slot1_m - str r10, [lr, #0x38] - - ldmfd sp!, {r4-r10,pc} -.pool -*/ - - @ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) @ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[4],was_update,algo[3], r5=tl_tab/slot, @ r6-r7=vol_out[4], r8=eg_timer, r9=eg_timer_add[31:16], r10=op1_out, r11=buffer @@ -730,14 +671,21 @@ chan_render_loop: add r0, lr, #0x44 ldmia r0, {r8,r9} @ eg_timer, eg_timer_add ldr r10, [lr, #0x54] @ op1_out - ldmia lr, {r6,r7} @ load volumes +@ ldmia lr, {r6,r7} @ load volumes + ldr r5, [lr, #0x40] @ CH + ldrh r6, [r5, #0x32] @ vol_out values for all slots + ldrh r2, [r5, #0x32+SLOT_STRUCT_SIZE*2] + ldrh r7, [r5, #0x32+SLOT_STRUCT_SIZE] + ldrh r3, [r5, #0x32+SLOT_STRUCT_SIZE*3] + orr r6, r6, r2, lsl #16 + orr r7, r7, r3, lsl #16 tst r12, #8 @ lfo? beq crl_loop crl_loop_lfo: add r0, lr, #0x30 - ldmia r0, {r1,r2} + ldmia r0, {r1,r2} @ lfo_cnt, lfo_inc subs r4, r4, #0x100 bmi crl_loop_end @@ -754,15 +702,29 @@ crl_loop: subs r4, r4, #0x100 bmi crl_loop_end + @ -- SSG -- + add r0, lr, #0x3c + ldmia r0, {r1,r5} @ eg_cnt, CH + + @ r5=slot, trashes: r0,r2,r3 + update_ssg_eg + add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2) + update_ssg_eg + sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1) + update_ssg_eg + add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3) + update_ssg_eg + sub r5, r5, #SLOT_STRUCT_SIZE*3 + @ -- EG -- add r8, r8, r9 cmp r8, #EG_TIMER_OVERFLOW bcc eg_done - add r0, lr, #0x3c - ldmia r0, {r1,r5} @ eg_cnt, CH eg_loop: sub r8, r8, #EG_TIMER_OVERFLOW add r1, r1, #1 + cmp r1, #4096 + movge r1, #1 @ SLOT1 (0) @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 update_eg_phase_slot SLOT1 @@ -774,8 +736,8 @@ eg_loop: update_eg_phase_slot SLOT4 cmp r8, #EG_TIMER_OVERFLOW - subcs r5, r5, #SLOT_STRUCT_SIZE*3 - bcs eg_loop + sub r5, r5, #SLOT_STRUCT_SIZE*3 + bhs eg_loop str r1, [lr, #0x3c] eg_done: @@ -787,6 +749,66 @@ eg_done: cmp r0, #0x4 beq crl_loop + @ output interpolation +#if 0 + @ basic interpolator, interpolate in middle region, else use closer value + mov r3, r8, lsr #EG_SH @ eg_timer, [0..3<>EG_SH)/2 + bgt 0f @ mix is vol_out + + ldrh r0, [r5,#0x34] @ SLOT1 vol_ipol + lsleq r2, r6, #16 + addeq r0, r0, r2, lsr #16 + lsreq r0, r0, #1 + mov r6, r6, lsr #16 + orr r6, r0, r6, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol + addeq r0, r0, r6, lsr #16 + lsreq r0, r0, #1 + mov r6, r6, lsl #16 + orr r6, r6, r0 + ror r6, r6, #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol + lsleq r2, r7, #16 + addeq r0, r0, r2, lsr #16 + lsreq r0, r0, #1 + mov r7, r7, lsr #16 + orr r7, r0, r7, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol + addeq r0, r0, r7, lsr #16 + lsreq r0, r0, #1 + mov r7, r7, lsl #16 + orr r7, r7, r0 + ror r7, r7, #16 +#elif 0 + @ super-basic... just take value closest to sample point + mov r3, r8, lsr #EG_SH-1 @ eg_timer, [0..3<>EG_SH) + bgt 0f @ mix is vol_out + + ldrh r0, [r5,#0x34] @ SLOT1 vol_ipol + mov r6, r6, lsr #16 + orr r6, r0, r6, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*2] @ SLOT2 vol_ipol + mov r6, r6, lsl #16 + orr r6, r6, r0 + ror r6, r6, #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE] @ SLOT3 vol_ipol + mov r7, r7, lsr #16 + orr r7, r0, r7, lsl #16 + + ldrh r0, [r5,#0x34+SLOT_STRUCT_SIZE*3] @ SLOT4 vol_ipol + mov r7, r7, lsl #16 + orr r7, r7, r0 + ror r7, r7, #16 +#endif +0: + @ -- SLOT1 -- PIC_LDR(r3, r2, ym_tl_tab) -- 2.39.2