From 6d28fb5023d53282209ade40fdd30f46905aacbf Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 30 Sep 2017 00:19:55 +0300 Subject: [PATCH] fix ym2612 asm, rework EG this should be split, but I'm lazy EG saves ~900 bytes --- pico/sound/ym2612.c | 106 +++++++++++---------------- pico/sound/ym2612.h | 14 ++-- pico/sound/ym2612_arm.s | 154 +++++++++++++++++----------------------- 3 files changed, 117 insertions(+), 157 deletions(-) diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index efe5054..5c52785 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -739,83 +739,57 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) return lfo_ampm; } -#define EG_INC_VAL() \ - ((1 << ((pack >> ((eg_cnt>>shift)&7)*3)&7)) >> 1) - -INLINE UINT32 update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) +INLINE void update_eg_phase(UINT16 *vol_out, FM_SLOT *SLOT, UINT32 eg_cnt) { INT32 volume = SLOT->volume; + UINT32 pack = SLOT->eg_pack[SLOT->state - 1]; + UINT32 shift = pack >> 24; + INT32 eg_inc_val; - switch(SLOT->state) - { - case EG_ATT: /* attack phase */ - { - UINT32 pack = SLOT->eg_pack_ar; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<>4; + if (eg_cnt & ((1 << shift) - 1)) + return; - if (volume <= MIN_ATT_INDEX) - { - volume = MIN_ATT_INDEX; - SLOT->state = EG_DEC; - } - } - break; - } + eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3; + eg_inc_val = (1 << (eg_inc_val & 7)) >> 1; - case EG_DEC: /* decay phase */ + switch (SLOT->state) + { + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) { - UINT32 pack = SLOT->eg_pack_d1r; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= (INT32) SLOT->sl ) - SLOT->state = EG_SUS; - } - break; + volume = MIN_ATT_INDEX; + SLOT->state = EG_DEC; } + break; - case EG_SUS: /* sustain phase */ - { - UINT32 pack = SLOT->eg_pack_d2r; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; - if ( volume >= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - /* do not change SLOT->state (verified on real chip) */ - } - } - break; + case EG_SUS: /* sustain phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + /* do not change SLOT->state (verified on real chip) */ } + break; - case EG_REL: /* release phase */ + case EG_REL: /* release phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) { - UINT32 pack = SLOT->eg_pack_rr; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - SLOT->state = EG_OFF; - } - } - break; + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; } + break; } SLOT->volume = volume; - return SLOT->tl + ((UINT32)volume); /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ + *vol_out = SLOT->tl + volume; /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ } #endif @@ -873,10 +847,10 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) ct->eg_timer -= EG_TIMER_OVERFLOW; ct->eg_cnt++; - if (ct->CH->SLOT[SLOT1].state != EG_OFF) ct->vol_out1 = update_eg_phase(&ct->CH->SLOT[SLOT1], ct->eg_cnt); - if (ct->CH->SLOT[SLOT2].state != EG_OFF) ct->vol_out2 = update_eg_phase(&ct->CH->SLOT[SLOT2], ct->eg_cnt); - if (ct->CH->SLOT[SLOT3].state != EG_OFF) ct->vol_out3 = update_eg_phase(&ct->CH->SLOT[SLOT3], ct->eg_cnt); - if (ct->CH->SLOT[SLOT4].state != EG_OFF) ct->vol_out4 = update_eg_phase(&ct->CH->SLOT[SLOT4], ct->eg_cnt); + if (ct->CH->SLOT[SLOT1].state != EG_OFF) update_eg_phase(&ct->vol_out1, &ct->CH->SLOT[SLOT1], ct->eg_cnt); + if (ct->CH->SLOT[SLOT2].state != EG_OFF) update_eg_phase(&ct->vol_out2, &ct->CH->SLOT[SLOT2], ct->eg_cnt); + if (ct->CH->SLOT[SLOT3].state != EG_OFF) update_eg_phase(&ct->vol_out3, &ct->CH->SLOT[SLOT3], ct->eg_cnt); + if (ct->CH->SLOT[SLOT4].state != EG_OFF) update_eg_phase(&ct->vol_out4, &ct->CH->SLOT[SLOT4], ct->eg_cnt); } if (ct->pack & 4) continue; /* output disabled */ @@ -1071,7 +1045,7 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) } else { buffer[scounter] += smp; } - ct->algo = 8; // algo is only used in asm, here only bit3 is used + ct->algo |= 8; } /* update phase counters AFTER output calculations */ diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index f5e98a0..73a36a8 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -43,10 +43,16 @@ typedef struct INT16 volume; /* #0x1a envelope counter | need_save */ UINT32 sl; /* #0x1c sustain level:sl_table[SL] */ - UINT32 eg_pack_ar; /* #0x20 (attack state) */ - UINT32 eg_pack_d1r; /* #0x24 (decay state) */ - UINT32 eg_pack_d2r; /* #0x28 (sustain state) */ - UINT32 eg_pack_rr; /* #0x2c (release state) */ + /* asm relies on this order: */ + union { + struct { + UINT32 eg_pack_rr; /* #0x20 1 (release state) */ + UINT32 eg_pack_d2r; /* #0x24 2 (sustain state) */ + UINT32 eg_pack_d1r; /* #0x28 3 (decay state) */ + UINT32 eg_pack_ar; /* #0x2c 4 (attack state) */ + }; + UINT32 eg_pack[4]; + }; } FM_SLOT; diff --git a/pico/sound/ym2612_arm.s b/pico/sound/ym2612_arm.s index 7db3122..9c436d4 100644 --- a/pico/sound/ym2612_arm.s +++ b/pico/sound/ym2612_arm.s @@ -30,103 +30,73 @@ .equiv EG_TIMER_OVERFLOW, (3*(1<= (INT32) SLOT->sl ) - movge r3, #EG_SUS strgeb r3, [r5,#0x17] @ state - b 4f + b 10f + +4: @ EG_ATT + subs r3, r3, #1 @ eg_inc_val_shift - 1 + mov r2, #0 + mvnpl r2, r0 + mov r2, r2, lsl r3 + add r0, r0, r2, asr #4 + cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) + movle r3, #EG_DEC + strleb r3, [r5,#0x17] @ state + movle r0, #0 + b 10f 2: @ EG_SUS - ldr r2, [r5,#0x28] @ eg_pack_d2r (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) - mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - add r0, r0, r3, asr #1 mov r2, #1024 sub r2, r2, #1 @ r2 = MAX_ATT_INDEX cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) movge r0, r2 - b 4f + b 10f -3: @ EG_REL - ldr r2, [r5,#0x2c] @ eg_pack_rr (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) - mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - add r0, r0, r3, asr #1 +1: @ EG_REL mov r2, #1024 sub r2, r2, #1 @ r2 = MAX_ATT_INDEX cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) @@ -134,7 +104,7 @@ movge r3, #EG_OFF strgeb r3, [r5,#0x17] @ state -4: +10: @ finish ldrh r3, [r5,#0x18] @ tl strh r0, [r5,#0x1a] @ volume .if \slot == SLOT1 @@ -157,7 +127,7 @@ orr r7, r0, r7, lsr #16 .endif -5: +0: @ EG_OFF .endm @@ -187,28 +157,30 @@ tstne r12, #(1<<(\slot+8)) .if \slot == SLOT1 mov r1, r6, lsl #16 - mov r1, r1, lsr #17 + mov r1, r1, lsr #16 .elseif \slot == SLOT2 - mov r1, r6, lsr #17 + mov r1, r6, lsr #16 .elseif \slot == SLOT3 mov r1, r7, lsl #16 - mov r1, r1, lsr #17 + mov r1, r1, lsr #16 .elseif \slot == SLOT4 - mov r1, r7, lsr #17 + mov r1, r7, lsr #16 .endif andne r2, r12, #0xc0 movne r2, r2, lsr #6 addne r2, r2, #24 addne r1, r1, r12, lsr r2 + bic r1, r1, #1 .endm +@ \r=sin/result, r1=env, r3=ym_tl_tab .macro lookup_tl r tst \r, #0x100 eorne \r, \r, #0xff @ if (sin & 0x100) sin = 0xff - (sin&0xff); tst \r, #0x200 and \r, \r, #0xff - orr \r, \r, r1, lsl #8 + orr \r, \r, r1, lsl #7 mov \r, \r, lsl #1 ldrh \r, [r3, \r] @ 2ci if ne rsbne \r, \r, #0 @@ -345,9 +317,9 @@ make_eg_out SLOT3 cmp r1, #ENV_QUIET ldr r2, [lr, #0x38] @ mem (for future) - movcs r0, r2 + mov r0, #0 bcs 0f - ldr r0, [lr, #0x18] @ 1ci + ldr r0, [lr, #0x18] @ phase3 mov r0, r0, lsr #16 lookup_tl r0 @ r0=c2 @@ -370,13 +342,13 @@ cmp r1, #ENV_QUIET movcs r2, #0 bcs 2f - ldr r2, [lr, #0x14] + ldr r2, [lr, #0x14] @ phase2 mov r5, r10, lsr #17 add r2, r5, r2, lsr #16 lookup_tl r2 @ r2=mem 2: - str r2, [lr, #0x38] @ mem + str r2, [lr, #0x38] @ mem .endm @@ -541,9 +513,9 @@ movne r0, r0, asr #16 movne r0, r0, lsl r2 - ldr r2, [lr, #0x10] + ldr r2, [lr, #0x10] @ phase1 + add r0, r0, r2 mov r0, r0, lsr #16 - add r0, r0, r2, lsr #16 lookup_tl r0 mov r10,r10,lsl #16 @ ct->op1_out <<= 16; mov r0, r0, lsl #16 @@ -759,11 +731,18 @@ chan_render_loop: crl_loop_lfo: add r0, lr, #0x30 ldmia r0, {r1,r2} + + subs r4, r4, #0x100 + bmi crl_loop_end + add r2, r2, r1 str r2, [lr, #0x30] + @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt advance_lfo_m + add r4, r4, #0x100 + crl_loop: subs r4, r4, #0x100 bmi crl_loop_end @@ -859,7 +838,6 @@ crl_algo6: crl_algo7: upd_algo7_m - .pool crl_algo_done: @@ -917,6 +895,7 @@ crl_do_phase: crl_loop_end: +@ stmia lr, {r6,r7} @ save volumes (for debug) str r8, [lr, #0x44] @ eg_timer str r12, [lr, #0x4c] @ pack (for lfo_ampm) str r4, [lr, #0x50] @ was_update @@ -925,3 +904,4 @@ crl_loop_end: .pool +@ vim:filetype=armasm -- 2.39.2