From 30969671e53b7e7b55aa9923cf742575f00b1e84 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 21 Dec 2020 23:22:00 +0100 Subject: [PATCH] sound, improved and optimized reimplementation of libretro lowpass filter --- pico/sound/mix.c | 55 +++++++++++----- pico/sound/mix.h | 2 +- pico/sound/mix_arm.S | 117 +++++++++++++++++++++++++++-------- pico/sound/sound.c | 81 +----------------------- platform/libretro/libretro.c | 9 +++ 5 files changed, 141 insertions(+), 123 deletions(-) diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 4b4bbdd8..58e9c8c7 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -1,6 +1,7 @@ /* * some code for sample mixing * (C) notaz, 2006,2007 + * (C) kub, 2019,2020 added filtering * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -13,78 +14,97 @@ /* limitter */ #define Limit16(val) \ + val -= val >> 2; /* reduce level to avoid clipping */ \ if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT) int mix_32_to_16l_level; -static struct iir2 { // 2-pole IIR - int x[2]; // sample buffer +static struct iir { + int alpha; // alpha for EMA low pass int y[2]; // filter intermediates - int i; } lfi2, rfi2; // NB ">>" rounds to -infinity, "/" to 0. To compensate the effect possibly use // "-(-y>>n)" (round to +infinity) instead of "y>>n" in places. -// NB uses Q12 fixpoint; samples mustn't have more than 20 bits for this. +// NB uses fixpoint; samples mustn't have more than (32-QB) bits. Adding the +// outputs of the sound sources together yields a max. of 18 bits, restricting +// QB to a maximum of 14. #define QB 12 +// NB alpha for DC filtering shouldn't be smaller than 1/(1<y[0] += ((x<<(QB-8)) - (fi2->y[0]>>8)) * fi2->alpha; + fi2->y[0] += (x - (fi2->y[0]>>QB)) * fi2->alpha; + // DC filter. 
for alpha=1-1/8192 cutoff ~1Hz, for 1-1/1024 ~7Hz + fi2->y[1] += (fi2->y[0] - fi2->y[1]) >> QB; + return (fi2->y[0] - fi2->y[1]) >> QB; +} + // exponential moving average filter for DC filtering -// y[n] = (x[n]-y[n-1])*(1/8192) (corner approx. 20Hz, gain 1) -static inline int filter_exp(struct iir2 *fi2, int x) +// y[n] = (x[n]-y[n-1])*(1-1/8192) (corner approx. 1Hz, gain 1) +static inline int filter_exp(struct iir *fi2, int x) { - int xf = (x<<QB) - fi2->y[0]; - fi2->y[0] += xf >> 13; - xf -= xf >> 2; // level reduction to avoid clipping from overshoot - return xf>>QB; + fi2->y[1] += ((x << QB) - fi2->y[1]) >> QB; + return x - (fi2->y[1] >> QB); } // unfiltered (for testing) -static inline int filter_null(struct iir2 *fi2, int x) +static inline int filter_null(struct iir *fi2, int x) { return x; } +#define filter filter_band + #define mix_32_to_16l_stereo_core(dest, src, count, lv, fl) { \ int l, r; \ + struct iir lf = lfi2, rf = rfi2; \ \ for (; count > 0; count--) \ { \ l = r = *dest; \ l += *src++ >> lv; \ r += *src++ >> lv; \ - l = fl(&lfi2, l); \ - r = fl(&rfi2, r); \ + l = fl(&lf, l); \ + r = fl(&rf, r); \ Limit16(l); \ Limit16(r); \ *dest++ = l; \ *dest++ = r; \ } \ + lfi2 = lf, rfi2 = rf; \ } void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count) { - mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level, filter_exp); + mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level, filter); } void mix_32_to_16l_stereo(short *dest, int *src, int count) { - mix_32_to_16l_stereo_core(dest, src, count, 0, filter_exp); + mix_32_to_16l_stereo_core(dest, src, count, 0, filter); } void mix_32_to_16_mono(short *dest, int *src, int count) { int l; + struct iir lf = lfi2; for (; count > 0; count--) { l = *dest; l += *src++; - l = filter_exp(&lfi2, l); + l = filter(&lf, l); Limit16(l); *dest++ = l; } + lfi2 = lf; } @@ -118,8 +138,9 @@ void mix_16h_to_32_s2(int *dest_buf, short *mp3_buf, int count) } } -void mix_reset(void) +void mix_reset(int alpha_q16) { 
memset(&lfi2, 0, sizeof(lfi2)); memset(&rfi2, 0, sizeof(rfi2)); + lfi2.alpha = rfi2.alpha = (0x10000-alpha_q16) >> 4; // filter alpha, Q12 } diff --git a/pico/sound/mix.h b/pico/sound/mix.h index e128bad1..a0dfcac7 100644 --- a/pico/sound/mix.h +++ b/pico/sound/mix.h @@ -8,4 +8,4 @@ void mix_32_to_16_mono(short *dest, int *src, int count); extern int mix_32_to_16l_level; void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count); -void mix_reset(void); +void mix_reset(int alpha_q16); diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S index a1558d74..60438988 100644 --- a/pico/sound/mix_arm.S +++ b/pico/sound/mix_arm.S @@ -154,34 +154,46 @@ m16_32_s2_no_unal2: @ limit -@ reg=int_sample, lr=1, r3=tmp, kills flags +@ reg=int_sample, r12=1, r8=tmp, kills flags .macro Limit reg - add r3, lr, \reg, asr #15 - bics r3, r3, #1 @ in non-overflow conditions r3 is 0 or 1 + sub \reg, \reg, \reg, asr #2 @ reduce audio lvl some to avoid clipping + add r8, r12, \reg, asr #15 + bics r8, r8, #1 @ in non-overflow conditions r8 is 0 or 1 movne \reg, #0x8000 subpl \reg, \reg, #1 .endm - @ limit and shift up by 16 -@ reg=int_sample, lr=1, r3=tmp, kills flags +@ reg=int_sample, r12=1, r8=tmp, kills flags .macro Limitsh reg - add r3, lr, \reg, asr #15 - bics r3, r3, #1 @ in non-overflow conditions r3 is 0 or 1 + sub \reg, \reg, \reg, asr #2 @ reduce audio lvl some to avoid clipping + add r8, r12,\reg, asr #15 + bics r8, r8, #1 @ in non-overflow conditions r8 is 0 or 1 moveq \reg, \reg, lsl #16 movne \reg, #0x80000000 subpl \reg, \reg, #0x00010000 .endm + @ filter out DC offset -@ in=int_sample (max 20 bit), y=filter memory, r3=tmp +@ in=int_sample (max 20 bit), y=filter memory, r8=tmp .macro DCfilt in y - rsb r3, \y, \in, lsl #12 @ fixpoint 20.12 - add \y, \y, r3, asr #13 - sub r3, r3, r3, asr #2 @ reduce audio lvl some - asr \in, r3, #12 + rsb r8, \y, \in, lsl #12 @ fixpoint 20.12 + add \y, \y, r8, asr #12 @ alpha = 1-1/4096 + sub \in, \in, \y, asr #12 +.endm + +@ lowpass 
filter +@ in=int_sample (max 20 bit), y=filter memory, r12=alpha(Q12), r8=tmp +.macro LPfilt in y +@ asr r8, \y, #8 +@ rsb r8, r8, \in, lsl #4 @ fixpoint 20.12 + sub r8, \in, \y, asr #12 @ fixpoint 20.12 + mla \y, r8, r12, \y + asr \in, \y, #12 .endm + @ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only @ warning: this function assumes dest is word aligned .global mix_32_to_16l_stereo @ short *dest, int *src, int count @@ -193,9 +205,10 @@ mix_32_to_16l_stereo: subs r2, r2, #4 bmi m32_16l_st_end - mov lr, #1 ldr r12, =filter - ldmia r12, {r10-r11} + ldr r8, [r12], #4 + ldmia r12, {r3,r10-r11,lr} + str r8, [sp, #-4]! m32_16l_st_loop: ldmia r0, {r8,r12} @@ -206,10 +219,16 @@ m32_16l_st_loop: add r5, r5, r8, asr #16 add r6, r6, r12,asr #16 add r7, r7, r12,asr #16 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr + LPfilt r6, r3 + LPfilt r7, lr DCfilt r4, r10 DCfilt r5, r11 DCfilt r6, r10 DCfilt r7, r11 + mov r12,#1 Limitsh r4 Limitsh r5 Limitsh r6 @@ -228,8 +247,12 @@ m32_16l_st_end: ldmia r1!,{r4,r5} add r4, r4, r6 add r5, r5, r6 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr DCfilt r4, r10 DCfilt r5, r11 + mov r12,#1 Limitsh r4 Limitsh r5 orr r4, r5, r4, lsr #16 @@ -237,7 +260,9 @@ m32_16l_st_end: m32_16l_st_no_unal2: ldr r12, =filter - stmia r12, {r10-r11} + add r12,r12, #4 + stmia r12, {r3,r10-r11,lr} + add sp, sp, #4 ldmfd sp!, {r4-r8,r10-r11,lr} bx lr @@ -248,9 +273,10 @@ m32_16l_st_no_unal2: mix_32_to_16_mono: stmfd sp!, {r4-r8,r10-r11,lr} - mov lr, #1 ldr r12, =filter - ldr r10, [r12] + ldr r8, [r12], #4 + ldmia r12, {r10-r11} + str r8, [sp, #-4]! 
@ check if dest is word aligned tst r0, #2 @@ -259,6 +285,10 @@ mix_32_to_16_mono: ldr r4, [r1], #4 sub r2, r2, #1 add r4, r4, r5 + ldr r12,[sp] + LPfilt r4, r11 + DCfilt r4, r10 + mov r12,#1 Limit r4 strh r4, [r0], #2 @@ -275,10 +305,16 @@ m32_16_mo_loop: add r7, r7, r12,asr #16 mov r12,r12,lsl #16 add r6, r6, r12,asr #16 + ldr r12,[sp] + LPfilt r4, r11 + LPfilt r5, r11 + LPfilt r6, r11 + LPfilt r7, r11 DCfilt r4, r10 DCfilt r5, r10 DCfilt r6, r10 DCfilt r7, r10 + mov r12,#1 Limitsh r4 Limitsh r5 Limitsh r6 @@ -298,8 +334,12 @@ m32_16_mo_end: add r5, r5, r6, asr #16 mov r6, r6, lsl #16 add r4, r4, r6, asr #16 + ldr r12,[sp] + LPfilt r4, r11 + LPfilt r5, r11 DCfilt r4, r10 DCfilt r5, r10 + mov r12,#1 Limitsh r4 Limitsh r5 orr r4, r5, r4, lsr #16 @@ -311,13 +351,18 @@ m32_16_mo_no_unal2: ldrsh r5, [r0] ldr r4, [r1], #4 add r4, r4, r5 + ldr r12,[sp] + LPfilt r4, r11 DCfilt r4, r10 + mov r12,#1 Limit r4 strh r4, [r0], #2 m32_16_mo_no_unal: ldr r12, =filter - str r10, [r12] + add r12,r12, #4 + stmia r12, {r10-r11} + add sp, sp, #4 ldmfd sp!, {r4-r8,r10-r11,lr} bx lr @@ -344,7 +389,9 @@ mix_32_to_16l_stereo_lvl: mov lr, #1 ldr r9, [r9] ldr r12, =filter - ldm r12, {r10-r11} + ldr r8, [r12], #4 + ldmia r12, {r3,r10-r11,lr} + str r8, [sp, #-4]! 
mov r2, r2, lsl #1 subs r2, r2, #4 @@ -363,10 +410,16 @@ m32_16l_st_l_loop: mov r5, r5, asr r9 mov r6, r6, asr r9 mov r7, r7, asr r9 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr + LPfilt r6, r3 + LPfilt r7, lr DCfilt r4, r10 DCfilt r5, r11 DCfilt r6, r10 DCfilt r7, r11 + mov r12,#1 Limitsh r4 Limitsh r5 Limitsh r6 @@ -387,8 +440,12 @@ m32_16l_st_l_end: add r5, r5, r6 mov r4, r4, asr r9 mov r5, r5, asr r9 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr DCfilt r4, r10 DCfilt r5, r11 + mov r12,#1 Limitsh r4 Limitsh r5 orr r4, r5, r4, lsr #16 @@ -396,22 +453,32 @@ m32_16l_st_l_end: m32_16l_st_l_no_unal2: ldr r12, =filter - stmia r12, {r10-r11} + add r12,r12, #4 + stmia r12, {r3,r10-r11,lr} + add sp, sp, #4 ldmfd sp!, {r4-r11,lr} bx lr #endif /* __GP2X__ */ -.global mix_reset @ void +.global mix_reset @ int alpha_q16 mix_reset: - ldr r0, =filter + ldr r2, =filter + rsb r0, r0, #0x10000 +@ asr r0, r0, #8 + asr r0, r0, #4 + str r0, [r2], #4 mov r1, #0 - str r1, [r0] - str r1, [r0, #4] + str r1, [r2], #4 + str r1, [r2], #4 + str r1, [r2], #4 + str r1, [r2], #4 bx lr .data filter: - .ds 8 + .ds 4 @ alpha_q12 + .ds 8 @ filter history for left channel + .ds 8 @ filter history for right channel @ vim:filetype=armasm diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 678330a0..ad748688 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -26,73 +26,6 @@ short cdda_out_buffer[2*1152]; // sn76496 extern int *sn76496_regs; -// Low pass filter 'previous' samples -static int32_t lpf_lp; -static int32_t lpf_rp; - -static void low_pass_filter_stereo(int *buf32, int length) -{ - int samples = length; - int *out32 = buf32; - // Restore previous samples - int32_t lpf_l = lpf_lp; - int32_t lpf_r = lpf_rp; - - // Single-pole low-pass filter (6 dB/octave) - int32_t factor_a = PicoIn.sndFilterRange; - int32_t factor_b = 0x10000 - factor_a; - - do - { - // Apply low-pass filter - lpf_l = (lpf_l * factor_a) + (out32[0] * factor_b); - lpf_r = (lpf_r * factor_a) + (out32[1] * 
factor_b); - - // 16.16 fixed point - lpf_l >>= 16; - lpf_r >>= 16; - - // Update sound buffer - *out32++ = lpf_l; - *out32++ = lpf_r; - } - while (--samples); - - // Save last samples for next frame - lpf_lp = lpf_l; - lpf_rp = lpf_r; -} - -static void low_pass_filter_mono(int *buf32, int length) -{ - int samples = length; - int *out32 = buf32; - // Restore previous sample - int32_t lpf_l = lpf_lp; - - // Single-pole low-pass filter (6 dB/octave) - int32_t factor_a = PicoIn.sndFilterRange; - int32_t factor_b = 0x10000 - factor_a; - - do - { - // Apply low-pass filter - lpf_l = (lpf_l * factor_a) + (out32[0] * factor_b); - - // 16.16 fixed point - lpf_l >>= 16; - - // Update sound buffer - *out32++ = lpf_l; - } - while (--samples); - - // Save last sample for next frame - lpf_lp = lpf_l; -} - -void (*low_pass_filter)(int *buf32, int length) = low_pass_filter_stereo; - // ym2413 #define YM2413_CLK 3579545 OPLL old_opll; @@ -119,11 +52,7 @@ PICO_INTERNAL void PsndReset(void) PsndRerate(0); timers_reset(); - // Reset low pass filter - lpf_lp = 0; - lpf_rp = 0; - - mix_reset(); + mix_reset(PicoIn.sndFilter ? PicoIn.sndFilterRange : 0); } @@ -179,9 +108,6 @@ void PsndRerate(int preserve_state) // set mixer PsndMix_32_to_16l = (PicoIn.opt & POPT_EN_STEREO) ? mix_32_to_16l_stereo : mix_32_to_16_mono; - // set low pass filter - low_pass_filter = (PicoIn.opt & POPT_EN_STEREO) ? 
low_pass_filter_stereo : low_pass_filter_mono; - if (PicoIn.AHW & PAHW_PICO) PicoReratePico(); } @@ -463,11 +389,6 @@ static int PsndRender(int offset, int length) if ((PicoIn.AHW & PAHW_32X) && (PicoIn.opt & POPT_EN_PWM)) p32x_pwm_update(buf32, length-offset, stereo); - // Apply low pass filter, if required - if (PicoIn.sndFilter == 1) { - low_pass_filter(buf32, length); - } - // convert + limit to normal 16bit output PsndMix_32_to_16l(PicoIn.sndOut+(offset< #include #include +#include #include "../common/input_pico.h" #include "../common/version.h" #include @@ -1434,6 +1435,8 @@ static void update_variables(bool first_run) unsigned old_frameskip_type; int old_vout_format; double new_sound_rate; + unsigned short old_snd_filter; + int32_t old_snd_filter_range; var.value = NULL; var.key = "picodrive_input1"; @@ -1539,6 +1542,7 @@ static void update_variables(bool first_run) PicoIn.opt &= ~POPT_EN_DRC; #endif + old_snd_filter = PicoIn.sndFilter; var.value = NULL; var.key = "picodrive_audio_filter"; PicoIn.sndFilter = 0; @@ -1547,6 +1551,7 @@ static void update_variables(bool first_run) PicoIn.sndFilter = 1; } + old_snd_filter_range = PicoIn.sndFilterRange; var.value = NULL; var.key = "picodrive_lowpass_range"; PicoIn.sndFilterRange = (60 * 65536) / 100; @@ -1554,6 +1559,10 @@ static void update_variables(bool first_run) PicoIn.sndFilterRange = (atoi(var.value) * 65536) / 100; } + if (old_snd_filter != PicoIn.sndFilter || old_snd_filter_range != PicoIn.sndFilterRange) { + mix_reset(PicoIn.sndFilter ? PicoIn.sndFilterRange : 0); + } + old_frameskip_type = frameskip_type; frameskip_type = 0; var.key = "picodrive_frameskip"; -- 2.39.2