From: kub Date: Thu, 31 Mar 2022 17:27:49 +0000 (+0000) Subject: sound, prepare FM filtering X-Git-Tag: v2.00~324 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e2e2b6ad1bc26ae05a10b5ae7c9acaeecfc0d899;p=picodrive.git sound, prepare FM filtering --- diff --git a/pico/pico.h b/pico/pico.h index dc596796..1baf4438 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -76,6 +76,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_PWM_IRQ_OPT (1<<22) #define POPT_DIS_FM_SSGEG (1<<23) #define POPT_EN_FM_DAC (1<<24) //x00 0000 +#define POPT_EN_FM_FILTER (1<<25) #define PAHW_MCD (1<<0) #define PAHW_32X (1<<1) diff --git a/pico/pico_int.h b/pico/pico_int.h index e95060b0..837953cf 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -465,6 +465,7 @@ struct PicoSound unsigned int fm_pos; // last FM position in Q20 unsigned int psg_pos; // last PSG position in Q16 unsigned int ym2413_pos; // last YM2413 position + unsigned int fm_fir_mul, fm_fir_div; // ratio for FM resampling FIR }; // run tools/mkoffsets pico/pico_int_offs.h if you change these diff --git a/pico/sound/blipper.c b/pico/sound/blipper.c new file mode 100644 index 00000000..72744718 --- /dev/null +++ b/pico/sound/blipper.c @@ -0,0 +1,540 @@ +/* + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * + * 03-2022 kub: modified for arbitrary decimation rates + * 03-2022 kub: modified for 32 bit sample size + */ + +#include "blipper.h" + +#include +#include +#include +#include + +#define BLIPPER_FILTER_AMP 0.75 + +#if BLIPPER_LOG_PERFORMANCE +#include +static double get_time(void) +{ + struct timespec tv; + clock_gettime(CLOCK_MONOTONIC, &tv); + return tv.tv_sec + tv.tv_nsec / 1000000000.0; +} +#endif + +struct blipper +{ + blipper_long_sample_t *output_buffer; + unsigned output_avail; + unsigned output_buffer_samples; + + blipper_sample_t *filter_bank; + + unsigned phase; + unsigned phases; + unsigned phases_div; + unsigned taps; + + blipper_long_sample_t integrator; + blipper_long_sample_t ramp; + blipper_long_sample_t last_sample; + +#if BLIPPER_LOG_PERFORMANCE + double total_time; + double integrator_time; + unsigned long total_samples; +#endif + + int owns_filter; +}; + +void blipper_free(blipper_t *blip) +{ + if (blip) + { +#if BLIPPER_LOG_PERFORMANCE + fprintf(stderr, "[blipper]: Processed %lu samples, using %.6f seconds blipping and %.6f seconds integrating.\n", blip->total_samples, blip->total_time, blip->integrator_time); +#endif + + if (blip->owns_filter) + free(blip->filter_bank); + free(blip->output_buffer); + free(blip); + } +} + +static double besseli0(double x) +{ + unsigned i; + double sum = 0.0; + + double factorial = 1.0; + double factorial_mult = 0.0; + double x_pow = 1.0; + double two_div_pow = 1.0; + double x_sqr = x * x; + + /* Approximate. This is an infinite sum. + * Luckily, it converges rather fast. */ + for (i = 0; i < 18; i++) + { + sum += x_pow * two_div_pow / (factorial * factorial); + + factorial_mult += 1.0; + x_pow *= x_sqr; + two_div_pow *= 0.25; + factorial *= factorial_mult; + } + + return sum; +} + +static double sinc(double v) +{ + if (fabs(v) < 0.00001) + return 1.0; + else + return sin(v) / v; +} + +/* index range = [-1, 1) */ +static double kaiser_window(double index, double beta) +{ + return besseli0(beta * sqrt(1.0 - index * index)); +} + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +static blipper_real_t *blipper_create_sinc(unsigned phases, unsigned taps, + double cutoff, double beta) +{ + unsigned i, filter_len; + double sidelobes, window_mod, window_phase, sinc_phase; + blipper_real_t *filter; + + filter = (blipper_real_t*)malloc(phases * taps * sizeof(*filter)); + if (!filter) + return NULL; + + sidelobes = taps / 2.0; + window_mod = 1.0 / kaiser_window(0.0, beta); + filter_len = phases * taps; + for (i = 0; i < filter_len; i++) + { + window_phase = (double)i / filter_len; /* [0, 1) */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */ + + filter[i] = cutoff * sinc(M_PI * sinc_phase * cutoff) * + kaiser_window(window_phase, beta) * window_mod; + } + + return filter; +} + +void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta, + unsigned clocks) +{ + blipper_real_t ramp = BLIPPER_FILTER_AMP * delta * blip->phases / clocks; +#if BLIPPER_FIXED_POINT + blip->ramp = (blipper_long_sample_t)floor(ramp * 0x8000 + 0.5); +#else + blip->ramp = ramp; +#endif +} + +/* We differentiate and integrate at different sample rates. + * Differentiation is D(z) = 1 - z^-1 and happens when delta impulses + * are convolved. Integration step after decimation by D is 1 / (1 - z^-D). + * + * If our sinc filter is S(z) we'd have a response of + * S(z) * (1 - z^-1) / (1 - z^-D) after blipping. + * + * Compensate by prefiltering S(z) with the inverse (1 - z^-D) / (1 - z^-1). + * This filtering creates a finite length filter, albeit slightly longer. + * + * phases is the same as decimation rate. */ +static blipper_real_t *blipper_prefilter_sinc(blipper_real_t *filter, unsigned phases, + unsigned taps) +{ + unsigned i; + float filter_amp = BLIPPER_FILTER_AMP / phases; + blipper_real_t *tmp_filter; + blipper_real_t *new_filter = (blipper_real_t*)malloc((phases * taps + phases) * sizeof(*filter)); + if (!new_filter) + goto error; + + tmp_filter = (blipper_real_t*)realloc(filter, (phases * taps + phases) * sizeof(*filter)); + if (!tmp_filter) + goto error; + filter = tmp_filter; + + /* Integrate. */ + new_filter[0] = filter[0]; + for (i = 1; i < phases * taps; i++) + new_filter[i] = new_filter[i - 1] + filter[i]; + for (i = phases * taps; i < phases * taps + phases; i++) + new_filter[i] = new_filter[phases * taps - 1]; + + taps++; + + /* Differentiate with offset of D. */ + memcpy(filter, new_filter, phases * sizeof(*filter)); + for (i = phases; i < phases * taps; i++) + filter[i] = new_filter[i] - new_filter[i - phases]; + + /* blipper_prefilter_sinc() boosts the gain of the sinc. + * Have to compensate for this. Attenuate a bit more to ensure + * we don't clip, especially in fixed point. */ + for (i = 0; i < phases * taps; i++) + filter[i] *= filter_amp; + + free(new_filter); + return filter; + +error: + free(new_filter); + free(filter); + return NULL; +} + +/* Creates a polyphase filter bank. + * Interleaves the filter for cache coherency and possibilities + * for SIMD processing. */ +static blipper_real_t *blipper_interleave_sinc(blipper_real_t *filter, unsigned phases, + unsigned taps) +{ + unsigned t, p; + blipper_real_t *new_filter = (blipper_real_t*)malloc(phases * taps * sizeof(*filter)); + if (!new_filter) + goto error; + + for (t = 0; t < taps; t++) + for (p = 0; p < phases; p++) + new_filter[p * taps + t] = filter[t * phases + p]; + + free(filter); + return new_filter; + +error: + free(new_filter); + free(filter); + return NULL; +} + +#if BLIPPER_FIXED_POINT +static blipper_sample_t *blipper_quantize_sinc(blipper_real_t *filter, unsigned taps) +{ + unsigned t; + blipper_sample_t *filt = (blipper_sample_t*)malloc(taps * sizeof(*filt)); + if (!filt) + goto error; + + for (t = 0; t < taps; t++) + filt[t] = (blipper_sample_t)floor(filter[t] * 0x7fff + 0.5); + + free(filter); + return filt; + +error: + free(filter); + free(filt); + return NULL; +} +#endif + +blipper_sample_t *blipper_create_filter_bank(unsigned phases, unsigned taps, + double cutoff, double beta) +{ + blipper_real_t *sinc_filter; + + /* blipper_prefilter_sinc() will add one tap. + * To keep number of taps as expected, compensate for it here + * to keep the interface more obvious. */ + if (taps <= 1) + return 0; + taps--; + + sinc_filter = blipper_create_sinc(phases, taps, cutoff, beta); + if (!sinc_filter) + return 0; + + sinc_filter = blipper_prefilter_sinc(sinc_filter, phases, taps); + if (!sinc_filter) + return 0; + taps++; + + sinc_filter = blipper_interleave_sinc(sinc_filter, phases, taps); + if (!sinc_filter) + return 0; + +#if BLIPPER_FIXED_POINT + return blipper_quantize_sinc(sinc_filter, phases * taps); +#else + return sinc_filter; +#endif +} + +void blipper_reset(blipper_t *blip) +{ + blip->phase = 0; + memset(blip->output_buffer, 0, + (blip->output_avail + blip->taps) * sizeof(*blip->output_buffer)); + blip->output_avail = 0; + blip->last_sample = 0; + blip->integrator = 0; + blip->ramp = 0; +} + +blipper_t *blipper_new(unsigned taps, double cutoff, double beta, + unsigned decimation, unsigned buffer_samples, + const blipper_sample_t *filter_bank) +{ + blipper_t *blip = NULL; + + /* Sanity check. Not strictly required to be supported in C. */ + if ((-3 >> 2) != -1) + { + fprintf(stderr, "Integer right shift not supported.\n"); + return NULL; + } + + blip = (blipper_t*)calloc(1, sizeof(*blip)); + if (!blip) + return NULL; + + blip->phases = decimation; + blip->phases_div = 0x100000000ULL/decimation; + + blip->taps = taps; + + if (!filter_bank) + { + blip->filter_bank = blipper_create_filter_bank(blip->phases, taps, cutoff, beta); + if (!blip->filter_bank) + goto error; + blip->owns_filter = 1; + } + else + blip->filter_bank = (blipper_sample_t*)filter_bank; + + blip->output_buffer = (blipper_long_sample_t*)calloc(buffer_samples + blip->taps, + sizeof(*blip->output_buffer)); + if (!blip->output_buffer) + goto error; + blip->output_buffer_samples = buffer_samples + blip->taps; + + return blip; + +error: + blipper_free(blip); + return NULL; +} + +inline void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step) +{ + unsigned target_output, filter_phase, taps, i; + const blipper_sample_t *response; + blipper_long_sample_t *target; + + blip->phase += clocks_step; + + target_output = ((unsigned long long)blip->phase * blip->phases_div) >> 32; + + filter_phase = (target_output * blip->phases) - blip->phase; + if (filter_phase >= blip->phases) // rounding error for *(1/phases) + filter_phase += blip->phases, target_output ++; + response = blip->filter_bank + blip->taps * filter_phase; + + target = blip->output_buffer + target_output; + taps = blip->taps; + + blip->output_avail = target_output; + + for (i = 1; i < taps; i += 2) { + target[i-1] += delta * response[i-1]; + target[i ] += delta * response[i ]; + } + if (taps & 1) + target[i-1] += delta * response[i-1]; +} + +static inline void _blipper_push_samples(blipper_t *blip, + const char *data, blipper_long_sample_t (*get)(const char *), + unsigned samples, unsigned stride, unsigned clocks_step) +{ + unsigned s; + unsigned clocks_skip = 0; + blipper_long_sample_t last = blip->last_sample; + +#if BLIPPER_LOG_PERFORMANCE + double t0 = get_time(); +#endif + + for (s = 0; s < samples; s++, data += stride) + { + blipper_long_sample_t val = get(data); + clocks_skip += clocks_step; + if (val != last) + { + blipper_push_delta(blip, val - last, clocks_skip); + clocks_skip = 0; + last = val; + } + } + + blip->phase += clocks_skip; + blip->output_avail = ((unsigned long long)blip->phase * blip->phases_div) >> 32; + if ((blip->output_avail+1) * blip->phases <= blip->phase) + blip->output_avail++; // rounding error for *(1/phases) + blip->last_sample = last; + +#if BLIPPER_LOG_PERFORMANCE + blip->total_time += get_time() - t0; + blip->total_samples += samples; +#endif +} + +static inline blipper_long_sample_t _blipper_get_short(const char *data) +{ + return *(blipper_sample_t *)data; +} + +static inline blipper_long_sample_t _blipper_get_long(const char *data) +{ + return *(blipper_long_sample_t *)data; +} + +void blipper_push_samples(blipper_t *blip, const blipper_sample_t *data, + unsigned samples, unsigned stride, unsigned clocks_step) +{ + _blipper_push_samples(blip, (const char *)data, _blipper_get_short, samples, + stride * sizeof(*data), clocks_step); +} + +void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *data, + unsigned samples, unsigned stride, unsigned clocks_step) +{ + _blipper_push_samples(blip, (const char *)data, _blipper_get_long, samples, + stride * sizeof(*data), clocks_step); +} + +unsigned blipper_read_phase(blipper_t *blip) +{ + return blip->phase; +} + +unsigned blipper_read_avail(blipper_t *blip) +{ + return blip->output_avail; +} + +static inline void _blipper_put_short(char *data, blipper_long_sample_t val) +{ + *(blipper_sample_t *)data = val; +} + +static inline void _blipper_put_long(char *data, blipper_long_sample_t val) +{ + *(blipper_long_sample_t *)data = val; +} + +static inline void _blipper_read(blipper_t *blip, int clamp, char *output, + void (*put)(char *, blipper_long_sample_t), unsigned samples, unsigned stride) +{ + unsigned s; + blipper_long_sample_t sum = blip->integrator; + const blipper_long_sample_t *out = blip->output_buffer; + blipper_long_sample_t ramp = blip->ramp; + +#if BLIPPER_LOG_PERFORMANCE + double t0 = get_time(); +#endif + +#if BLIPPER_FIXED_POINT + for (s = 0; s < samples; s++, output += stride) + { + blipper_long_sample_t quant; + + /* Cannot overflow. Also add a leaky integrator. + Mitigates DC shift numerical instability which is + inherent for integrators. */ + sum += ((out[s] + ramp) >> 1) - (sum >> 9); + + /* Rounded. With leaky integrator, this cannot overflow. */ + quant = (sum + 0x4000) >> 15; + + /* Clamp. quant can potentially have range [-0x10000, 0xffff] here. + * In both cases, top 16-bits will have a uniform bit pattern which can be exploited. */ + if (clamp && (blipper_sample_t)quant != quant) + { + quant = (quant >> 16) ^ 0x7fff; + sum = quant << 15; + } + + put(output, quant); + } +#else + for (s = 0; s < samples; s++, output += stride) + { + /* Leaky integrator, same as fixed point (1.0f / 512.0f) */ + sum += out[s] + ramp - sum * 0.00195f; + put(output, sum); + } +#endif + + /* Don't bother with ring buffering. + * The entire buffer should be read out ideally anyways. */ + memmove(blip->output_buffer, blip->output_buffer + samples, + (blip->output_avail + blip->taps - samples) * sizeof(*out)); + memset(blip->output_buffer + blip->output_avail + blip->taps - samples, 0, samples * sizeof(*out)); + blip->output_avail -= samples; + blip->phase -= samples * blip->phases; + + blip->integrator = sum; + +#if BLIPPER_LOG_PERFORMANCE + blip->integrator_time += get_time() - t0; +#endif +} + +void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples, + unsigned stride) +{ + _blipper_read(blip, 1, (char *)output, _blipper_put_short, samples, + stride * sizeof(*output)); +} + +void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples, + unsigned stride) +{ + _blipper_read(blip, 0, (char *)output, _blipper_put_long, samples, + stride * sizeof(*output)); +} diff --git a/pico/sound/blipper.h b/pico/sound/blipper.h new file mode 100644 index 00000000..20b75975 --- /dev/null +++ b/pico/sound/blipper.h @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BLIPPER_H__ +#define BLIPPER_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Compile time configurables. */ +#ifndef BLIPPER_LOG_PERFORMANCE +#define BLIPPER_LOG_PERFORMANCE 0 +#endif + +#ifndef BLIPPER_FIXED_POINT +#define BLIPPER_FIXED_POINT 1 +#endif + +/* Set to float or double. + * long double is unlikely to provide any improved precision. */ +#ifndef BLIPPER_REAL_T +#define BLIPPER_REAL_T float +#endif + +/* Allows including several implementations in one lib. */ +#if BLIPPER_FIXED_POINT +#define BLIPPER_MANGLE(x) x##_fixed +#else +#define BLIPPER_CONCAT2(a, b) a ## b +#define BLIPPER_CONCAT(a, b) BLIPPER_CONCAT2(a, b) +#define BLIPPER_MANGLE(x) BLIPPER_CONCAT(x##_, BLIPPER_REAL_T) +#endif + +#include + +typedef struct blipper blipper_t; +typedef BLIPPER_REAL_T blipper_real_t; + +#if BLIPPER_FIXED_POINT +#ifdef HAVE_STDINT_H +#include +typedef int16_t blipper_sample_t; +typedef int32_t blipper_long_sample_t; +#else +#if SHRT_MAX == 0x7fff +typedef short blipper_sample_t; +#elif INT_MAX == 0x7fff +typedef int blipper_sample_t; +#else +#error "Cannot find suitable type for blipper_sampler_t." +#endif + +#if INT_MAX == 0x7fffffffl +typedef int blipper_long_sample_t; +#elif LONG_MAX == 0x7fffffffl +typedef long blipper_long_sample_t; +#else +#error "Cannot find suitable type for blipper_long_sample_t." +#endif +#endif +#else +typedef BLIPPER_REAL_T blipper_sample_t; +typedef BLIPPER_REAL_T blipper_long_sample_t; /* Meaningless for float version. */ +#endif + +/* Create a new blipper. + * taps: Number of filter taps per impulse. + * + * cutoff: Cutoff frequency in the passband. Has a range of [0, 1]. + * + * beta: Beta used for Kaiser window. + * + * decimation: Sets decimation rate. + * The input sampling rate is then output_rate * decimation. + * buffer_samples: The maximum number of processed output samples that can be + * buffered up by blipper. + * + * filter_bank: An optional filter which has already been created by + * blipper_create_filter_bank(). blipper_new() does not take ownership + * of the buffer and must be freed by caller. + * If non-NULL, cutoff and beta will be ignored. + * + * Some sane values: + * taps = 64, cutoff = 0.85, beta = 8.0 + */ +#define blipper_new BLIPPER_MANGLE(blipper_new) +blipper_t *blipper_new(unsigned taps, double cutoff, double beta, + unsigned decimation, unsigned buffer_samples, const blipper_sample_t *filter_bank); + +/* Reset the blipper to its initiate state. */ +#define blipper_reset BLIPPER_MANGLE(blipper_reset) +void blipper_reset(blipper_t *blip); + +/* Create a filter which can be passed to blipper_new() in filter_bank. + * Arguments to decimation and taps must match. */ +#define blipper_create_filter_bank BLIPPER_MANGLE(blipper_create_filter_bank) +blipper_sample_t *blipper_create_filter_bank(unsigned decimation, + unsigned taps, double cutoff, double beta); + +/* Frees the blipper. blip can be NULL (no-op). */ +#define blipper_free BLIPPER_MANGLE(blipper_free) +void blipper_free(blipper_t *blip); + +/* Add a ramp to the synthesized wave. The ramp is added to the integrator + * on every input sample. + * The amount added is delta / clocks per input sample. + * The interface is fractional to have better accuract with fixed point. + * This can be combined with a delta train to synthesize e.g. sawtooth waves. + * When using a ramp, care must be taken to ensure that the integrator does not saturate. + * It is recommended to use floating point implementation when using the ramp. */ +#define blipper_set_ramp BLIPPER_MANGLE(blipper_set_ramp) +void blipper_set_ramp(blipper_t *blip, blipper_long_sample_t delta, + unsigned clocks); + +/* Data pushing interfaces. One of these should be used exclusively. */ + +/* Push a single delta, which occurs clock_step input samples after the + * last time a delta was pushed. The delta value is the difference signal + * between the new sample and the previous. + * It is unnecessary to pass a delta of 0. + * If the deltas are known beforehand (e.g. when synthesizing a waveform), + * this is a more efficient interface than blipper_push_samples(). + * + * The caller must ensure not to push deltas in a way that can destabilize + * the final integration. + */ +#define blipper_push_delta BLIPPER_MANGLE(blipper_push_delta) +void blipper_push_delta(blipper_t *blip, blipper_long_sample_t delta, unsigned clocks_step); + +/* Push raw samples. blipper will find the deltas themself and push them. + * stride is the number of samples between each sample to be used. + * This can be used to push interleaved stereo data to two independent + * blippers. + */ +#define blipper_push_samples BLIPPER_MANGLE(blipper_push_samples) +void blipper_push_samples(blipper_t *blip, const blipper_sample_t *delta, + unsigned samples, unsigned stride, unsigned clocks_step); +#define blipper_push_long_samples BLIPPER_MANGLE(blipper_push_long_samples) +void blipper_push_long_samples(blipper_t *blip, const blipper_long_sample_t *delta, + unsigned samples, unsigned stride, unsigned clocks_step); + +/* Returns the number of samples available for reading using + * blipper_read(). + */ +#define blipper_read_avail BLIPPER_MANGLE(blipper_read_avail) +unsigned blipper_read_avail(blipper_t *blip); + +/* Returns the current filter phase + */ +#define blipper_read_phase BLIPPER_MANGLE(blipper_read_phase) +unsigned blipper_read_phase(blipper_t *blip); + +/* Reads processed samples. The caller must ensure to not read + * more than what is returned from blipper_read_avail(). + * As in blipper_push_samples(), stride is the number of samples + * between each output sample in output. + * Can be used to write to an interleaved stereo buffer. + */ +#define blipper_read BLIPPER_MANGLE(blipper_read) +void blipper_read(blipper_t *blip, blipper_sample_t *output, unsigned samples, + unsigned stride); +#define blipper_read_long BLIPPER_MANGLE(blipper_long_read) +void blipper_read_long(blipper_t *blip, blipper_long_sample_t *output, unsigned samples, + unsigned stride); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/pico/sound/resampler.c b/pico/sound/resampler.c new file mode 100644 index 00000000..5b68d0b4 --- /dev/null +++ b/pico/sound/resampler.c @@ -0,0 +1,261 @@ +/* Configurable fixed point resampling SINC filter for mono and stereo audio. + * + * (C) 2022 kub + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. + */ + + +/* SINC filter generation taken from the blipper library, its license is: + * + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +#include +#include +#include + +#include "../pico_types.h" +#include "resampler.h" + +static double besseli0(double x) +{ + unsigned i; + double sum = 0.0; + + double factorial = 1.0; + double factorial_mult = 0.0; + double x_pow = 1.0; + double two_div_pow = 1.0; + double x_sqr = x * x; + + /* Approximate. This is an infinite sum. + * Luckily, it converges rather fast. */ + for (i = 0; i < 18; i++) + { + sum += x_pow * two_div_pow / (factorial * factorial); + + factorial_mult += 1.0; + x_pow *= x_sqr; + two_div_pow *= 0.25; + factorial *= factorial_mult; + } + + return sum; +} + +static double sinc(double v) +{ + if (fabs(v) < 0.00001) + return 1.0; + else + return sin(v) / v; +} + +/* index range = [-1, 1) */ +static double kaiser_window(double index, double beta) +{ + return besseli0(beta * sqrt(1.0 - index * index)); +} + +/* Creates a polyphase SINC filter (:phases banks with :taps each) + * Interleaves the filter for cache coherency and possibilities for SIMD */ +static s16 *create_sinc(unsigned phases, unsigned taps, double cutoff, double beta) +{ + unsigned i, filter_len; + double sidelobes, window_mod, window_phase, sinc_phase; + s16 *filter; + double tap; + + filter = (s16*)malloc(phases * taps * sizeof(*filter)); + if (!filter) + return NULL; + + sidelobes = taps / 2.0; + window_mod = 1.0 / kaiser_window(0.0, beta); + filter_len = phases * taps; + + for (i = 0; i < filter_len; i++) + { + window_phase = (double)i / filter_len; /* [0, 1) */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */ + + tap = (cutoff * sinc(M_PI * sinc_phase * cutoff) * + kaiser_window(window_phase, beta) * window_mod); + /* assign taking filter bank interleaving into account: + * :phases banks of length :taps */ + filter[(i%phases)*taps + (i/phases)] = tap * 0x7fff + 0.5; + } + + return filter; +} + +/* Public interface */ + +/* Release a resampler */ +void resampler_free(resampler_t *rs) +{ + if (rs) + { + free(rs->buffer); + free(rs->filter); + free(rs); + } +} + +/* Create a resampler with upsampling factor :interpolation and downsampling + * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps. + * The created filter has a size of :taps*:interpolation for upsampling and + * :taps*:decimation for downsampling. :taps is limiting the cost per sample and + * should be big enough to avoid inaccuracy (>= 8, higher is more accurate). + * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation. + * :beta is the Kaiser window beta. + * :max_input is the maximum length in a resampler_update call */ +resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation, + double cutoff, double beta, unsigned max_input, int stereo) +{ + resampler_t *rs = NULL; + + if (taps == 0 || interpolation == 0 || decimation == 0 || max_input == 0) + return NULL; /* invalid parameters */ + + rs = (resampler_t*)calloc(1, sizeof(*rs)); + if (!rs) + return NULL; /* out of memory */ + + /* :cutoff is relative to the decimated frequency, but filtering is taking + * place at the interpolated frequency. It needs to be adapted if resampled + * rate is lower. Also needs more taps to keep the transistion band width */ + if (decimation > interpolation) { + cutoff = cutoff * interpolation/decimation; + taps = taps * decimation/interpolation; + } + + rs->interpolation = interpolation; + rs->decimation = decimation; + rs->taps = taps; + /* optimizers for resampler_update: */ + rs->interp_inv = 0x100000000ULL / interpolation; + rs->ratio_int = decimation / interpolation; + + rs->filter = create_sinc(interpolation, taps, cutoff, beta); + if (!rs->filter) + goto error; + + rs->stereo = !!stereo; + rs->buffer_sz = (max_input * decimation/interpolation) + decimation + 1; + rs->buffer = calloc(1, rs->buffer_sz * (stereo ? 2:1) * sizeof(*rs->buffer)); + if (!rs->buffer) + goto error; + + return rs; + +error: + if (rs->filter) + free(rs->filter); + if (rs->buffer) + free(rs->buffer); + free(rs); + return NULL; +} + +/* Obtain :length resampled audio frames in :buffer. Use :get_samples to obtain + * the needed amount of input samples */ +void resampler_update(resampler_t *rs, s32 *buffer, int length, + void (*get_samples)(s32 *buffer, int length, int stereo)) +{ + s16 *u; + s32 *p, *q = buffer; + int spf = (rs->stereo?2:1); + s32 inlen; + s32 l, r; + int n, i; + + if (length <= 0) return; + + /* compute samples needed on input side: + * inlen = (length*decimation + interpolation-phase) / interpolation */ + n = length*rs->decimation + rs->interpolation-rs->phase; + inlen = ((u64)n * rs->interp_inv) >> 32; /* input samples, n/interpolation */ + if (inlen * rs->interpolation < n - rs->interpolation) inlen++; /* rounding */ + + /* reset buffer to start if the input doesn't fit into the buffer */ + if (rs->buffer_idx + inlen+rs->taps >= rs->buffer_sz) { + memcpy(rs->buffer, rs->buffer + rs->buffer_idx*spf, rs->taps*spf*sizeof(*rs->buffer)); + rs->buffer_idx = 0; + } + p = rs->buffer + rs->buffer_idx*spf; + + /* generate input samples */ + if (inlen > 0) + get_samples(p + rs->taps*spf, inlen, rs->stereo); + + if (rs->stereo) { + while (--length >= 0) { + /* compute filter output */ + u = rs->filter + (rs->phase * rs->taps); + for (i = 0, l = r = 0; i < rs->taps-1; i += 2) + { n = *u++; l += n * p[2*i ]; r += n * p[2*i+1]; + n = *u++; l += n * p[2*i+2]; r += n * p[2*i+3]; } + if (i < rs->taps) + { n = *u++; l += n * p[2*i ]; r += n * p[2*i+1]; } + *q++ = l >> 16, *q++ = r >> 16; + /* advance position to next sample */ + rs->phase -= rs->decimation; +// if (rs->ratio_int) { + rs->phase += rs->ratio_int*rs->interpolation, + p += 2*rs->ratio_int, rs->buffer_idx += rs->ratio_int; +// } + if (rs->phase < 0) + { rs->phase += rs->interpolation, p += 2, rs->buffer_idx ++; } + } + } else { + while (--length >= 0) { + /* compute filter output */ + u = rs->filter + (rs->phase * rs->taps); + for (i = 0, l = r = 0; i < rs->taps-1; i += 2) + { n = *u++; l += n * p[ i ]; + n = *u++; l += n * p[ i+1]; } + if (i < rs->taps) + { n = *u++; l += n * p[ i ]; } + *q++ = l >> 16; + /* advance position to next sample */ + rs->phase -= rs->decimation; +// if (rs->ratio_int) { + rs->phase += rs->ratio_int*rs->interpolation, + p += rs->ratio_int, rs->buffer_idx += rs->ratio_int; +// } + if (rs->phase < 0) + { rs->phase += rs->interpolation, p += 1, rs->buffer_idx ++; } + } + } +} diff --git a/pico/sound/resampler.h b/pico/sound/resampler.h new file mode 100644 index 00000000..eef60c03 --- /dev/null +++ b/pico/sound/resampler.h @@ -0,0 +1,44 @@ +/* Configurable fixed point resampling SINC filter for mono and stereo audio. + * + * (C) 2022 kub + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. + */ + +struct resampler { + int stereo; // mono or stereo? + int taps; // taps to compute per output sample + int interpolation; // upsampling factor (numerator) + int decimation; // downsampling factor (denominator) + int ratio_int; // floor(decimation/interpolation) + u32 interp_inv; // Q16, 1.0/interpolation + s16 *filter; // filter taps + s32 *buffer; // filter history and input buffer (w/o zero stuffing) + int buffer_sz; // buffer size in frames + int buffer_idx; // buffer offset + int phase; // filter phase for last output sample +}; +typedef struct resampler resampler_t; + + +/* Release a resampler */ +void resampler_free(resampler_t *r); +/* Create a resampler with upsampling factor :interpolation and downsampling + * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps. + * The created filter has a size of :taps*:interpolation for upsampling and + * :taps*:decimation for downsampling. :taps is limiting the cost per sample and + * should be big enough to avoid inaccuracy (>= 8, higher is more accurate). + * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation. + * :beta is the Kaiser window beta. + * :max_input is the maximum length in a resampler_update call */ +resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation, + double cutoff, double beta, unsigned max_input, int stereo); +/* Obtain :length resampled audio frames in :buffer. Use :get_samples to obtain + * the needed amount of input samples */ +void resampler_update(resampler_t *r, s32 *buffer, int length, + void (*generate_samples)(s32 *buffer, int length, int stereo)); + diff --git a/pico/sound/sound.c b/pico/sound/sound.c index 591a0299..7b7d8de4 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -14,6 +14,12 @@ #include "mix.h" #include "emu2413/emu2413.h" +#ifdef USE_BLIPPER +#include "blipper.h" +#else +#include "resampler.h" +#endif + void (*PsndMix_32_to_16l)(s16 *dest, s32 *src, int count) = mix_32_to_16l_stereo; // master int buffer to mix to @@ -32,6 +38,11 @@ OPLL old_opll; static OPLL *opll = NULL; unsigned YM2413_reg; +#ifdef USE_BLIPPER +static blipper_t *fmlblip, *fmrblip; +#else +static resampler_t *fmresampler; +#endif PICO_INTERNAL void PsndInit(void) { @@ -44,6 +55,13 @@ PICO_INTERNAL void PsndExit(void) { OPLL_delete(opll); opll = NULL; + +#ifdef USE_BLIPPER + blipper_free(fmlblip); fmlblip = NULL; + blipper_free(fmrblip); fmrblip = NULL; +#else + resampler_free(fmresampler); fmresampler = NULL; +#endif } PICO_INTERNAL void PsndReset(void) @@ -53,6 +71,111 @@ PICO_INTERNAL void PsndReset(void) timers_reset(); } +int (*PsndFMUpdate)(s32 *buffer, int length, int stereo, int is_buf_empty); + +// FM polyphase FIR resampling + +#ifdef USE_BLIPPER +#define FMFIR_TAPS 11 + +// resample FM from its native 53267Hz/52781Hz with the blipper library +static u32 ymmulinv; + +int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + int mul = Pico.snd.fm_fir_mul, div = Pico.snd.fm_fir_div; + s32 *p = buffer, *q = buffer; + int ymlen; + int ret = 0; + + if (length <= 0) return ret; + + // FM samples needed: (length*div + div-blipper_read_phase(fmlblip)) / mul + ymlen = ((length*div + div-blipper_read_phase(fmlblip)) * ymmulinv) >> 32; + if (ymlen > 0) + ret = YM2612UpdateOne(p, ymlen, stereo, is_buf_empty); + + if (stereo) { + blipper_push_long_samples(fmlblip, p , ymlen, 2, mul); + blipper_push_long_samples(fmrblip, p+1, ymlen, 2, mul); + blipper_read_long(fmlblip, q , blipper_read_avail(fmlblip), 2); + blipper_read_long(fmrblip, q+1, blipper_read_avail(fmrblip), 2); + } else { + blipper_push_long_samples(fmlblip, p , ymlen, 1, mul); + blipper_read_long(fmlblip, q , blipper_read_avail(fmlblip), 1); + } + + return ret; +} + +static void YM2612_setup_FIR(int inrate, int outrate, int stereo) +{ + int mindiff = 999; + int diff, mul, div; + int maxdecim = 1500/FMFIR_TAPS; + + // compute filter ratio with smallest error for a decent number of taps + for (div = maxdecim/2; div <= maxdecim; div++) { + mul = (outrate*div + inrate/2) / inrate; + diff = outrate*div/mul - inrate; + if (abs(diff) < abs(mindiff)) { + mindiff = diff; + Pico.snd.fm_fir_mul = mul; + Pico.snd.fm_fir_div = div; + } + } + ymmulinv = 0x100000000ULL / mul; /* 1/mul in Q32 */ + printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n", + Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*mindiff/inrate); + + // create blipper (modified for polyphase resampling). Not really perfect for + // FM, but has SINC generator, a good window, and computes the filter in Q16. + blipper_free(fmlblip); + blipper_free(fmrblip); + fmlblip = blipper_new(FMFIR_TAPS, 0.85, 8.5, Pico.snd.fm_fir_div, 1000, NULL); + if (!stereo) return; + fmrblip = blipper_new(FMFIR_TAPS, 0.85, 8.5, Pico.snd.fm_fir_div, 1000, NULL); +} +#else +#define FMFIR_TAPS 8 + +// resample FM from its native 53267Hz/52781Hz with polyphase FIR filter +static int ymchans; +static void YM2612Update(s32 *buffer, int length, int stereo) +{ + ymchans = YM2612UpdateOne(buffer, length, stereo, 1); +} + +int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + resampler_update(fmresampler, buffer, length, YM2612Update); + return ymchans; +} + +static void YM2612_setup_FIR(int inrate, int outrate, int stereo) +{ + int mindiff = 999; + int diff, mul, div; + int maxmult = 30; // max interpolation factor + + // compute filter ratio with largest multiplier for smallest error + for (mul = maxmult/2; mul <= maxmult; mul++) { + div = (inrate*mul + outrate/2) / outrate; + diff = outrate*div/mul - inrate; + if (abs(diff) <= abs(mindiff)) { + mindiff = diff; + Pico.snd.fm_fir_mul = mul; + Pico.snd.fm_fir_div = div; + } + } + printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n", + Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*mindiff/inrate); + + resampler_free(fmresampler); + fmresampler = resampler_new(FMFIR_TAPS, Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, + 0.85, 2.35, 2*inrate/50, stereo); +} +#endif // to be called after changing sound rate or chips void PsndRerate(int preserve_state) @@ -60,6 +183,7 @@ void PsndRerate(int preserve_state) void *state = NULL; int target_fps = Pico.m.pal ? 50 : 60; int target_lines = Pico.m.pal ? 313 : 262; + int ym2612_clock = Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7; if (preserve_state) { state = malloc(0x204); @@ -67,9 +191,19 @@ void PsndRerate(int preserve_state) ym2612_pack_state(); memcpy(state, YM2612GetRegs(), 0x204); } - YM2612Init(Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7, PicoIn.sndRate, + if (PicoIn.opt & POPT_EN_FM_FILTER) { + int ym2612_rate = (ym2612_clock+(6*24)/2) / (6*24); + YM2612Init(ym2612_clock, ym2612_rate, ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) | ((PicoIn.opt&POPT_EN_FM_DAC) ? ST_DAC : 0)); + YM2612_setup_FIR(ym2612_rate, PicoIn.sndRate, PicoIn.opt & POPT_EN_STEREO); + PsndFMUpdate = YM2612UpdateFIR; + } else { + YM2612Init(ym2612_clock, PicoIn.sndRate, + ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) | + ((PicoIn.opt&POPT_EN_FM_DAC) ? ST_DAC : 0)); + PsndFMUpdate = YM2612UpdateOne; + } if (preserve_state) { // feed it back it's own registers, just like after loading state memcpy(YM2612GetRegs(), state, 0x204); @@ -267,7 +401,7 @@ PICO_INTERNAL void PsndDoFM(int cyc_to) pos <<= 1; } if (PicoIn.opt & POPT_EN_FM) - YM2612UpdateOne(PsndBuffer + pos, len, stereo, 1); + PsndFMUpdate(PsndBuffer + pos, len, stereo, 1); } // cdda @@ -383,7 +517,7 @@ static int PsndRender(int offset, int length) s32 *fmbuf = buf32 + ((fmlen-offset) << stereo); Pico.snd.fm_pos += (length-fmlen) << 20; if (PicoIn.opt & POPT_EN_FM) - YM2612UpdateOne(fmbuf, length-fmlen, stereo, 1); + PsndFMUpdate(fmbuf, length-fmlen, stereo, 1); } // CD: PCM sound diff --git a/platform/common/common.mak b/platform/common/common.mak index 183a7159..9a0ad6b3 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -123,6 +123,7 @@ SRCS_COMMON += $(R)pico/carthw/svp/compiler.c endif # sound SRCS_COMMON += $(R)pico/sound/sound.c +SRCS_COMMON += $(R)pico/sound/resampler.c # $(R)pico/sound/blipper.c SRCS_COMMON += $(R)pico/sound/sn76496.c $(R)pico/sound/ym2612.c SRCS_COMMON += $(R)pico/sound/emu2413/emu2413.c ifneq "$(ARCH)$(asm_mix)" "arm1"