Commit d594b07e authored by David Reid's avatar David Reid

API CHANGE: Remove ma_src.

Resampling is now done through the ma_resampler API.

Note that with this commit the old sinc resampler has been removed
because it never worked properly and is going to be replaced with a better
solution in the future. If you were using ma_src_algorithm_sinc you
should consider using ma_src_algorithm_linear with the linear.lpfCount
config variable set to MA_MAX_RESAMPLER_LPF_FILTERS.
parent d92681c4
......@@ -1480,77 +1480,6 @@ struct ma_channel_router
};
/* Forward declaration so the read callback below can take a pointer to the converter. */
typedef struct ma_src ma_src;
/*
Callback used by the converter to pull input frames from the client. It is handed the converter itself, the number of frames
wanted, an array of output buffers indexed by channel, and the user data pointer given to the read function.
*/
typedef ma_uint32 (* ma_src_read_deinterleaved_proc)(ma_src* pSRC, ma_uint32 frameCount, void** ppSamplesOut, void* pUserData); /* Returns the number of frames that were read. */
/* Selects which resampling routine ma_src_read_deinterleaved() dispatches to. */
typedef enum
{
    ma_src_algorithm_linear  = 0,                       /* Linear interpolation. */
    ma_src_algorithm_sinc    = 1,                       /* Windowed sinc interpolation. */
    ma_src_algorithm_none    = 2,                       /* Passthrough; frames are handed on unmodified. */
    ma_src_algorithm_default = ma_src_algorithm_linear
} ma_src_algorithm;
/* Window applied to the sinc lookup table when the sinc algorithm is selected. */
typedef enum
{
    ma_src_sinc_window_function_hann        = 0,
    ma_src_sinc_window_function_rectangular = 1,
    ma_src_sinc_window_function_default     = ma_src_sinc_window_function_hann
} ma_src_sinc_window_function;
/* Settings that only apply when the sinc algorithm is selected. */
typedef struct
{
ma_src_sinc_window_function windowFunction; /* Which window is used when building the lookup table. */
ma_uint32 windowWidth; /* 0 selects MA_SRC_SINC_DEFAULT_WINDOW_WIDTH; ma_src_init() clamps other values to [MA_SRC_SINC_MIN_WINDOW_WIDTH, MA_SRC_SINC_MAX_WINDOW_WIDTH]. */
} ma_src_config_sinc;
/* Configuration consumed by ma_src_init(). A copy is stored inside the ma_src object. */
typedef struct
{
ma_uint32 sampleRateIn; /* Input rate. The conversion factor is sampleRateIn / sampleRateOut. */
ma_uint32 sampleRateOut; /* Output rate. */
ma_uint32 channels; /* ma_src_init() rejects 0 and anything above MA_MAX_CHANNELS. */
ma_src_algorithm algorithm;
ma_bool32 neverConsumeEndOfInput : 1; /* Sinc only: hold back the last windowWidth cached input samples. */
ma_bool32 noSSE2 : 1; /* The no* flags opt out of the matching SIMD path even when the CPU supports it. */
ma_bool32 noAVX2 : 1;
ma_bool32 noAVX512 : 1;
ma_bool32 noNEON : 1;
ma_src_read_deinterleaved_proc onReadDeinterleaved; /* Required; ma_src_init() fails when this is NULL. */
void* pUserData; /* NOTE(review): the read functions forward their own pUserData argument to the callback, not this field - confirm intended use with callers. */
ma_src_config_sinc sinc;
} ma_src_config;
/*
Sample rate converter state. The per-algorithm working data lives in an anonymous union, so only the member that matches
config.algorithm is meaningful at any time.
*/
struct ma_src
{
union
{
struct
{
MA_ALIGN(MA_SIMD_ALIGNMENT) float input[MA_MAX_CHANNELS][MA_SRC_INPUT_BUFFER_SIZE_IN_SAMPLES]; /* Per-channel cache of input frames read from the client. */
float timeIn; /* Fractional read position, in input frames, relative to the start of the cache. */
ma_uint32 leftoverFrames; /* Frames carried over at the front of the cache from the previous read. */
} linear;
struct
{
MA_ALIGN(MA_SIMD_ALIGNMENT) float input[MA_MAX_CHANNELS][MA_SRC_SINC_MAX_WINDOW_WIDTH*2 + MA_SRC_INPUT_BUFFER_SIZE_IN_SAMPLES];
float timeIn;
ma_uint32 inputFrameCount; /* The number of frames sitting in the input buffer, not including the first half of the window. */
ma_uint32 windowPosInSamples; /* An offset of <input>. */
/*
Precomputed lookup table.
NOTE(review): earlier wording referred to a "+1" used to avoid an overflow check, but the dimension below contains no +1, while
the interpolation helpers read table[ixabs+1]. Confirm that the highest index used can never reach ma_countof(table).
*/
float table[MA_SRC_SINC_MAX_WINDOW_WIDTH*1 * MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION];
} sinc;
};
ma_src_config config; /* Copy of the config passed to ma_src_init(), with sinc.windowWidth normalized. */
ma_bool32 isEndOfInputLoaded : 1;
ma_bool32 useSSE2 : 1; /* The use* flags are set by ma_src_init() from CPU support combined with the config's no* flags. */
ma_bool32 useAVX2 : 1;
ma_bool32 useAVX512 : 1;
ma_bool32 useNEON : 1;
};
/************************************************************************************************************************************************************
*************************************************************************************************************************************************************
......@@ -1808,40 +1737,6 @@ Helper for initializing a channel router config.
ma_channel_router_config ma_channel_router_config_init(ma_uint32 channelsIn, const ma_channel channelMapIn[MA_MAX_CHANNELS], ma_uint32 channelsOut, const ma_channel channelMapOut[MA_MAX_CHANNELS], ma_channel_mix_mode mixingMode, ma_channel_router_read_deinterleaved_proc onRead, void* pUserData);
/************************************************************************************************************************************************************
Sample Rate Conversion
======================
************************************************************************************************************************************************************/
/*
Initializes a sample rate conversion object.

Returns MA_INVALID_ARGS when pSRC or pConfig is NULL, when the config has no read callback, or when the channel count is 0 or
greater than MA_MAX_CHANNELS.
*/
ma_result ma_src_init(const ma_src_config* pConfig, ma_src* pSRC);
/*
Dynamically adjusts the sample rate.

This is useful for dynamically adjusting pitch. Keep in mind, however, that this will speed up or slow down the sound. If this
is not acceptable you will need to use your own algorithm.

Both rates must be greater than 0, otherwise MA_INVALID_ARGS is returned.
*/
ma_result ma_src_set_sample_rate(ma_src* pSRC, ma_uint32 sampleRateIn, ma_uint32 sampleRateOut);
/*
Reads a number of frames.

ppSamplesOut is an array of output buffers, one per channel. pUserData is forwarded to the read callback.

Returns the number of frames actually read.
*/
ma_uint64 ma_src_read_deinterleaved(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData);
/*
Helpers for creating a sample rate conversion config. ma_src_config_init_new() returns a zeroed config;
ma_src_config_init() additionally fills in the rates, channel count, read callback and user data.
*/
ma_src_config ma_src_config_init_new(void);
ma_src_config ma_src_config_init(ma_uint32 sampleRateIn, ma_uint32 sampleRateOut, ma_uint32 channels, ma_src_read_deinterleaved_proc onReadDeinterleaved, void* pUserData);
/************************************************************************************************************************************************************
Conversion
......@@ -4430,11 +4325,15 @@ Standard Library Stuff
/* Lower-case aliases for the configurable MA_* hooks. */
#define ma_copy_memory MA_COPY_MEMORY
#define ma_assert MA_ASSERT
/*
Small arithmetic helpers. ma_max, ma_min and ma_clamp expand their arguments more than once, so do not pass expressions
with side effects. ma_countof only works on real arrays, not on pointers.
*/
#define ma_countof(x) (sizeof(x) / sizeof(x[0]))
#define ma_max(x, y) (((x) > (y)) ? (x) : (y))
#define ma_min(x, y) (((x) < (y)) ? (x) : (y))
#define ma_clamp(x, lo, hi) (ma_max(lo, ma_min(x, hi)))
#define ma_offset_ptr(p, offset) (((ma_uint8*)(p)) + (offset))
#define ma_countof(x) (sizeof(x) / sizeof(x[0]))
#define ma_max(x, y) (((x) > (y)) ? (x) : (y))
#define ma_min(x, y) (((x) < (y)) ? (x) : (y))
#define ma_clamp(x, lo, hi) (ma_max(lo, ma_min(x, hi)))
#define ma_offset_ptr(p, offset) (((ma_uint8*)(p)) + (offset))
/* Single-precision wrappers that route through the double-precision math functions. */
#define ma_floorf(x) ((float)floor((double)(x)))
#define ma_sinf(x) ((float)sin((double)(x)))
#define ma_cosf(x) ((float)cos((double)(x)))
/* Number of frames that fit in an array; relies on sizeof(buffer), so buffer must be an actual array. */
#define ma_buffer_frame_capacity(buffer, channels, format) (sizeof(buffer) / ma_get_bytes_per_sample(format) / (channels))
......@@ -34910,943 +34809,6 @@ ma_channel_router_config ma_channel_router_config_init(ma_uint32 channelsIn, con
}
/**************************************************************************************************************************************************************
SRC
**************************************************************************************************************************************************************/
/* Single-precision wrappers that route through the double-precision math functions. */
#define ma_floorf(x) ((float)floor((double)(x)))
#define ma_sinf(x) ((float)sin((double)(x)))
#define ma_cosf(x) ((float)cos((double)(x)))
/* Normalized sinc, sin(pi*x) / (pi*x), with the x == 0 case defined as 1. */
static MA_INLINE double ma_sinc(double x)
{
    double piX;

    if (x == 0) {
        return 1;
    }

    piX = MA_PI_D*x;
    return sin(piX) / piX;
}

/* Single-precision convenience wrapper around ma_sinc(). */
#define ma_sincf(x) ((float)ma_sinc((double)(x)))
/* Per-algorithm read routines. They are declared up front because ma_src_read_deinterleaved() dispatches to them before they are defined. */
ma_uint64 ma_src_read_deinterleaved__passthrough(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData);
ma_uint64 ma_src_read_deinterleaved__linear(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData);
ma_uint64 ma_src_read_deinterleaved__sinc(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData);
/*
Fills pSRC->sinc.table with unwindowed sinc values. Entry i holds sin(x)/x for x = i*pi / MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION,
with entry 0 set directly to 1 so the division by zero is never evaluated.
*/
void ma_src__build_sinc_table__sinc(ma_src* pSRC)
{
    ma_uint32 iEntry;

    ma_assert(pSRC != NULL);

    pSRC->sinc.table[0] = 1.0f;

    iEntry = 1;
    while (iEntry < ma_countof(pSRC->sinc.table)) {
        double angle = iEntry*MA_PI_D / MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
        pSRC->sinc.table[iEntry] = (float)(sin(angle)/angle);
        iEntry += 1;
    }
}
/* Builds the lookup table for the rectangular window, which leaves the base sinc table unchanged. */
void ma_src__build_sinc_table__rectangular(ma_src* pSRC)
{
    ma_src__build_sinc_table__sinc(pSRC);
}
/*
Builds the Hann-windowed lookup table: the base sinc table is generated first and every entry is then scaled by
0.5 * (1 - cos(2*pi*n / N)), where N is twice the maximum window width and n is the entry's position shifted by the
maximum window width.
*/
void ma_src__build_sinc_table__hann(ma_src* pSRC)
{
    const double N = MA_SRC_SINC_MAX_WINDOW_WIDTH*2;
    ma_uint32 iEntry;

    ma_src__build_sinc_table__sinc(pSRC);

    for (iEntry = 0; iEntry < ma_countof(pSRC->sinc.table); iEntry += 1) {
        double sincValue = pSRC->sinc.table[iEntry];
        double n = ((double)(iEntry) / MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION) + MA_SRC_SINC_MAX_WINDOW_WIDTH;
        double hannWeight = 0.5 * (1 - cos((2*MA_PI_D*n) / (N)));

        pSRC->sinc.table[iEntry] = (float)(sincValue * hannWeight);
    }
}
/*
Initializes a sample rate converter from a config.

pSRC is zeroed before the config is inspected. MA_INVALID_ARGS is returned when pSRC or pConfig is NULL, when the config has
no read callback, when the channel count is 0 or exceeds MA_MAX_CHANNELS, or when the sinc algorithm is requested with a
window function this code does not know. Returns MA_SUCCESS otherwise.
*/
ma_result ma_src_init(const ma_src_config* pConfig, ma_src* pSRC)
{
    ma_uint32 windowWidth;

    if (pSRC == NULL) {
        return MA_INVALID_ARGS;
    }

    ma_zero_object(pSRC);

    if (pConfig == NULL) {
        return MA_INVALID_ARGS;
    }
    if (pConfig->onReadDeinterleaved == NULL) {
        return MA_INVALID_ARGS;
    }
    if (pConfig->channels == 0) {
        return MA_INVALID_ARGS;
    }
    if (pConfig->channels > MA_MAX_CHANNELS) {
        return MA_INVALID_ARGS;
    }

    pSRC->config = *pConfig;

    /* A SIMD path is used only when the CPU has it and the config did not opt out. */
    pSRC->useSSE2   = ma_has_sse2()    && !pConfig->noSSE2;
    pSRC->useAVX2   = ma_has_avx2()    && !pConfig->noAVX2;
    pSRC->useAVX512 = ma_has_avx512f() && !pConfig->noAVX512;
    pSRC->useNEON   = ma_has_neon()    && !pConfig->noNEON;

    /* Everything below is specific to the sinc algorithm. */
    if (pSRC->config.algorithm != ma_src_algorithm_sinc) {
        return MA_SUCCESS;
    }

    /* Normalize the window width: 0 means "default", anything else is pulled into the supported range. */
    windowWidth = pSRC->config.sinc.windowWidth;
    if (windowWidth == 0) {
        windowWidth = MA_SRC_SINC_DEFAULT_WINDOW_WIDTH;
    }
    if (windowWidth < MA_SRC_SINC_MIN_WINDOW_WIDTH) {
        windowWidth = MA_SRC_SINC_MIN_WINDOW_WIDTH;
    }
    if (windowWidth > MA_SRC_SINC_MAX_WINDOW_WIDTH) {
        windowWidth = MA_SRC_SINC_MAX_WINDOW_WIDTH;
    }
    pSRC->config.sinc.windowWidth = windowWidth;

    /* Build the lookup table for the requested window function. */
    if (pSRC->config.sinc.windowFunction == ma_src_sinc_window_function_hann) {
        ma_src__build_sinc_table__hann(pSRC);
    } else if (pSRC->config.sinc.windowFunction == ma_src_sinc_window_function_rectangular) {
        ma_src__build_sinc_table__rectangular(pSRC);
    } else {
        return MA_INVALID_ARGS; /* <-- Hitting this means the window function is unknown to miniaudio. */
    }

    return MA_SUCCESS;
}
/*
Replaces the input and output sample rates stored in the converter's config. Each rate is written with an atomic exchange.
Returns MA_INVALID_ARGS when pSRC is NULL or either rate is 0, MA_SUCCESS otherwise.
*/
ma_result ma_src_set_sample_rate(ma_src* pSRC, ma_uint32 sampleRateIn, ma_uint32 sampleRateOut)
{
    if (pSRC == NULL) {
        return MA_INVALID_ARGS;
    }

    /* A rate of zero is never valid. */
    if (sampleRateIn == 0) {
        return MA_INVALID_ARGS;
    }
    if (sampleRateOut == 0) {
        return MA_INVALID_ARGS;
    }

    ma_atomic_exchange_32(&pSRC->config.sampleRateIn,  sampleRateIn);
    ma_atomic_exchange_32(&pSRC->config.sampleRateOut, sampleRateOut);

    return MA_SUCCESS;
}
/*
Reads up to frameCount converted frames into the per-channel buffers in ppSamplesOut by dispatching to the routine that
matches the configured algorithm. Returns the number of frames read, or 0 when an argument is invalid or the algorithm is
not recognised.
*/
ma_uint64 ma_src_read_deinterleaved(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData)
{
    ma_src_algorithm selected;

    if (pSRC == NULL) {
        return 0;
    }
    if (frameCount == 0) {
        return 0;
    }
    if (ppSamplesOut == NULL) {
        return 0;
    }

    selected = pSRC->config.algorithm;

    if (selected == ma_src_algorithm_none) {
        return ma_src_read_deinterleaved__passthrough(pSRC, frameCount, ppSamplesOut, pUserData);
    }
    if (selected == ma_src_algorithm_linear) {
        return ma_src_read_deinterleaved__linear(pSRC, frameCount, ppSamplesOut, pUserData);
    }
    if (selected == ma_src_algorithm_sinc) {
        return ma_src_read_deinterleaved__sinc(pSRC, frameCount, ppSamplesOut, pUserData);
    }

    /* Should never get here. */
    return 0;
}
/*
Passthrough read: frames are pulled straight from the client callback into the caller's buffers with no conversion.

The callback takes a 32-bit frame count, so requests that do not fit are served in chunks of at most 0xFFFFFFFF frames.
Chunking stops as soon as the callback returns fewer frames than it was asked for. Returns the number of frames read.
*/
ma_uint64 ma_src_read_deinterleaved__passthrough(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData)
{
    float* ppRunningOut[MA_MAX_CHANNELS];
    ma_uint64 framesDone;
    ma_uint32 iChan;

    /* Fast path: the whole request fits in a single callback invocation. */
    if (frameCount <= 0xFFFFFFFF) {
        return pSRC->config.onReadDeinterleaved(pSRC, (ma_uint32)frameCount, ppSamplesOut, pUserData);
    }

    /* Work on a private copy of the output pointers so they can be advanced between chunks. */
    for (iChan = 0; iChan < pSRC->config.channels; ++iChan) {
        ppRunningOut[iChan] = (float*)ppSamplesOut[iChan];
    }

    framesDone = 0;
    while (framesDone < frameCount) {
        ma_uint32 framesGot;
        ma_uint64 chunkSize = frameCount - framesDone;

        if (chunkSize > 0xFFFFFFFF) {
            chunkSize = 0xFFFFFFFF;
        }

        framesGot = (ma_uint32)pSRC->config.onReadDeinterleaved(pSRC, (ma_uint32)chunkSize, (void**)ppRunningOut, pUserData);
        if (framesGot == 0) {
            break;
        }

        framesDone += framesGot;
        for (iChan = 0; iChan < pSRC->config.channels; ++iChan) {
            ppRunningOut[iChan] += framesGot;
        }

        /* A short read means the client has nothing more to give right now. */
        if (framesGot < chunkSize) {
            break;
        }
    }

    return framesDone;
}
/*
Linear-interpolation sample rate conversion.

Writes up to frameCount output frames into the per-channel float buffers in ppSamplesOut. Input frames are pulled through
pSRC->config.onReadDeinterleaved into the pSRC->linear.input cache, and pUserData is forwarded to that callback unchanged.

Returns the number of output frames written. This can be less than frameCount when the callback delivers fewer input frames
than were requested.
*/
ma_uint64 ma_src_read_deinterleaved__linear(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData)
{
float* ppNextSamplesOut[MA_MAX_CHANNELS];
float factor;
ma_uint32 maxFrameCountPerChunkIn;
ma_uint64 totalFramesRead;
ma_assert(pSRC != NULL);
ma_assert(frameCount > 0);
ma_assert(ppSamplesOut != NULL);
/* Work on a private copy of the output pointers so they can be advanced as frames are produced. */
ma_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels);
/* Number of input frames consumed per output frame. */
factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut;
/* Capacity, in frames, of one channel of the input cache. */
maxFrameCountPerChunkIn = ma_countof(pSRC->linear.input[0]);
totalFramesRead = 0;
while (totalFramesRead < frameCount) {
ma_uint32 iChannel;
float tBeg;
float tEnd;
float tAvailable;
float tNext;
float* ppSamplesFromClient[MA_MAX_CHANNELS];
ma_uint32 iNextFrame;
ma_uint32 maxOutputFramesToRead;
ma_uint32 maxOutputFramesToRead4;
ma_uint32 framesToReadFromClient;
ma_uint32 framesReadFromClient;
ma_uint64 framesRemaining = frameCount - totalFramesRead;
ma_uint64 framesToRead = framesRemaining;
if (framesToRead > 16384) {
framesToRead = 16384; /* <-- Keep this small because we're using 32-bit floats for calculating sample positions and I don't want to run out of precision with huge sample counts. */
}
/* Read Input Data */
tBeg = pSRC->linear.timeIn;
tEnd = tBeg + ((ma_int64)framesToRead*factor); /* Cast to int64 required for VC6. */
framesToReadFromClient = (ma_uint32)(tEnd) + 1 + 1; /* +1 to make tEnd 1-based and +1 because we always need an extra sample for interpolation. */
/* Never ask for more than the cache can hold. */
if (framesToReadFromClient >= maxFrameCountPerChunkIn) {
framesToReadFromClient = maxFrameCountPerChunkIn;
}
/* New input is appended after the frames left over from the previous iteration. */
for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
ppSamplesFromClient[iChannel] = pSRC->linear.input[iChannel] + pSRC->linear.leftoverFrames;
}
framesReadFromClient = 0;
/* Only call the client when the leftover frames do not already cover the request. */
if (framesToReadFromClient > pSRC->linear.leftoverFrames) {
framesReadFromClient = (ma_uint32)pSRC->config.onReadDeinterleaved(pSRC, (ma_uint32)framesToReadFromClient - pSRC->linear.leftoverFrames, (void**)ppSamplesFromClient, pUserData);
}
framesReadFromClient += pSRC->linear.leftoverFrames; /* <-- You can sort of think of it as though we've re-read the leftover samples from the client. */
/* Interpolation needs a previous and a next sample, so fewer than two frames cannot produce any output. */
if (framesReadFromClient < 2) {
break;
}
/* From here on the pointers refer to the start of the cache, leftover frames included. */
for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
ppSamplesFromClient[iChannel] = pSRC->linear.input[iChannel];
}
/* Write Output Data */
/*
At this point we have a bunch of frames that the client has given to us for processing. From this we can determine the maximum number of output frames
that can be processed from this input. We want to output as many samples as possible from our input data.
*/
tAvailable = framesReadFromClient - tBeg - 1; /* Subtract 1 because the last input sample is needed for interpolation and cannot be included in the output sample count calculation. */
maxOutputFramesToRead = (ma_uint32)(tAvailable / factor);
/* At least one output frame is always produced per iteration. NOTE(review): confirm the input indices stay inside the frames actually read when the division above truncates to 0. */
if (maxOutputFramesToRead == 0) {
maxOutputFramesToRead = 1;
}
if (maxOutputFramesToRead > framesToRead) {
maxOutputFramesToRead = (ma_uint32)framesToRead;
}
/* Output frames are always read in groups of 4 because I'm planning on using this as a reference for some SIMD-y stuff later. */
maxOutputFramesToRead4 = maxOutputFramesToRead/4;
for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
ma_uint32 iFrameOut;
/* Input positions of the four output frames handled per unrolled iteration. */
float t0 = pSRC->linear.timeIn + factor*0;
float t1 = pSRC->linear.timeIn + factor*1;
float t2 = pSRC->linear.timeIn + factor*2;
float t3 = pSRC->linear.timeIn + factor*3;
float t;
/* Unrolled loop: each pass writes four output frames. Unlike the tail loop below, it has no bounds assertions. */
for (iFrameOut = 0; iFrameOut < maxOutputFramesToRead4; iFrameOut += 1) {
float iPrevSample0 = (float)floor(t0);
float iPrevSample1 = (float)floor(t1);
float iPrevSample2 = (float)floor(t2);
float iPrevSample3 = (float)floor(t3);
float iNextSample0 = iPrevSample0 + 1;
float iNextSample1 = iPrevSample1 + 1;
float iNextSample2 = iPrevSample2 + 1;
float iNextSample3 = iPrevSample3 + 1;
/* Fractional distance from the previous input sample; used as the blend weight. */
float alpha0 = t0 - iPrevSample0;
float alpha1 = t1 - iPrevSample1;
float alpha2 = t2 - iPrevSample2;
float alpha3 = t3 - iPrevSample3;
float prevSample0 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample0];
float prevSample1 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample1];
float prevSample2 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample2];
float prevSample3 = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample3];
float nextSample0 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample0];
float nextSample1 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample1];
float nextSample2 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample2];
float nextSample3 = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample3];
ppNextSamplesOut[iChannel][iFrameOut*4 + 0] = ma_mix_f32_fast(prevSample0, nextSample0, alpha0);
ppNextSamplesOut[iChannel][iFrameOut*4 + 1] = ma_mix_f32_fast(prevSample1, nextSample1, alpha1);
ppNextSamplesOut[iChannel][iFrameOut*4 + 2] = ma_mix_f32_fast(prevSample2, nextSample2, alpha2);
ppNextSamplesOut[iChannel][iFrameOut*4 + 3] = ma_mix_f32_fast(prevSample3, nextSample3, alpha3);
t0 += factor*4;
t1 += factor*4;
t2 += factor*4;
t3 += factor*4;
}
/* Tail loop: the remaining 0 to 3 output frames, one at a time. */
t = pSRC->linear.timeIn + (factor*maxOutputFramesToRead4*4);
for (iFrameOut = (maxOutputFramesToRead4*4); iFrameOut < maxOutputFramesToRead; iFrameOut += 1) {
float iPrevSample = (float)floor(t);
float iNextSample = iPrevSample + 1;
float alpha = t - iPrevSample;
float prevSample;
float nextSample;
ma_assert(iPrevSample < ma_countof(pSRC->linear.input[iChannel]));
ma_assert(iNextSample < ma_countof(pSRC->linear.input[iChannel]));
prevSample = ppSamplesFromClient[iChannel][(ma_uint32)iPrevSample];
nextSample = ppSamplesFromClient[iChannel][(ma_uint32)iNextSample];
ppNextSamplesOut[iChannel][iFrameOut] = ma_mix_f32_fast(prevSample, nextSample, alpha);
t += factor;
}
ppNextSamplesOut[iChannel] += maxOutputFramesToRead;
}
totalFramesRead += maxOutputFramesToRead;
/* Residual */
tNext = pSRC->linear.timeIn + (maxOutputFramesToRead*factor);
pSRC->linear.timeIn = tNext;
ma_assert(tNext <= framesReadFromClient+1);
/* Split the next read position into whole consumed frames and a fractional remainder; only the fraction is kept in timeIn. */
iNextFrame = (ma_uint32)floor(tNext);
pSRC->linear.leftoverFrames = framesReadFromClient - iNextFrame;
pSRC->linear.timeIn = tNext - iNextFrame;
/* Move the unconsumed frames to the front of the cache so the next iteration can append after them. */
for (iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
ma_uint32 iFrame;
for (iFrame = 0; iFrame < pSRC->linear.leftoverFrames; ++iFrame) {
float sample = ppSamplesFromClient[iChannel][framesReadFromClient-pSRC->linear.leftoverFrames + iFrame];
ppSamplesFromClient[iChannel][iFrame] = sample;
}
}
/* Exit the loop if we've read everything the client has to give. */
if (framesReadFromClient < framesToReadFromClient) {
break;
}
}
return totalFramesRead;
}
/*
Returns a sample rate conversion config with every field zeroed. The caller is expected to fill in at least the read
callback and channel count before handing it to ma_src_init(), which rejects a config without them.

The parameter list is spelled (void) so that the definition is a proper prototype and matches the public declaration.
*/
ma_src_config ma_src_config_init_new(void)
{
    ma_src_config config;
    ma_zero_object(&config);

    return config;
}
/*
Builds a sample rate conversion config: starts from the zeroed config returned by ma_src_config_init_new() and fills in the
rates, channel count, read callback and user data. All other fields stay zero.
*/
ma_src_config ma_src_config_init(ma_uint32 sampleRateIn, ma_uint32 sampleRateOut, ma_uint32 channels, ma_src_read_deinterleaved_proc onReadDeinterleaved, void* pUserData)
{
    ma_src_config result = ma_src_config_init_new();

    result.sampleRateIn        = sampleRateIn;
    result.sampleRateOut       = sampleRateOut;
    result.channels            = channels;
    result.onReadDeinterleaved = onReadDeinterleaved;
    result.pUserData           = pUserData;

    return result;
}
/**************************************************************************************************************************************************************
Sinc Sample Rate Conversion
===========================
The sinc SRC algorithm uses a windowed sinc to perform interpolation of samples. Currently, miniaudio's implementation supports rectangular and Hann window
methods.
Whenever an output sample is being computed, it looks at a sub-section of the input samples. I've called this sub-section in the code below the "window",
which I realize is a bit ambiguous with the mathematical "window", but it works for me when I need to conceptualize things in my head. The window is made up
of two halves. The first half contains past input samples (initialized to zero), and the second half contains future input samples. As time moves forward
and input samples are consumed, the window moves forward. The larger the window, the better the quality at the expense of slower processing. The window is
limited to the range [MA_SRC_SINC_MIN_WINDOW_WIDTH, MA_SRC_SINC_MAX_WINDOW_WIDTH] and defaults to MA_SRC_SINC_DEFAULT_WINDOW_WIDTH.
Input samples are cached for efficiency (to prevent frequently requesting tiny numbers of samples from the client). When the window gets to the end of the
cache, it's moved back to the start, and more samples are read from the client. If the client has no more data to give, the cache is filled with zeros and
the last of the input samples will be consumed. Once the last of the input samples have been consumed, no more samples will be output.
When reading output samples, we always first read whatever is already in the input cache. Only when the cache has been fully consumed do we read more data
from the client.
To access samples in the input buffer you do so relative to the window. When the window itself is at position 0, the first item in the buffer is accessed
with "windowPos + windowWidth". Generally, to access any sample relative to the window you do "windowPos + windowWidth + sampleIndexRelativeToWindow".
**************************************************************************************************************************************************************/
/* Comment this out to disable interpolation of table lookups. Disabling it is less accurate, but faster. */
#define MA_USE_SINC_TABLE_INTERPOLATION
/* Retrieves a sample from the input buffer's window. Values >= 0 retrieve future samples. Negative values return past samples. */
/*
Fetches one cached input sample addressed relative to the window. A sampleIndex of 0 or more selects a future sample and a
negative one selects a past sample; the index must lie in [-windowWidth, windowWidth).
*/
static MA_INLINE float ma_src_sinc__get_input_sample_from_window(const ma_src* pSRC, ma_uint32 channel, ma_uint32 windowPosInSamples, ma_int32 sampleIndex)
{
    ma_uint32 iCacheSample;

    ma_assert(pSRC != NULL);
    ma_assert(channel < pSRC->config.channels);
    ma_assert(sampleIndex >= -(ma_int32)pSRC->config.sinc.windowWidth);
    ma_assert(sampleIndex <   (ma_int32)pSRC->config.sinc.windowWidth);

    /* The window should always be contained within the input cache. */
    ma_assert(windowPosInSamples < ma_countof(pSRC->sinc.input[0]) - pSRC->config.sinc.windowWidth);

    /* The centre of the window sits windowWidth samples past the window position. */
    iCacheSample = windowPosInSamples + pSRC->config.sinc.windowWidth + sampleIndex;
    return pSRC->sinc.input[channel][iCacheSample];
}
static MA_INLINE float ma_src_sinc__interpolation_factor(const ma_src* pSRC, float x)
{
float xabs;
ma_int32 ixabs;
ma_assert(pSRC != NULL);
xabs = (float)fabs(x);
xabs = xabs * MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
ixabs = (ma_int32)xabs;
#if defined(MA_USE_SINC_TABLE_INTERPOLATION)
{
float a = xabs - ixabs;
return ma_mix_f32_fast(pSRC->sinc.table[ixabs], pSRC->sinc.table[ixabs+1], a);
}
#else
return pSRC->sinc.table[ixabs];
#endif
}
#if defined(MA_SUPPORT_SSE2)
/* Per-lane absolute value: clears the sign bit of each of the four floats. */
static MA_INLINE __m128 ma_fabsf_sse2(__m128 x)
{
    __m128 signCleared = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    return _mm_and_ps(signCleared, x);
}
/* Per-lane truncation toward zero, done by a round trip through 32-bit integers. */
static MA_INLINE __m128 ma_truncf_sse2(__m128 x)
{
    __m128i asInt = _mm_cvttps_epi32(x);
    return _mm_cvtepi32_ps(asInt);
}
/*
Four-lane counterpart of ma_src_sinc__interpolation_factor(). For each lane of x, the two entries of pSRC->sinc.table
around |x| * MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION are fetched and blended by the fractional part of that table position.

The integer table indices are copied out of the vector with an unaligned store into a plain int array. Reading the lanes
through a cast int* would access the __m128i object through an incompatible type.
*/
static MA_INLINE __m128 ma_src_sinc__interpolation_factor__sse2(const ma_src* pSRC, __m128 x)
{
    __m128 resolution128;
    __m128 xabs;
    __m128i ixabs;
    __m128 lo;
    __m128 hi;
    __m128 a;
    __m128 r;
    int ixabsv[4];

    resolution128 = _mm_set1_ps(MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION);

    /* Table position per lane, and its truncated integer part. */
    xabs  = ma_fabsf_sse2(x);
    xabs  = _mm_mul_ps(xabs, resolution128);
    ixabs = _mm_cvttps_epi32(xabs);

    /* ixabsv[0] receives the lowest lane, matching the order _mm_set_ps() expects for its last argument. */
    _mm_storeu_si128((__m128i*)ixabsv, ixabs);

    lo = _mm_set_ps(
        pSRC->sinc.table[ixabsv[3]],
        pSRC->sinc.table[ixabsv[2]],
        pSRC->sinc.table[ixabsv[1]],
        pSRC->sinc.table[ixabsv[0]]
    );
    hi = _mm_set_ps(
        pSRC->sinc.table[ixabsv[3]+1],
        pSRC->sinc.table[ixabsv[2]+1],
        pSRC->sinc.table[ixabsv[1]+1],
        pSRC->sinc.table[ixabsv[0]+1]
    );

    /* Fractional part of the table position is the blend weight between lo and hi. */
    a = _mm_sub_ps(xabs, _mm_cvtepi32_ps(ixabs));
    r = ma_mix_f32_fast__sse2(lo, hi, a);

    return r;
}
#endif
#if defined(MA_SUPPORT_AVX2)
/* Per-lane absolute value: clears the sign bit of each of the eight floats. */
static MA_INLINE __m256 ma_fabsf_avx2(__m256 x)
{
    __m256 signCleared = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
    return _mm256_and_ps(signCleared, x);
}
/*
Eight-lane version of the sinc table lookup. It is compiled out by the #if 0 below and nothing in this section calls it;
the AVX2 path of the sinc reader performs the same lookup inline instead.
*/
#if 0
static MA_INLINE __m256 ma_src_sinc__interpolation_factor__avx2(const ma_src* pSRC, __m256 x)
{
__m256 resolution256 = _mm256_set1_ps(MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
__m256 xabs = ma_fabsf_avx2(x);
xabs = _mm256_mul_ps(xabs, resolution256);
/* Truncated table index per lane, and the fractional remainder used as the blend weight. */
__m256i ixabs = _mm256_cvttps_epi32(xabs);
__m256 a = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs));
int* ixabsv = (int*)&ixabs;
/* Table entries at the truncated index. */
__m256 lo = _mm256_set_ps(
pSRC->sinc.table[ixabsv[7]],
pSRC->sinc.table[ixabsv[6]],
pSRC->sinc.table[ixabsv[5]],
pSRC->sinc.table[ixabsv[4]],
pSRC->sinc.table[ixabsv[3]],
pSRC->sinc.table[ixabsv[2]],
pSRC->sinc.table[ixabsv[1]],
pSRC->sinc.table[ixabsv[0]]
);
/* Table entries one past the truncated index. */
__m256 hi = _mm256_set_ps(
pSRC->sinc.table[ixabsv[7]+1],
pSRC->sinc.table[ixabsv[6]+1],
pSRC->sinc.table[ixabsv[5]+1],
pSRC->sinc.table[ixabsv[4]+1],
pSRC->sinc.table[ixabsv[3]+1],
pSRC->sinc.table[ixabsv[2]+1],
pSRC->sinc.table[ixabsv[1]+1],
pSRC->sinc.table[ixabsv[0]+1]
);
__m256 r = ma_mix_f32_fast__avx2(lo, hi, a);
return r;
}
#endif
#endif
#if defined(MA_SUPPORT_NEON)
/*
Per-lane absolute value of four floats.

Uses the dedicated absolute-value intrinsic instead of computing the absolute difference against a zero vector, which
needed an extra constant vector for the same result.
*/
static MA_INLINE float32x4_t ma_fabsf_neon(float32x4_t x)
{
    return vabsq_f32(x);
}
/*
Four-lane counterpart of ma_src_sinc__interpolation_factor(). For each lane of x, the two entries of pSRC->sinc.table
around |x| * MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION are fetched and blended by the fractional part of that table position.

The integer table indices are extracted lane by lane with vgetq_lane_s32(). Reading them through a cast int* would access
the vector object through an incompatible type.
*/
static MA_INLINE float32x4_t ma_src_sinc__interpolation_factor__neon(const ma_src* pSRC, float32x4_t x)
{
    float32x4_t xabs;
    int32x4_t ixabs;
    float32x4_t a;
    float32x4_t r;
    float lo[4];
    float hi[4];

    /* Table position per lane, and its integer part. */
    xabs  = ma_fabsf_neon(x);
    xabs  = vmulq_n_f32(xabs, MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
    ixabs = vcvtq_s32_f32(xabs);

    /* Table entries at the integer position... */
    lo[0] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 0)];
    lo[1] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 1)];
    lo[2] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 2)];
    lo[3] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 3)];

    /* ...and one entry further along. */
    hi[0] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 0)+1];
    hi[1] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 1)+1];
    hi[2] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 2)+1];
    hi[3] = pSRC->sinc.table[vgetq_lane_s32(ixabs, 3)+1];

    /* Fractional part of the table position is the blend weight between lo and hi. */
    a = vsubq_f32(xabs, vcvtq_f32_s32(ixabs));
    r = ma_mix_f32_fast__neon(vld1q_f32(lo), vld1q_f32(hi), a);

    return r;
}
#endif
ma_uint64 ma_src_read_deinterleaved__sinc(ma_src* pSRC, ma_uint64 frameCount, void** ppSamplesOut, void* pUserData)
{
float factor;
float inverseFactor;
ma_int32 windowWidth;
ma_int32 windowWidth2;
ma_int32 windowWidthSIMD;
ma_int32 windowWidthSIMD2;
float* ppNextSamplesOut[MA_MAX_CHANNELS];
float _windowSamplesUnaligned[MA_SRC_SINC_MAX_WINDOW_WIDTH*2 + MA_SIMD_ALIGNMENT];
float* windowSamples;
float _iWindowFUnaligned[MA_SRC_SINC_MAX_WINDOW_WIDTH*2 + MA_SIMD_ALIGNMENT];
float* iWindowF;
ma_int32 i;
ma_uint64 totalOutputFramesRead;
ma_assert(pSRC != NULL);
ma_assert(frameCount > 0);
ma_assert(ppSamplesOut != NULL);
factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut;
inverseFactor = 1/factor;
windowWidth = (ma_int32)pSRC->config.sinc.windowWidth;
windowWidth2 = windowWidth*2;
/*
There are cases where it's actually more efficient to increase the window width so that it's aligned with the respective
SIMD pipeline being used.
*/
windowWidthSIMD = windowWidth;
if (pSRC->useNEON) {
windowWidthSIMD = (windowWidthSIMD + 1) & ~(1);
} else if (pSRC->useAVX512) {
windowWidthSIMD = (windowWidthSIMD + 7) & ~(7);
} else if (pSRC->useAVX2) {
windowWidthSIMD = (windowWidthSIMD + 3) & ~(3);
} else if (pSRC->useSSE2) {
windowWidthSIMD = (windowWidthSIMD + 1) & ~(1);
}
windowWidthSIMD2 = windowWidthSIMD*2;
(void)windowWidthSIMD2; /* <-- Silence a warning when SIMD is disabled. */
ma_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels);
windowSamples = (float*)(((ma_uintptr)_windowSamplesUnaligned + MA_SIMD_ALIGNMENT-1) & ~(MA_SIMD_ALIGNMENT-1));
ma_zero_memory(windowSamples, MA_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float));
iWindowF = (float*)(((ma_uintptr)_iWindowFUnaligned + MA_SIMD_ALIGNMENT-1) & ~(MA_SIMD_ALIGNMENT-1));
ma_zero_memory(iWindowF, MA_SRC_SINC_MAX_WINDOW_WIDTH*2 * sizeof(float));
for (i = 0; i < windowWidth2; ++i) {
iWindowF[i] = (float)(i - windowWidth);
}
totalOutputFramesRead = 0;
while (totalOutputFramesRead < frameCount) {
ma_uint32 maxInputSamplesAvailableInCache;
float timeInBeg;
float timeInEnd;
ma_uint64 maxOutputFramesToRead;
ma_uint64 outputFramesRemaining;
ma_uint64 outputFramesToRead;
ma_uint32 iChannel;
ma_uint32 prevWindowPosInSamples;
ma_uint32 availableOutputFrames;
/*
The maximum number of frames we can read this iteration depends on how many input samples we have available to us. This is the number
of input samples between the end of the window and the end of the cache.
*/
maxInputSamplesAvailableInCache = ma_countof(pSRC->sinc.input[0]) - (pSRC->config.sinc.windowWidth*2) - pSRC->sinc.windowPosInSamples;
if (maxInputSamplesAvailableInCache > pSRC->sinc.inputFrameCount) {
maxInputSamplesAvailableInCache = pSRC->sinc.inputFrameCount;
}
/* Never consume the tail end of the input data if requested. */
if (pSRC->config.neverConsumeEndOfInput) {
if (maxInputSamplesAvailableInCache >= pSRC->config.sinc.windowWidth) {
maxInputSamplesAvailableInCache -= pSRC->config.sinc.windowWidth;
} else {
maxInputSamplesAvailableInCache = 0;
}
}
timeInBeg = pSRC->sinc.timeIn;
timeInEnd = (float)(pSRC->sinc.windowPosInSamples + maxInputSamplesAvailableInCache);
ma_assert(timeInBeg >= 0);
ma_assert(timeInBeg <= timeInEnd);
maxOutputFramesToRead = (ma_uint64)(((timeInEnd - timeInBeg) * inverseFactor));
outputFramesRemaining = frameCount - totalOutputFramesRead;
outputFramesToRead = outputFramesRemaining;
if (outputFramesToRead > maxOutputFramesToRead) {
outputFramesToRead = maxOutputFramesToRead;
}
for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) {
/* Do SRC. */
float timeIn = timeInBeg;
ma_uint32 iSample;
for (iSample = 0; iSample < outputFramesToRead; iSample += 1) {
float sampleOut = 0;
float iTimeInF = ma_floorf(timeIn);
ma_uint32 iTimeIn = (ma_uint32)iTimeInF;
ma_int32 iWindow = 0;
float tScalar;
/* Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier. */
windowSamples[0] = 0; /* <-- The first sample is always zero. */
for (i = 1; i < windowWidth2; ++i) {
windowSamples[i] = pSRC->sinc.input[iChannel][iTimeIn + i];
}
#if defined(MA_SUPPORT_AVX2) || defined(MA_SUPPORT_AVX512)
if (pSRC->useAVX2 || pSRC->useAVX512) {
__m256i ixabs[MA_SRC_SINC_MAX_WINDOW_WIDTH*2/8];
__m256 a[MA_SRC_SINC_MAX_WINDOW_WIDTH*2/8];
__m256 resolution256;
__m256 t;
__m256 r;
ma_int32 windowWidth8;
ma_int32 iWindow8;
resolution256 = _mm256_set1_ps(MA_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
t = _mm256_set1_ps((timeIn - iTimeInF));
r = _mm256_set1_ps(0);
windowWidth8 = windowWidthSIMD2 >> 3;
for (iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) {
__m256 w = *((__m256*)iWindowF + iWindow8);
__m256 xabs = _mm256_sub_ps(t, w);
xabs = ma_fabsf_avx2(xabs);
xabs = _mm256_mul_ps(xabs, resolution256);
ixabs[iWindow8] = _mm256_cvttps_epi32(xabs);
a[iWindow8] = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs[iWindow8]));
}
for (iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) {
int* ixabsv = (int*)&ixabs[iWindow8];
__m256 lo = _mm256_set_ps(
pSRC->sinc.table[ixabsv[7]],
pSRC->sinc.table[ixabsv[6]],
pSRC->sinc.table[ixabsv[5]],
pSRC->sinc.table[ixabsv[4]],
pSRC->sinc.table[ixabsv[3]],
pSRC->sinc.table[ixabsv[2]],
pSRC->sinc.table[ixabsv[1]],
pSRC->sinc.table[ixabsv[0]]
);
__m256 hi = _mm256_set_ps(
pSRC->sinc.table[ixabsv[7]+1],
pSRC->sinc.table[ixabsv[6]+1],
pSRC->sinc.table[ixabsv[5]+1],
pSRC->sinc.table[ixabsv[4]+1],
pSRC->sinc.table[ixabsv[3]+1],
pSRC->sinc.table[ixabsv[2]+1],
pSRC->sinc.table[ixabsv[1]+1],
pSRC->sinc.table[ixabsv[0]+1]
);
__m256 s = *((__m256*)windowSamples + iWindow8);
r = _mm256_add_ps(r, _mm256_mul_ps(s, ma_mix_f32_fast__avx2(lo, hi, a[iWindow8])));
}
/* Horizontal add. */
__m256 x = _mm256_hadd_ps(r, _mm256_permute2f128_ps(r, r, 1));
x = _mm256_hadd_ps(x, x);
x = _mm256_hadd_ps(x, x);
sampleOut += _mm_cvtss_f32(_mm256_castps256_ps128(x));
iWindow += windowWidth8 * 8;
}
else
#endif
#if defined(MA_SUPPORT_SSE2)
if (pSRC->useSSE2) {
__m128 t = _mm_set1_ps((timeIn - iTimeInF));
__m128 r = _mm_set1_ps(0);
ma_int32 windowWidth4 = windowWidthSIMD2 >> 2;
ma_int32 iWindow4;
for (iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) {
__m128* s = (__m128*)windowSamples + iWindow4;
__m128* w = (__m128*)iWindowF + iWindow4;
__m128 a = ma_src_sinc__interpolation_factor__sse2(pSRC, _mm_sub_ps(t, *w));
r = _mm_add_ps(r, _mm_mul_ps(*s, a));
}
sampleOut += ((float*)(&r))[0];
sampleOut += ((float*)(&r))[1];
sampleOut += ((float*)(&r))[2];
sampleOut += ((float*)(&r))[3];
iWindow += windowWidth4 * 4;
}
else
#endif
#if defined(MA_SUPPORT_NEON)
if (pSRC->useNEON) {
float32x4_t t = vmovq_n_f32((timeIn - iTimeInF));
float32x4_t r = vmovq_n_f32(0);
ma_int32 windowWidth4 = windowWidthSIMD2 >> 2;
ma_int32 iWindow4;
for (iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) {
float32x4_t* s = (float32x4_t*)windowSamples + iWindow4;
float32x4_t* w = (float32x4_t*)iWindowF + iWindow4;
float32x4_t a = ma_src_sinc__interpolation_factor__neon(pSRC, vsubq_f32(t, *w));
r = vaddq_f32(r, vmulq_f32(*s, a));
}
sampleOut += ((float*)(&r))[0];
sampleOut += ((float*)(&r))[1];
sampleOut += ((float*)(&r))[2];
sampleOut += ((float*)(&r))[3];
iWindow += windowWidth4 * 4;
}
else
#endif
{
iWindow += 1; /* The first one is a dummy for SIMD alignment purposes. Skip it. */
}
/* Non-SIMD/Reference implementation. */
tScalar = (timeIn - iTimeIn);
for (; iWindow < windowWidth2; iWindow += 1) {
float s = windowSamples[iWindow];
float w = iWindowF[iWindow];
float a = ma_src_sinc__interpolation_factor(pSRC, (tScalar - w));
float r = s * a;
sampleOut += r;
}
ppNextSamplesOut[iChannel][iSample] = (float)sampleOut;
timeIn += factor;
}
ppNextSamplesOut[iChannel] += outputFramesToRead;
}
totalOutputFramesRead += outputFramesToRead;
prevWindowPosInSamples = pSRC->sinc.windowPosInSamples;
pSRC->sinc.timeIn += ((ma_int64)outputFramesToRead * factor); /* Cast to int64 required for VC6. */
pSRC->sinc.windowPosInSamples = (ma_uint32)pSRC->sinc.timeIn;
pSRC->sinc.inputFrameCount -= pSRC->sinc.windowPosInSamples - prevWindowPosInSamples;
/* If the window has reached a point where we cannot read a whole output sample it needs to be moved back to the start. */
availableOutputFrames = (ma_uint32)((timeInEnd - pSRC->sinc.timeIn) * inverseFactor);
if (availableOutputFrames == 0) {
size_t samplesToMove = ma_countof(pSRC->sinc.input[0]) - pSRC->sinc.windowPosInSamples;
pSRC->sinc.timeIn -= ma_floorf(pSRC->sinc.timeIn);
pSRC->sinc.windowPosInSamples = 0;
/* Move everything from the end of the cache up to the front. */
for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) {
memmove(pSRC->sinc.input[iChannel], pSRC->sinc.input[iChannel] + ma_countof(pSRC->sinc.input[iChannel]) - samplesToMove, samplesToMove * sizeof(*pSRC->sinc.input[iChannel]));
}
}
/* Read more data from the client if required. */
if (pSRC->isEndOfInputLoaded) {
pSRC->isEndOfInputLoaded = MA_FALSE;
break;
}
/*
Everything beyond this point is reloading. If we're at the end of the input data we do _not_ want to try reading any more in this function call. If the
caller wants to keep trying, they can reload their internal data sources and call this function again. We should never get here with the end-of-input
flag still set, because that case breaks out of the loop just above; the assert below checks this.
*/
ma_assert(pSRC->isEndOfInputLoaded == MA_FALSE);
if (pSRC->sinc.inputFrameCount <= pSRC->config.sinc.windowWidth || availableOutputFrames == 0) {
float* ppInputDst[MA_MAX_CHANNELS] = {0};
ma_uint32 framesToReadFromClient;
ma_uint32 framesReadFromClient;
ma_uint32 leftoverFrames;
for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) {
ppInputDst[iChannel] = pSRC->sinc.input[iChannel] + pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount;
}
/* Now read data from the client. */
framesToReadFromClient = ma_countof(pSRC->sinc.input[0]) - (pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount);
framesReadFromClient = 0;
if (framesToReadFromClient > 0) {
framesReadFromClient = pSRC->config.onReadDeinterleaved(pSRC, framesToReadFromClient, (void**)ppInputDst, pUserData);
}
if (framesReadFromClient != framesToReadFromClient) {
pSRC->isEndOfInputLoaded = MA_TRUE;
} else {
pSRC->isEndOfInputLoaded = MA_FALSE;
}
if (framesReadFromClient != 0) {
pSRC->sinc.inputFrameCount += framesReadFromClient;
} else {
/* We couldn't get anything more from the client. If no more output samples can be computed from the available input samples we need to return. */
if (pSRC->config.neverConsumeEndOfInput) {
if ((pSRC->sinc.inputFrameCount * inverseFactor) <= pSRC->config.sinc.windowWidth) {
break;
}
} else {
if ((pSRC->sinc.inputFrameCount * inverseFactor) < 1) {
break;
}
}
}
/* Anything left over in the cache must be set to zero. */
leftoverFrames = ma_countof(pSRC->sinc.input[0]) - (pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount);
if (leftoverFrames > 0) {
for (iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) {
ma_zero_memory(pSRC->sinc.input[iChannel] + pSRC->config.sinc.windowWidth + pSRC->sinc.inputFrameCount, leftoverFrames * sizeof(float));
}
}
}
}
return totalOutputFramesRead;
}
/**************************************************************************************************************************************************************
Format Conversion
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment