Commit 819ca335 authored by David Reid

Improvements to linear sample rate conversion.

parent c2666b9d
...@@ -780,15 +780,14 @@ struct mal_src ...@@ -780,15 +780,14 @@ struct mal_src
{ {
mal_src_config config; mal_src_config config;
float bin[MAL_MAX_CHANNELS][32]; float bin[MAL_MAX_CHANNELS][32];
mal_src_cache cache; // <-- For simplifying and optimizing client -> memory reading.
union union
{ {
struct struct
{ {
float alpha; float samplesFromClient[MAL_MAX_CHANNELS][256];
mal_bool32 isPrevFramesLoaded : 1; float t;
mal_bool32 isNextFramesLoaded : 1; mal_uint32 leftoverFrames;
} linear; } linear;
}; };
}; };
...@@ -17715,77 +17714,6 @@ mal_uint64 mal_calculate_frame_count_after_src(mal_uint32 sampleRateOut, mal_uin ...@@ -17715,77 +17714,6 @@ mal_uint64 mal_calculate_frame_count_after_src(mal_uint32 sampleRateOut, mal_uin
} }
// Puts a cache into its empty starting state and records which SRC it reads on behalf of.
//
// pSRC   - sample rate converter stored in the cache as pCache->pSRC. Must not be NULL.
// pCache - cache to initialize. Must not be NULL.
//
// On return the cache holds zero cached frames and its read cursor is at frame 0.
void mal_src_cache_init(mal_src* pSRC, mal_src_cache* pCache)
{
    mal_assert(pSRC != NULL);
    mal_assert(pCache != NULL);

    // Nothing is cached yet, so the cursor starts at the beginning of an empty cache.
    pCache->iNextFrame       = 0;
    pCache->cachedFrameCount = 0;

    // Remember the converter this cache belongs to.
    pCache->pSRC = pSRC;
}
// Reads up to frameCount deinterleaved frames through the cache into the per-channel output buffers.
//
// Frames already sitting in the cache are copied out first. When those run out, the cache is refilled
// by calling the SRC's onReadDeinterleaved callback, and copying continues until frameCount frames
// have been delivered or the callback returns 0 frames.
//
// pCache       - cache to read through. pCache->pSRC and its config.onReadDeinterleaved must be set.
// frameCount   - number of frames requested per channel. Must be greater than 0.
// ppSamplesOut - one destination pointer per channel (config.channels entries). The pointers in this
//                array are not modified by the call.
// pUserData    - forwarded unchanged to onReadDeinterleaved.
//
// Returns the number of frames written to each channel, which may be less than frameCount.
mal_uint32 mal_src_cache_read_frames_deinterleaved(mal_src_cache* pCache, mal_uint32 frameCount, float** ppSamplesOut, void* pUserData)
{
    mal_assert(pCache != NULL);
    mal_assert(pCache->pSRC != NULL);
    mal_assert(pCache->pSRC->config.onReadDeinterleaved != NULL);
    mal_assert(frameCount > 0);
    mal_assert(ppSamplesOut != NULL);

    mal_uint32 channels = pCache->pSRC->config.channels;

    // Per-channel pointers to the start of the cache storage, in the form the client callback expects.
    float* ppCachedFrames[MAL_MAX_CHANNELS];
    for (mal_uint32 iChannel = 0; iChannel < channels; ++iChannel) {
        ppCachedFrames[iChannel] = pCache->cachedFrames[iChannel];
    }

    mal_uint32 totalFramesRead = 0;
    while (frameCount > 0) {
        // If there's anything in memory go ahead and copy that over first.
        mal_uint32 framesRemainingInMemory = pCache->cachedFrameCount - pCache->iNextFrame;
        mal_uint32 framesToReadFromMemory = frameCount;
        if (framesToReadFromMemory > framesRemainingInMemory) {
            framesToReadFromMemory = framesRemainingInMemory;
        }

        // The destination is addressed by offset instead of advancing ppSamplesOut[] in place, so the
        // caller's pointer array is left exactly as it was passed in.
        for (mal_uint32 iChannel = 0; iChannel < channels; ++iChannel) {
            mal_copy_memory(ppSamplesOut[iChannel] + totalFramesRead, pCache->cachedFrames[iChannel] + pCache->iNextFrame, framesToReadFromMemory*sizeof(float));
        }

        pCache->iNextFrame += framesToReadFromMemory;
        totalFramesRead += framesToReadFromMemory;
        frameCount -= framesToReadFromMemory;
        if (frameCount == 0) {
            break;
        }

        // At this point there are still more frames to read from the client, so we'll need to reload the cache with fresh data.
        pCache->iNextFrame = 0;

        mal_uint32 framesToReadFromClient = mal_countof(pCache->cachedFrames[0]);
        if (framesToReadFromClient > MAL_SRC_CACHE_SIZE_IN_FRAMES) {
            framesToReadFromClient = MAL_SRC_CACHE_SIZE_IN_FRAMES;
        }

        pCache->cachedFrameCount = pCache->pSRC->config.onReadDeinterleaved(pCache->pSRC, framesToReadFromClient, (void**)ppCachedFrames, pUserData);

        // Never trust the callback to report more frames than were requested; doing so would make the
        // next copy read past the end of the cache storage.
        if (pCache->cachedFrameCount > framesToReadFromClient) {
            pCache->cachedFrameCount = framesToReadFromClient;
        }

        // Get out of this loop if nothing was able to be retrieved.
        if (pCache->cachedFrameCount == 0) {
            break;
        }
    }

    return totalFramesRead;
}
mal_uint64 mal_src_read_deinterleaved__passthrough(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, mal_bool32 flush, void* pUserData); mal_uint64 mal_src_read_deinterleaved__passthrough(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, mal_bool32 flush, void* pUserData);
mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, mal_bool32 flush, void* pUserData); mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, mal_bool32 flush, void* pUserData);
...@@ -17806,7 +17734,6 @@ mal_result mal_src_init(const mal_src_config* pConfig, mal_src* pSRC) ...@@ -17806,7 +17734,6 @@ mal_result mal_src_init(const mal_src_config* pConfig, mal_src* pSRC)
pSRC->config = *pConfig; pSRC->config = *pConfig;
mal_src_cache_init(pSRC, &pSRC->cache);
return MAL_SUCCESS; return MAL_SUCCESS;
} }
...@@ -17887,7 +17814,7 @@ mal_uint64 mal_src_read_deinterleaved__passthrough(mal_src* pSRC, mal_uint64 fra ...@@ -17887,7 +17814,7 @@ mal_uint64 mal_src_read_deinterleaved__passthrough(mal_src* pSRC, mal_uint64 fra
framesToReadRightNow = 0xFFFFFFFF; framesToReadRightNow = 0xFFFFFFFF;
} }
mal_uint32 framesRead = (mal_uint32)pSRC->config.onReadDeinterleaved(pSRC, (mal_uint32)framesToReadRightNow, ppNextSamplesOut, pUserData); mal_uint32 framesRead = (mal_uint32)pSRC->config.onReadDeinterleaved(pSRC, (mal_uint32)framesToReadRightNow, (void**)ppNextSamplesOut, pUserData);
if (framesRead == 0) { if (framesRead == 0) {
break; break;
} }
...@@ -17904,84 +17831,160 @@ mal_uint64 mal_src_read_deinterleaved__passthrough(mal_src* pSRC, mal_uint64 fra ...@@ -17904,84 +17831,160 @@ mal_uint64 mal_src_read_deinterleaved__passthrough(mal_src* pSRC, mal_uint64 fra
mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, mal_bool32 flush, void* pUserData) mal_uint64 mal_src_read_deinterleaved__linear(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, mal_bool32 flush, void* pUserData)
{ {
(void)flush; // No flushing at the moment.
mal_assert(pSRC != NULL); mal_assert(pSRC != NULL);
mal_assert(frameCount > 0); mal_assert(frameCount > 0);
mal_assert(ppSamplesOut != NULL); mal_assert(ppSamplesOut != NULL);
// For linear SRC, the bin is only 2 frames: 1 prior, 1 future.
float* ppNextSamplesOut[MAL_MAX_CHANNELS]; float* ppNextSamplesOut[MAL_MAX_CHANNELS];
mal_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels); mal_copy_memory(ppNextSamplesOut, ppSamplesOut, sizeof(void*) * pSRC->config.channels);
// Load the bin if necessary.
float* ppPrevFrame[MAL_MAX_CHANNELS];
float* ppNextFrame[MAL_MAX_CHANNELS];
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
ppPrevFrame[iChannel] = &pSRC->bin[iChannel][0];
ppNextFrame[iChannel] = &pSRC->bin[iChannel][1];
}
if (!pSRC->linear.isPrevFramesLoaded) { float factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut;
mal_uint32 framesRead = mal_src_cache_read_frames_deinterleaved(&pSRC->cache, 1, ppPrevFrame, pUserData);
if (framesRead == 0) { mal_uint32 maxFrameCountPerChunkIn = mal_countof(pSRC->linear.samplesFromClient[0]);
return 0;
mal_uint64 totalFramesRead = 0;
while (totalFramesRead < frameCount) {
mal_uint64 framesRemaining = frameCount - totalFramesRead;
mal_uint64 framesToRead = framesRemaining;
if (framesToRead > 16384) {
framesToRead = 16384; // <-- Keep this small because we're using 32-bit floats for calculating sample positions and I don't want to run out of precision with huge sample counts.
} }
pSRC->linear.isPrevFramesLoaded = MAL_TRUE;
}
if (!pSRC->linear.isNextFramesLoaded) { // Read Input Data
mal_uint32 framesRead = mal_src_cache_read_frames_deinterleaved(&pSRC->cache, 1, ppNextFrame, pUserData); // ===============
if (framesRead == 0) { float tBeg = pSRC->linear.t;
return 0; float tEnd = tBeg + (framesToRead*factor);
mal_uint32 framesToReadFromClient = (mal_uint32)(tEnd) + 1 + 1; // +1 to make tEnd 1-based and +1 because we always need an extra sample for interpolation.
if (framesToReadFromClient >= maxFrameCountPerChunkIn) {
framesToReadFromClient = maxFrameCountPerChunkIn;
} }
pSRC->linear.isNextFramesLoaded = MAL_TRUE;
}
float factor = (float)pSRC->config.sampleRateIn / pSRC->config.sampleRateOut; float* ppSamplesFromClient[MAL_MAX_CHANNELS];
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
ppSamplesFromClient[iChannel] = pSRC->linear.samplesFromClient[iChannel] + pSRC->linear.leftoverFrames;
}
mal_uint32 framesReadFromClient = 0;
if (framesToReadFromClient > pSRC->linear.leftoverFrames) {
framesReadFromClient = (mal_uint32)pSRC->config.onReadDeinterleaved(pSRC, (mal_uint32)framesToReadFromClient - pSRC->linear.leftoverFrames, (void**)ppSamplesFromClient, pUserData);
}
framesReadFromClient += pSRC->linear.leftoverFrames; // <-- You can sort of think of it as though we've re-read the leftover samples from the client.
if (framesReadFromClient < 2) {
break;
}
mal_uint64 totalFramesRead = 0;
while (frameCount > 0) {
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
// The bin is where the previous and next frames are located. ppSamplesFromClient[iChannel] = pSRC->linear.samplesFromClient[iChannel];
float prevSample = pSRC->bin[iChannel][0];
float nextSample = pSRC->bin[iChannel][1];
ppNextSamplesOut[iChannel][0] = mal_mix_f32(prevSample, nextSample, pSRC->linear.alpha);
ppNextSamplesOut[iChannel] = (float*)ppNextSamplesOut[iChannel] + 1;
} }
pSRC->linear.alpha += factor;
// The new alpha value is how we determine whether or not we need to read fresh frames. // Write Output Data
mal_uint32 framesToReadFromClient = (mal_uint32)pSRC->linear.alpha; // =================
pSRC->linear.alpha = pSRC->linear.alpha - framesToReadFromClient;
for (mal_uint32 i = 0; i < framesToReadFromClient; ++i) { // At this point we have a bunch of frames that the client has given to us for processing. From this we can determine the maximum number of output frames
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) { // that can be processed from this input. We want to output as many samples as possible from our input data.
ppPrevFrame[iChannel][0] = ppNextFrame[iChannel][0]; float tAvailable = framesReadFromClient - tBeg;
mal_uint32 maxOutputFramesToRead = (mal_uint32)(tAvailable / factor);
if (maxOutputFramesToRead == 0) {
maxOutputFramesToRead = 1;
}
if (maxOutputFramesToRead > framesToRead) {
maxOutputFramesToRead = (mal_uint32)framesToRead;
}
// Output frames are always read in groups of 4 because I'm planning on using this as a reference for some SIMD-y stuff later.
mal_uint32 maxOutputFramesToRead4 = maxOutputFramesToRead/4;
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
float t0 = pSRC->linear.t + factor*0;
float t1 = pSRC->linear.t + factor*1;
float t2 = pSRC->linear.t + factor*2;
float t3 = pSRC->linear.t + factor*3;
for (mal_uint32 iFrameOut = 0; iFrameOut < maxOutputFramesToRead4; iFrameOut += 1) {
float iPrevSample0 = (float)floor(t0);
float iPrevSample1 = (float)floor(t1);
float iPrevSample2 = (float)floor(t2);
float iPrevSample3 = (float)floor(t3);
float iNextSample0 = iPrevSample0 + 1;
float iNextSample1 = iPrevSample1 + 1;
float iNextSample2 = iPrevSample2 + 1;
float iNextSample3 = iPrevSample3 + 1;
float alpha0 = t0 - iPrevSample0;
float alpha1 = t1 - iPrevSample1;
float alpha2 = t2 - iPrevSample2;
float alpha3 = t3 - iPrevSample3;
float prevSample0 = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample0];
float prevSample1 = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample1];
float prevSample2 = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample2];
float prevSample3 = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample3];
float nextSample0 = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample0];
float nextSample1 = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample1];
float nextSample2 = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample2];
float nextSample3 = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample3];
ppNextSamplesOut[iChannel][iFrameOut*4 + 0] = mal_mix_f32(prevSample0, nextSample0, alpha0);
ppNextSamplesOut[iChannel][iFrameOut*4 + 1] = mal_mix_f32(prevSample1, nextSample1, alpha1);
ppNextSamplesOut[iChannel][iFrameOut*4 + 2] = mal_mix_f32(prevSample2, nextSample2, alpha2);
ppNextSamplesOut[iChannel][iFrameOut*4 + 3] = mal_mix_f32(prevSample3, nextSample3, alpha3);
t0 += factor*4;
t1 += factor*4;
t2 += factor*4;
t3 += factor*4;
} }
mal_uint32 framesRead = mal_src_cache_read_frames_deinterleaved(&pSRC->cache, 1, ppNextFrame, pUserData); float t = pSRC->linear.t + (factor*maxOutputFramesToRead4*4);
if (framesRead == 0) { for (mal_uint32 iFrameOut = (maxOutputFramesToRead4*4); iFrameOut < maxOutputFramesToRead; iFrameOut += 1) {
for (mal_uint32 j = 0; j < pSRC->config.channels; ++j) { float iPrevSample = (float)floor(t);
ppNextFrame[j][0] = 0; float iNextSample = iPrevSample + 1;
} float alpha = t - iPrevSample;
if (pSRC->linear.isNextFramesLoaded) { float prevSample = ppSamplesFromClient[iChannel][(mal_uint32)iPrevSample];
pSRC->linear.isNextFramesLoaded = MAL_FALSE; float nextSample = ppSamplesFromClient[iChannel][(mal_uint32)iNextSample];
} else {
if (flush) {
pSRC->linear.isPrevFramesLoaded = MAL_FALSE;
}
}
break; ppNextSamplesOut[iChannel][iFrameOut] = mal_mix_f32(prevSample, nextSample, alpha);
t += factor;
} }
ppNextSamplesOut[iChannel] += maxOutputFramesToRead;
} }
frameCount -= 1; totalFramesRead += maxOutputFramesToRead;
totalFramesRead += 1;
// Residual
// ========
float tNext = pSRC->linear.t + (maxOutputFramesToRead*factor);
pSRC->linear.t = tNext;
mal_assert(tNext <= framesReadFromClient+1);
mal_uint32 iNextFrame = (mal_uint32)floor(tNext);
pSRC->linear.leftoverFrames = framesReadFromClient - iNextFrame;
pSRC->linear.t = tNext - iNextFrame;
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; ++iChannel) {
for (mal_uint32 iFrame = 0; iFrame < pSRC->linear.leftoverFrames; ++iFrame) {
float sample = ppSamplesFromClient[iChannel][framesReadFromClient-pSRC->linear.leftoverFrames + iFrame];
ppSamplesFromClient[iChannel][iFrame] = sample;
}
}
// If there's no frames available we need to get out of this loop.
if (!pSRC->linear.isNextFramesLoaded && (!flush || !pSRC->linear.isPrevFramesLoaded)) { // Exit the loop if we've found everything from the client.
if (framesReadFromClient < framesToReadFromClient) {
break; break;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment