Commit 699fae5d authored by David Reid

Prep work for SIMD optimizations to format conversion.

parent a51be49a
...@@ -850,7 +850,7 @@ typedef struct ...@@ -850,7 +850,7 @@ typedef struct
MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_src MAL_ALIGNED_STRUCT(MAL_SIMD_ALIGNMENT) mal_src
{ {
float samplesFromClient[MAL_MAX_CHANNELS][256]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) float samplesFromClient[MAL_MAX_CHANNELS][256];
mal_src_config config; mal_src_config config;
union union
...@@ -3038,6 +3038,40 @@ static MAL_INLINE float mal_mix_f32(float x, float y, float a) ...@@ -3038,6 +3038,40 @@ static MAL_INLINE float mal_mix_f32(float x, float y, float a)
} }
// Splits a buffer into parts of equal length and of the given alignment. The returned size of the split buffers will be a
// multiple of the alignment. The alignment must be a power of 2.
void mal_split_buffer(void* pBuffer, size_t bufferSize, size_t splitCount, size_t alignment, void** ppBuffersOut, size_t* pSplitSizeOut)
{
if (pBuffer == NULL || bufferSize == 0 || splitCount == 0) {
return;
}
if (alignment == 0) {
alignment = 1;
}
mal_uintptr pBufferUnaligned = (mal_uintptr)pBuffer;
mal_uintptr pBufferAligned = (pBufferUnaligned + (alignment-1)) & ~(alignment-1);
size_t unalignedBytes = (size_t)(pBufferAligned - pBufferUnaligned);
size_t splitSize = 0;
if (bufferSize >= unalignedBytes) {
splitSize = (bufferSize - unalignedBytes) / splitCount;
splitSize = splitSize & ~(alignment-1);
}
if (ppBuffersOut != NULL) {
for (size_t i = 0; i < splitCount; ++i) {
ppBuffersOut[i] = (mal_uint8*)(pBufferAligned + (splitSize*i));
}
}
if (pSplitSizeOut) {
*pSplitSizeOut = splitSize;
}
}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// //
// Atomics // Atomics
...@@ -17312,7 +17346,7 @@ mal_uint64 mal_format_converter_read(mal_format_converter* pConverter, mal_uint6 ...@@ -17312,7 +17346,7 @@ mal_uint64 mal_format_converter_read(mal_format_converter* pConverter, mal_uint6
} }
} else { } else {
// Conversion required. // Conversion required.
mal_uint8 temp[MAL_MAX_CHANNELS * MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) mal_uint8 temp[MAL_MAX_CHANNELS * MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128];
mal_assert(sizeof(temp) <= 0xFFFFFFFF); mal_assert(sizeof(temp) <= 0xFFFFFFFF);
mal_uint32 maxFramesToReadAtATime = sizeof(temp) / sampleSizeIn / pConverter->config.channels; mal_uint32 maxFramesToReadAtATime = sizeof(temp) / sampleSizeIn / pConverter->config.channels;
...@@ -17337,15 +17371,14 @@ mal_uint64 mal_format_converter_read(mal_format_converter* pConverter, mal_uint6 ...@@ -17337,15 +17371,14 @@ mal_uint64 mal_format_converter_read(mal_format_converter* pConverter, mal_uint6
} }
} else { } else {
// Input data is deinterleaved. If a conversion is required we need to do an intermediary step. // Input data is deinterleaved. If a conversion is required we need to do an intermediary step.
mal_uint8 tempSamplesOfOutFormat[MAL_MAX_CHANNELS][MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) mal_uint8 tempSamplesOfOutFormat[MAL_MAX_CHANNELS * MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128];
mal_assert(sizeof(tempSamplesOfOutFormat[0]) <= 0xFFFFFFFFF); mal_assert(sizeof(tempSamplesOfOutFormat) <= 0xFFFFFFFFF);
void* ppTempSampleOfOutFormat[MAL_MAX_CHANNELS]; void* ppTempSamplesOfOutFormat[MAL_MAX_CHANNELS];
for (mal_uint32 i = 0; i < pConverter->config.channels; ++i) { size_t splitBufferSizeOut;
ppTempSampleOfOutFormat[i] = &tempSamplesOfOutFormat[i]; mal_split_buffer(tempSamplesOfOutFormat, sizeof(tempSamplesOfOutFormat), pConverter->config.channels, MAL_SIMD_ALIGNMENT, (void**)&ppTempSamplesOfOutFormat, &splitBufferSizeOut);
}
mal_uint32 maxFramesToReadAtATime = sizeof(tempSamplesOfOutFormat[0]) / sampleSizeIn; mal_uint32 maxFramesToReadAtATime = (mal_uint32)(splitBufferSizeOut / sampleSizeIn);
while (totalFramesRead < frameCount) { while (totalFramesRead < frameCount) {
mal_uint64 framesRemaining = (frameCount - totalFramesRead); mal_uint64 framesRemaining = (frameCount - totalFramesRead);
...@@ -17358,31 +17391,33 @@ mal_uint64 mal_format_converter_read(mal_format_converter* pConverter, mal_uint6 ...@@ -17358,31 +17391,33 @@ mal_uint64 mal_format_converter_read(mal_format_converter* pConverter, mal_uint6
if (pConverter->config.formatIn == pConverter->config.formatOut) { if (pConverter->config.formatIn == pConverter->config.formatOut) {
// Only interleaving. // Only interleaving.
framesJustRead = (mal_uint32)pConverter->config.onReadDeinterleaved(pConverter, (mal_uint32)framesToReadRightNow, ppTempSampleOfOutFormat, pUserData); framesJustRead = (mal_uint32)pConverter->config.onReadDeinterleaved(pConverter, (mal_uint32)framesToReadRightNow, ppTempSamplesOfOutFormat, pUserData);
if (framesJustRead == 0) { if (framesJustRead == 0) {
break; break;
} }
} else { } else {
// Interleaving + Conversion. Convert first, then interleave. // Interleaving + Conversion. Convert first, then interleave.
mal_uint8 tempSamplesOfInFormat[MAL_MAX_CHANNELS][MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) mal_uint8 tempSamplesOfInFormat[MAL_MAX_CHANNELS * MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128];
void* ppTempSampleOfInFormat[MAL_MAX_CHANNELS]; void* ppTempSamplesOfInFormat[MAL_MAX_CHANNELS];
for (mal_uint32 i = 0; i < pConverter->config.channels; ++i) { size_t splitBufferSizeIn;
ppTempSampleOfInFormat[i] = &tempSamplesOfInFormat[i]; mal_split_buffer(tempSamplesOfInFormat, sizeof(tempSamplesOfInFormat), pConverter->config.channels, MAL_SIMD_ALIGNMENT, (void**)&ppTempSamplesOfInFormat, &splitBufferSizeIn);
}
if (framesToReadRightNow > (splitBufferSizeIn / sampleSizeIn)) {
framesToReadRightNow = (splitBufferSizeIn / sampleSizeIn);
}
framesJustRead = (mal_uint32)pConverter->config.onReadDeinterleaved(pConverter, (mal_uint32)framesToReadRightNow, ppTempSampleOfInFormat, pUserData); framesJustRead = (mal_uint32)pConverter->config.onReadDeinterleaved(pConverter, (mal_uint32)framesToReadRightNow, ppTempSamplesOfInFormat, pUserData);
if (framesJustRead == 0) { if (framesJustRead == 0) {
break; break;
} }
for (mal_uint32 iChannel = 0; iChannel < pConverter->config.channels; iChannel += 1) { for (mal_uint32 iChannel = 0; iChannel < pConverter->config.channels; iChannel += 1) {
pConverter->onConvertPCM(tempSamplesOfOutFormat[iChannel], tempSamplesOfInFormat[iChannel], framesJustRead, pConverter->config.ditherMode); pConverter->onConvertPCM(ppTempSamplesOfOutFormat[iChannel], ppTempSamplesOfInFormat[iChannel], framesJustRead, pConverter->config.ditherMode);
} }
} }
pConverter->onInterleavePCM(pNextFramesOut, (const void**)ppTempSampleOfOutFormat, framesJustRead, pConverter->config.channels); pConverter->onInterleavePCM(pNextFramesOut, (const void**)ppTempSamplesOfOutFormat, framesJustRead, pConverter->config.channels);
totalFramesRead += framesJustRead; totalFramesRead += framesJustRead;
pNextFramesOut += framesJustRead * frameSizeOut; pNextFramesOut += framesJustRead * frameSizeOut;
...@@ -17407,7 +17442,7 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver ...@@ -17407,7 +17442,7 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver
if (pConverter->config.onRead != NULL) { if (pConverter->config.onRead != NULL) {
// Input data is interleaved. // Input data is interleaved.
mal_uint8 tempSamplesOfOutFormat[MAL_MAX_CHANNELS * MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) mal_uint8 tempSamplesOfOutFormat[MAL_MAX_CHANNELS * MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128];
mal_assert(sizeof(tempSamplesOfOutFormat) <= 0xFFFFFFFF); mal_assert(sizeof(tempSamplesOfOutFormat) <= 0xFFFFFFFF);
mal_uint32 maxFramesToReadAtATime = sizeof(tempSamplesOfOutFormat) / sampleSizeIn / pConverter->config.channels; mal_uint32 maxFramesToReadAtATime = sizeof(tempSamplesOfOutFormat) / sampleSizeIn / pConverter->config.channels;
...@@ -17429,7 +17464,7 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver ...@@ -17429,7 +17464,7 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver
} }
} else { } else {
// De-interleaving + Conversion. Convert first, then de-interleave. // De-interleaving + Conversion. Convert first, then de-interleave.
mal_uint8 tempSamplesOfInFormat[sizeof(tempSamplesOfOutFormat)]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) mal_uint8 tempSamplesOfInFormat[sizeof(tempSamplesOfOutFormat)];
framesJustRead = (mal_uint32)pConverter->config.onRead(pConverter, (mal_uint32)framesToReadRightNow, tempSamplesOfInFormat, pUserData); framesJustRead = (mal_uint32)pConverter->config.onRead(pConverter, (mal_uint32)framesToReadRightNow, tempSamplesOfInFormat, pUserData);
if (framesJustRead == 0) { if (framesJustRead == 0) {
...@@ -17469,15 +17504,14 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver ...@@ -17469,15 +17504,14 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver
} }
} else { } else {
// Conversion required. // Conversion required.
mal_uint8 temp[MAL_MAX_CHANNELS][MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128]; MAL_ALIGN(MAL_SIMD_ALIGNMENT) mal_uint8 temp[MAL_MAX_CHANNELS][MAL_MAX_PCM_SAMPLE_SIZE_IN_BYTES * 128];
mal_assert(sizeof(temp[0]) <= 0xFFFFFFFF); mal_assert(sizeof(temp) <= 0xFFFFFFFF);
void* ppTemp[MAL_MAX_CHANNELS]; void* ppTemp[MAL_MAX_CHANNELS];
for (mal_uint32 i = 0; i < pConverter->config.channels; ++i) { size_t splitBufferSize;
ppTemp[i] = &temp[i]; mal_split_buffer(temp, sizeof(temp), pConverter->config.channels, MAL_SIMD_ALIGNMENT, (void**)&ppTemp, &splitBufferSize);
}
mal_uint32 maxFramesToReadAtATime = sizeof(temp[0]) / sampleSizeIn; mal_uint32 maxFramesToReadAtATime = (mal_uint32)(splitBufferSize / sampleSizeIn);
while (totalFramesRead < frameCount) { while (totalFramesRead < frameCount) {
mal_uint64 framesRemaining = (frameCount - totalFramesRead); mal_uint64 framesRemaining = (frameCount - totalFramesRead);
...@@ -17512,40 +17546,6 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver ...@@ -17512,40 +17546,6 @@ mal_uint64 mal_format_converter_read_deinterleaved(mal_format_converter* pConver
// //
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Splits a buffer into parts of equal length and of the given alignment. The returned size of the split buffers will be a
// multiple of the alignment. The alignment must be a power of 2.
void mal_split_buffer(void* pBuffer, size_t bufferSize, size_t splitCount, size_t alignment, void** ppBuffersOut, size_t* pSplitSizeOut)
{
if (pBuffer == NULL || bufferSize == 0 || splitCount == 0) {
return;
}
if (alignment == 0) {
alignment = 1;
}
mal_uintptr pBufferUnaligned = (mal_uintptr)pBuffer;
mal_uintptr pBufferAligned = (pBufferUnaligned + (alignment-1)) & ~(alignment-1);
size_t unalignedBytes = (size_t)(pBufferAligned - pBufferUnaligned);
size_t splitSize = 0;
if (bufferSize >= unalignedBytes) {
splitSize = (bufferSize - unalignedBytes) / splitCount;
splitSize = splitSize & ~(alignment-1);
}
if (ppBuffersOut != NULL) {
for (size_t i = 0; i < splitCount; ++i) {
ppBuffersOut[i] = (mal_uint8*)(pBufferAligned + (splitSize*i));
}
}
if (pSplitSizeOut) {
*pSplitSizeOut = splitSize;
}
}
// -X = Left, +X = Right // -X = Left, +X = Right
// -Y = Bottom, +Y = Top // -Y = Bottom, +Y = Top
// -Z = Front, +Z = Back // -Z = Front, +Z = Back
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment