Commit 58219ed8 authored by David Reid's avatar David Reid

Add some hand-written SSE2 code.

parent 8db30c36
...@@ -51877,6 +51877,28 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut, ...@@ -51877,6 +51877,28 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut,
} }
} }
} else if (channelsOut == 6) { } else if (channelsOut == 6) {
#if defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
/* We want to do two frames in each iteration so we can have a multiple of 4 samples. */
ma_uint64 unrolledFrameCount = frameCount >> 1;
for (iFrame = 0; iFrame < unrolledFrameCount; iFrame += 1) {
__m128 in0 = _mm_set1_ps(pFramesIn[iFrame*2 + 0]);
__m128 in1 = _mm_set1_ps(pFramesIn[iFrame*2 + 1]);
_mm_storeu_ps(&pFramesOut[iFrame*12 + 0], in0);
_mm_storeu_ps(&pFramesOut[iFrame*12 + 4], _mm_shuffle_ps(in1, in0, _MM_SHUFFLE(0, 0, 0, 0)));
_mm_storeu_ps(&pFramesOut[iFrame*12 + 8], in1);
}
/* Tail. */
for (iFrame = unrolledFrameCount << 1; iFrame < frameCount; iFrame += 1) {
for (iChannelOut = 0; iChannelOut < 6; iChannelOut += 1) {
pFramesOut[iFrame*6 + iChannelOut] = pFramesIn[iFrame];
}
}
} else
#endif
for (iFrame = 0; iFrame < frameCount; iFrame += 1) { for (iFrame = 0; iFrame < frameCount; iFrame += 1) {
for (iChannelOut = 0; iChannelOut < 6; iChannelOut += 1) { for (iChannelOut = 0; iChannelOut < 6; iChannelOut += 1) {
pFramesOut[iFrame*6 + iChannelOut] = pFramesIn[iFrame]; pFramesOut[iFrame*6 + iChannelOut] = pFramesIn[iFrame];
...@@ -51917,7 +51939,7 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut, ...@@ -51917,7 +51939,7 @@ static ma_result ma_channel_map_apply_mono_in_f32(float* MA_RESTRICT pFramesOut,
} }
} }
} else { } else {
/* Slot path. Too many channels to store on the stack. */ /* Slow path. Too many channels to store on the stack. */
for (iFrame = 0; iFrame < frameCount; iFrame += 1) { for (iFrame = 0; iFrame < frameCount; iFrame += 1) {
for (iChannelOut = 0; iChannelOut < channelsOut; iChannelOut += 1) { for (iChannelOut = 0; iChannelOut < channelsOut; iChannelOut += 1) {
ma_channel channelOut = ma_channel_map_get_channel(pChannelMapOut, channelsOut, iChannelOut); ma_channel channelOut = ma_channel_map_get_channel(pChannelMapOut, channelsOut, iChannelOut);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment