Commit 6054b8a1 authored by David Reid

Clean up.

parent dfe27a64
...@@ -3335,6 +3335,10 @@ static MAL_INLINE mal_int32 mal_dither_s32(mal_dither_mode ditherMode, mal_int32 ...@@ -3335,6 +3335,10 @@ static MAL_INLINE mal_int32 mal_dither_s32(mal_dither_mode ditherMode, mal_int32
// multiple of the alignment. The alignment must be a power of 2. // multiple of the alignment. The alignment must be a power of 2.
void mal_split_buffer(void* pBuffer, size_t bufferSize, size_t splitCount, size_t alignment, void** ppBuffersOut, size_t* pSplitSizeOut) void mal_split_buffer(void* pBuffer, size_t bufferSize, size_t splitCount, size_t alignment, void** ppBuffersOut, size_t* pSplitSizeOut)
{ {
if (pSplitSizeOut) {
*pSplitSizeOut = 0;
}
if (pBuffer == NULL || bufferSize == 0 || splitCount == 0) { if (pBuffer == NULL || bufferSize == 0 || splitCount == 0) {
return; return;
} }
...@@ -20075,6 +20079,9 @@ mal_src_config mal_src_config_init(mal_uint32 sampleRateIn, mal_uint32 sampleRat ...@@ -20075,6 +20079,9 @@ mal_src_config mal_src_config_init(mal_uint32 sampleRateIn, mal_uint32 sampleRat
// //
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Comment this to disable interpolation of table lookups. Less accurate, but faster.
#define MAL_USE_SINC_TABLE_INTERPOLATION
// Retrieves a sample from the input buffer's window. Values >= 0 retrieve future samples. Negative values return past samples. // Retrieves a sample from the input buffer's window. Values >= 0 retrieve future samples. Negative values return past samples.
static MAL_INLINE float mal_src_sinc__get_input_sample_from_window(const mal_src* pSRC, mal_uint32 channel, mal_uint32 windowPosInSamples, mal_int32 sampleIndex) static MAL_INLINE float mal_src_sinc__get_input_sample_from_window(const mal_src* pSRC, mal_uint32 channel, mal_uint32 windowPosInSamples, mal_int32 sampleIndex)
{ {
...@@ -20095,14 +20102,14 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC, ...@@ -20095,14 +20102,14 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC,
mal_assert(pSRC != NULL); mal_assert(pSRC != NULL);
float xabs = (float)fabs(x); float xabs = (float)fabs(x);
if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) { //if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) {
xabs = 1; // <-- A non-zero integer will always return 0. // xabs = 1; // <-- A non-zero integer will always return 0.
} //}
xabs = xabs * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION; xabs = xabs * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
mal_int32 ixabs = (mal_int32)xabs; mal_int32 ixabs = (mal_int32)xabs;
#if 1 #if defined(MAL_USE_SINC_TABLE_INTERPOLATION)
float a = xabs - ixabs; float a = xabs - ixabs;
return mal_mix_f32_fast(pSRC->sinc.table[ixabs], pSRC->sinc.table[ixabs+1], a); return mal_mix_f32_fast(pSRC->sinc.table[ixabs], pSRC->sinc.table[ixabs+1], a);
#else #else
...@@ -20113,14 +20120,7 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC, ...@@ -20113,14 +20120,7 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC,
#if defined(MAL_SUPPORT_SSE2) #if defined(MAL_SUPPORT_SSE2)
static MAL_INLINE __m128 mal_fabsf_sse2(__m128 x) static MAL_INLINE __m128 mal_fabsf_sse2(__m128 x)
{ {
static MAL_ALIGN(16) mal_uint32 mask[4] = { return _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)), x);
0x7FFFFFFF,
0x7FFFFFFF,
0x7FFFFFFF,
0x7FFFFFFF
};
return _mm_and_ps(*(__m128*)mask, x);
} }
static MAL_INLINE __m128 mal_truncf_sse2(__m128 x) static MAL_INLINE __m128 mal_truncf_sse2(__m128 x)
...@@ -20175,15 +20175,15 @@ static MAL_INLINE __m256 mal_fabsf_avx(__m256 x) ...@@ -20175,15 +20175,15 @@ static MAL_INLINE __m256 mal_fabsf_avx(__m256 x)
#if 0 #if 0
static MAL_INLINE __m256 mal_src_sinc__interpolation_factor__avx(const mal_src* pSRC, __m256 x) static MAL_INLINE __m256 mal_src_sinc__interpolation_factor__avx(const mal_src* pSRC, __m256 x)
{ {
__m256 windowWidth256 = _mm256_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH); //__m256 windowWidth256 = _mm256_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH);
__m256 resolution256 = _mm256_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION); __m256 resolution256 = _mm256_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
__m256 one = _mm256_set1_ps(1); //__m256 one = _mm256_set1_ps(1);
__m256 xabs = mal_fabsf_avx(x); __m256 xabs = mal_fabsf_avx(x);
// if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs; // if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs;
__m256 xcmp = _mm256_cmp_ps(windowWidth256, xabs, 2); // 2 = Less than or equal = _mm_cmple_ps. //__m256 xcmp = _mm256_cmp_ps(windowWidth256, xabs, 2); // 2 = Less than or equal = _mm_cmple_ps.
xabs = _mm256_or_ps(_mm256_and_ps(one, xcmp), _mm256_andnot_ps(xcmp, xabs)); // xabs = (xcmp) ? 1 : xabs; //xabs = _mm256_or_ps(_mm256_and_ps(one, xcmp), _mm256_andnot_ps(xcmp, xabs)); // xabs = (xcmp) ? 1 : xabs;
xabs = _mm256_mul_ps(xabs, resolution256); xabs = _mm256_mul_ps(xabs, resolution256);
...@@ -20308,8 +20308,6 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount ...@@ -20308,8 +20308,6 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
float iTimeInF = mal_floorf(timeIn); float iTimeInF = mal_floorf(timeIn);
mal_uint32 iTimeIn = (mal_uint32)iTimeInF; mal_uint32 iTimeIn = (mal_uint32)iTimeInF;
//mal_int32 iWindowBeg = -windowWidth+1;
//mal_int32 iWindowEnd = windowWidth;
mal_int32 iWindow = 0; mal_int32 iWindow = 0;
// Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier. // Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier.
...@@ -20339,12 +20337,10 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount ...@@ -20339,12 +20337,10 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
a[iWindow8] = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs[iWindow8])); a[iWindow8] = _mm256_sub_ps(xabs, _mm256_cvtepi32_ps(ixabs[iWindow8]));
} }
__m256 lo[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2/8];
__m256 hi[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2/8];
for (mal_int32 iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) { for (mal_int32 iWindow8 = 0; iWindow8 < windowWidth8; iWindow8 += 1) {
int* ixabsv = (int*)&ixabs[iWindow8]; int* ixabsv = (int*)&ixabs[iWindow8];
lo[iWindow8] = _mm256_set_ps( __m256 lo = _mm256_set_ps(
pSRC->sinc.table[ixabsv[7]], pSRC->sinc.table[ixabsv[7]],
pSRC->sinc.table[ixabsv[6]], pSRC->sinc.table[ixabsv[6]],
pSRC->sinc.table[ixabsv[5]], pSRC->sinc.table[ixabsv[5]],
...@@ -20355,7 +20351,7 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount ...@@ -20355,7 +20351,7 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
pSRC->sinc.table[ixabsv[0]] pSRC->sinc.table[ixabsv[0]]
); );
hi[iWindow8] = _mm256_set_ps( __m256 hi = _mm256_set_ps(
pSRC->sinc.table[ixabsv[7]+1], pSRC->sinc.table[ixabsv[7]+1],
pSRC->sinc.table[ixabsv[6]+1], pSRC->sinc.table[ixabsv[6]+1],
pSRC->sinc.table[ixabsv[5]+1], pSRC->sinc.table[ixabsv[5]+1],
...@@ -20367,7 +20363,7 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount ...@@ -20367,7 +20363,7 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
); );
__m256 s = *((__m256*)windowSamples + iWindow8); __m256 s = *((__m256*)windowSamples + iWindow8);
r = _mm256_add_ps(r, _mm256_mul_ps(s, mal_mix_f32_fast__avx(lo[iWindow8], hi[iWindow8], a[iWindow8]))); r = _mm256_add_ps(r, _mm256_mul_ps(s, mal_mix_f32_fast__avx(lo, hi, a[iWindow8])));
} }
// Horizontal add. // Horizontal add.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment