Commit 0a788f79 authored by David Reid's avatar David Reid

Remove placeholders for hand-written AVX2 code.

Maintaining AVX2 optimizations by hand is not worth the effort. The use
of AVX2 is now dependent on the compiler's auto-vectorization function.
parent da5c5c2e
......@@ -11516,15 +11516,12 @@ static MA_INLINE ma_bool32 ma_has_neon(void)
#define MA_SIMD_NONE 0
#define MA_SIMD_SSE2 1
#define MA_SIMD_AVX2 2
#define MA_SIMD_NEON 3
#ifndef MA_PREFERRED_SIMD
/* Prefer SSE2 over AVX2 if AVX2 has not bee explicitly requested. */
# if defined(MA_SUPPORT_SSE2) && (defined(MA_PREFER_SSE2) || !defined(MA_PREFER_AVX2))
# if defined(MA_SUPPORT_SSE2) && defined(MA_PREFER_SSE2)
#define MA_PREFERRED_SIMD MA_SIMD_SSE2
#elif defined(MA_SUPPORT_AVX2) && defined(MA_PREFER_AVX2)
#define MA_PREFERRED_SIMD MA_SIMD_AVX2
#elif defined(MA_SUPPORT_NEON) && defined(MA_PREFER_NEON)
#define MA_PREFERRED_SIMD MA_SIMD_NEON
#else
......@@ -42223,12 +42220,6 @@ static MA_INLINE void ma_pcm_u8_to_s16__sse2(void* dst, const void* src, ma_uint
ma_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_u8_to_s16__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_u8_to_s16__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42241,11 +42232,7 @@ MA_API void ma_pcm_u8_to_s16(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_u8_to_s16__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_u8_to_s16__sse2(dst, src, count, ditherMode);
} else
......@@ -42290,12 +42277,6 @@ static MA_INLINE void ma_pcm_u8_to_s24__sse2(void* dst, const void* src, ma_uint
ma_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_u8_to_s24__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_u8_to_s24__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42308,11 +42289,7 @@ MA_API void ma_pcm_u8_to_s24(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_u8_to_s24__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_u8_to_s24__sse2(dst, src, count, ditherMode);
} else
......@@ -42355,12 +42332,6 @@ static MA_INLINE void ma_pcm_u8_to_s32__sse2(void* dst, const void* src, ma_uint
ma_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_u8_to_s32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_u8_to_s32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42373,11 +42344,7 @@ MA_API void ma_pcm_u8_to_s32(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_u8_to_s32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_u8_to_s32__sse2(dst, src, count, ditherMode);
} else
......@@ -42421,12 +42388,6 @@ static MA_INLINE void ma_pcm_u8_to_f32__sse2(void* dst, const void* src, ma_uint
ma_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_u8_to_f32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_u8_to_f32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42439,11 +42400,7 @@ MA_API void ma_pcm_u8_to_f32(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_u8_to_f32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_u8_to_f32__sse2(dst, src, count, ditherMode);
} else
......@@ -42583,12 +42540,6 @@ static MA_INLINE void ma_pcm_s16_to_u8__sse2(void* dst, const void* src, ma_uint
ma_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s16_to_u8__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s16_to_u8__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42601,11 +42552,7 @@ MA_API void ma_pcm_s16_to_u8(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s16_to_u8__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s16_to_u8__sse2(dst, src, count, ditherMode);
} else
......@@ -42654,12 +42601,6 @@ static MA_INLINE void ma_pcm_s16_to_s24__sse2(void* dst, const void* src, ma_uin
ma_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s16_to_s24__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s16_to_s24__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42672,11 +42613,7 @@ MA_API void ma_pcm_s16_to_s24(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s16_to_s24__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s16_to_s24__sse2(dst, src, count, ditherMode);
} else
......@@ -42716,12 +42653,6 @@ static MA_INLINE void ma_pcm_s16_to_s32__sse2(void* dst, const void* src, ma_uin
ma_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s16_to_s32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s16_to_s32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42734,11 +42665,7 @@ MA_API void ma_pcm_s16_to_s32(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s16_to_s32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s16_to_s32__sse2(dst, src, count, ditherMode);
} else
......@@ -42790,12 +42717,6 @@ static MA_INLINE void ma_pcm_s16_to_f32__sse2(void* dst, const void* src, ma_uin
ma_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s16_to_f32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s16_to_f32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42808,11 +42729,7 @@ MA_API void ma_pcm_s16_to_f32(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s16_to_f32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s16_to_f32__sse2(dst, src, count, ditherMode);
} else
......@@ -42928,12 +42845,6 @@ static MA_INLINE void ma_pcm_s24_to_u8__sse2(void* dst, const void* src, ma_uint
ma_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s24_to_u8__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s24_to_u8__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -42946,11 +42857,7 @@ MA_API void ma_pcm_s24_to_u8(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s24_to_u8__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s24_to_u8__sse2(dst, src, count, ditherMode);
} else
......@@ -43008,12 +42915,6 @@ static MA_INLINE void ma_pcm_s24_to_s16__sse2(void* dst, const void* src, ma_uin
ma_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s24_to_s16__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s24_to_s16__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43026,11 +42927,7 @@ MA_API void ma_pcm_s24_to_s16(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s24_to_s16__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s24_to_s16__sse2(dst, src, count, ditherMode);
} else
......@@ -43078,12 +42975,6 @@ static MA_INLINE void ma_pcm_s24_to_s32__sse2(void* dst, const void* src, ma_uin
ma_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s24_to_s32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s24_to_s32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43096,11 +42987,7 @@ MA_API void ma_pcm_s24_to_s32(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s24_to_s32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s24_to_s32__sse2(dst, src, count, ditherMode);
} else
......@@ -43152,12 +43039,6 @@ static MA_INLINE void ma_pcm_s24_to_f32__sse2(void* dst, const void* src, ma_uin
ma_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s24_to_f32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s24_to_f32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43170,11 +43051,7 @@ MA_API void ma_pcm_s24_to_f32(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s24_to_f32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s24_to_f32__sse2(dst, src, count, ditherMode);
} else
......@@ -43298,12 +43175,6 @@ static MA_INLINE void ma_pcm_s32_to_u8__sse2(void* dst, const void* src, ma_uint
ma_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s32_to_u8__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s32_to_u8__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43316,11 +43187,7 @@ MA_API void ma_pcm_s32_to_u8(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s32_to_u8__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s32_to_u8__sse2(dst, src, count, ditherMode);
} else
......@@ -43378,12 +43245,6 @@ static MA_INLINE void ma_pcm_s32_to_s16__sse2(void* dst, const void* src, ma_uin
ma_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s32_to_s16__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s32_to_s16__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43396,11 +43257,7 @@ MA_API void ma_pcm_s32_to_s16(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s32_to_s16__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s32_to_s16__sse2(dst, src, count, ditherMode);
} else
......@@ -43443,12 +43300,6 @@ static MA_INLINE void ma_pcm_s32_to_s24__sse2(void* dst, const void* src, ma_uin
ma_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s32_to_s24__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s32_to_s24__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43461,11 +43312,7 @@ MA_API void ma_pcm_s32_to_s24(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s32_to_s24__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s32_to_s24__sse2(dst, src, count, ditherMode);
} else
......@@ -43523,12 +43370,6 @@ static MA_INLINE void ma_pcm_s32_to_f32__sse2(void* dst, const void* src, ma_uin
ma_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_s32_to_f32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_s32_to_f32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43541,11 +43382,7 @@ MA_API void ma_pcm_s32_to_f32(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_s32_to_f32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_s32_to_f32__sse2(dst, src, count, ditherMode);
} else
......@@ -43656,12 +43493,6 @@ static MA_INLINE void ma_pcm_f32_to_u8__sse2(void* dst, const void* src, ma_uint
ma_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_f32_to_u8__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_f32_to_u8__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -43674,11 +43505,7 @@ MA_API void ma_pcm_f32_to_u8(void* dst, const void* src, ma_uint64 count, ma_dit
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_f32_to_u8__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_f32_to_u8__sse2(dst, src, count, ditherMode);
} else
......@@ -43886,129 +43713,6 @@ static MA_INLINE void ma_pcm_f32_to_s16__sse2(void* dst, const void* src, ma_uin
}
#endif /* SSE2 */
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_f32_to_s16__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_uint64 i;
ma_uint64 i16;
ma_uint64 count16;
ma_int16* dst_s16;
const float* src_f32;
float ditherMin;
float ditherMax;
/* Both the input and output buffers need to be aligned to 32 bytes. */
if ((((ma_uintptr)dst & 31) != 0) || (((ma_uintptr)src & 31) != 0)) {
ma_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
return;
}
dst_s16 = (ma_int16*)dst;
src_f32 = (const float*)src;
ditherMin = 0;
ditherMax = 0;
if (ditherMode != ma_dither_mode_none) {
ditherMin = 1.0f / -32768;
ditherMax = 1.0f / 32767;
}
i = 0;
/* AVX2. AVX2 allows us to output 16 s16's at a time which means our loop is unrolled 16 times. */
count16 = count >> 4;
for (i16 = 0; i16 < count16; i16 += 1) {
__m256 d0;
__m256 d1;
__m256 x0;
__m256 x1;
__m256i i0;
__m256i i1;
__m256i p0;
__m256i p1;
__m256i r;
if (ditherMode == ma_dither_mode_none) {
d0 = _mm256_set1_ps(0);
d1 = _mm256_set1_ps(0);
} else if (ditherMode == ma_dither_mode_rectangle) {
d0 = _mm256_set_ps(
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax)
);
d1 = _mm256_set_ps(
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax),
ma_dither_f32_rectangle(ditherMin, ditherMax)
);
} else {
d0 = _mm256_set_ps(
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax)
);
d1 = _mm256_set_ps(
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax),
ma_dither_f32_triangle(ditherMin, ditherMax)
);
}
x0 = *((__m256*)(src_f32 + i) + 0);
x1 = *((__m256*)(src_f32 + i) + 1);
x0 = _mm256_add_ps(x0, d0);
x1 = _mm256_add_ps(x1, d1);
x0 = _mm256_mul_ps(x0, _mm256_set1_ps(32767.0f));
x1 = _mm256_mul_ps(x1, _mm256_set1_ps(32767.0f));
/* Computing the final result is a little more complicated for AVX2 than SSE2. */
i0 = _mm256_cvttps_epi32(x0);
i1 = _mm256_cvttps_epi32(x1);
p0 = _mm256_permute2x128_si256(i0, i1, 0 | 32);
p1 = _mm256_permute2x128_si256(i0, i1, 1 | 48);
r = _mm256_packs_epi32(p0, p1);
_mm256_stream_si256(((__m256i*)(dst_s16 + i)), r);
i += 16;
}
/* Leftover. */
for (; i < count; i += 1) {
float x = src_f32[i];
x = x + ma_dither_f32(ditherMode, ditherMin, ditherMax);
x = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); /* clip */
x = x * 32767.0f; /* -1..1 to -32767..32767 */
dst_s16[i] = (ma_int16)x;
}
}
#endif /* AVX2 */
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_f32_to_s16__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -44121,11 +43825,7 @@ MA_API void ma_pcm_f32_to_s16(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_f32_to_s16__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_f32_to_s16__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_f32_to_s16__sse2(dst, src, count, ditherMode);
} else
......@@ -44182,12 +43882,6 @@ static MA_INLINE void ma_pcm_f32_to_s24__sse2(void* dst, const void* src, ma_uin
ma_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_f32_to_s24__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_f32_to_s24__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -44200,11 +43894,7 @@ MA_API void ma_pcm_f32_to_s24(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_f32_to_s24__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_f32_to_s24__sse2(dst, src, count, ditherMode);
} else
......@@ -44257,12 +43947,6 @@ static MA_INLINE void ma_pcm_f32_to_s32__sse2(void* dst, const void* src, ma_uin
ma_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_AVX2)
static MA_INLINE void ma_pcm_f32_to_s32__avx2(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
ma_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MA_SUPPORT_NEON)
static MA_INLINE void ma_pcm_f32_to_s32__neon(void* dst, const void* src, ma_uint64 count, ma_dither_mode ditherMode)
{
......@@ -44275,11 +43959,7 @@ MA_API void ma_pcm_f32_to_s32(void* dst, const void* src, ma_uint64 count, ma_di
#ifdef MA_USE_REFERENCE_CONVERSION_APIS
ma_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
#else
# if MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
ma_pcm_f32_to_s32__avx2(dst, src, count, ditherMode);
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
# if MA_PREFERRED_SIMD == MA_SIMD_SSE2 && defined(MA_SUPPORT_SSE2)
if (ma_has_sse2()) {
ma_pcm_f32_to_s32__sse2(dst, src, count, ditherMode);
} else
......@@ -48148,7 +47828,6 @@ static /*__attribute__((noinline))*/ ma_result ma_gainer_process_pcm_frames_inte
iFrame = unrolledLoopCount << 1;
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
#endif
{
/*
......@@ -48223,7 +47902,6 @@ static /*__attribute__((noinline))*/ ma_result ma_gainer_process_pcm_frames_inte
iFrame = unrolledLoopCount << 1;
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
#endif
{
for (; iFrame < interpolatedFrameCount; iFrame += 1) {
......@@ -48254,16 +47932,6 @@ static /*__attribute__((noinline))*/ ma_result ma_gainer_process_pcm_frames_inte
runningGain1 = _mm_add_ps(runningGain1, runningGainDelta1);
}
} else
#elif MA_PREFERRED_SIMD == MA_SIMD_AVX2 && defined(MA_SUPPORT_AVX2)
if (ma_has_avx2()) {
__m256 runningGain0 = _mm256_loadu_ps(&pRunningGain[0]);
__m256 runningGainDelta0 = _mm256_loadu_ps(&pRunningGainDelta[0]);
for (; iFrame < interpolatedFrameCount; iFrame += 1) {
_mm256_storeu_ps(&pFramesOutF32[iFrame*8 + 0], _mm256_mul_ps(_mm256_loadu_ps(&pFramesInF32[iFrame*8 + 0]), runningGain0));
runningGain0 = _mm256_add_ps(runningGain0, runningGainDelta0);
}
} else
#endif
{
/* This is crafted so that it auto-vectorizes when compiled with Clang. */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment