Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
M
miniaudio
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
MyCard
miniaudio
Commits
ef2ad300
Commit
ef2ad300
authored
May 27, 2018
by
David Reid
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add AVX optimized f32 -> s16 conversion.
parent
b2815ccf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
664 additions
and
15 deletions
+664
-15
mini_al.h
mini_al.h
+454
-8
tests/mal_profiling.c
tests/mal_profiling.c
+210
-7
No files found.
mini_al.h
View file @
ef2ad300
...
...
@@ -17288,6 +17288,24 @@ void mal_pcm_u8_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_u8_to_s16__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s16__optimized.
void mal_pcm_u8_to_s16__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_u8_to_s16__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s16__optimized.
void mal_pcm_u8_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_u8_to_s16__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s16__optimized.
void mal_pcm_u8_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_u8_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17328,18 +17346,32 @@ void mal_pcm_u8_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_u8_to_s24__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s24__optimized.
void mal_pcm_u8_to_s24__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_u8_to_s24__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s24__optimized.
void mal_pcm_u8_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_u8_to_s24__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s24__optimized.
void mal_pcm_u8_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
// Entry point that picks one mal_pcm_u8_to_s24 implementation at compile time:
//   - MAL_USE_REFERENCE_CONVERSION_APIS defined -> the __reference variant,
//   - otherwise, MAL_SUPPORT_SSE2 defined       -> the __sse2 variant,
//   - otherwise                                 -> the __optimized variant.
// Exactly one call is compiled in; all four arguments are forwarded unchanged.
void mal_pcm_u8_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
#if defined(MAL_USE_REFERENCE_CONVERSION_APIS)
    mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
#elif defined(MAL_SUPPORT_SSE2)
    mal_pcm_u8_to_s24__sse2(dst, src, count, ditherMode);
#else
    mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
#endif
}
...
...
@@ -17370,6 +17402,24 @@ void mal_pcm_u8_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_u8_to_s32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s32__optimized.
void mal_pcm_u8_to_s32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_u8_to_s32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s32__optimized.
void mal_pcm_u8_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_u8_to_s32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_s32__optimized.
void mal_pcm_u8_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_u8_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17409,6 +17459,24 @@ void mal_pcm_u8_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_u8_to_f32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_f32__optimized.
void mal_pcm_u8_to_f32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_u8_to_f32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined, like every other
// __avx512 variant in this file (the guard previously tested MAL_SUPPORT_SSE2, so the function
// could be missing from a build whose callers are guarded by MAL_SUPPORT_AVX512).
// There is no AVX-512-specific code here; all four arguments are passed straight through to
// mal_pcm_u8_to_f32__optimized.
void mal_pcm_u8_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
    mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_u8_to_f32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_u8_to_f32__optimized.
void mal_pcm_u8_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_u8_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17543,6 +17611,24 @@ void mal_pcm_s16_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s16_to_u8__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_u8__optimized.
void mal_pcm_s16_to_u8__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s16_to_u8__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_u8__optimized.
void mal_pcm_s16_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s16_to_u8__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_u8__optimized.
void mal_pcm_s16_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s16_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17587,6 +17673,24 @@ void mal_pcm_s16_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s16_to_s24__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_s24__optimized.
void mal_pcm_s16_to_s24__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s16_to_s24__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_s24__optimized.
void mal_pcm_s16_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s16_to_s24__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_s24__optimized.
void mal_pcm_s16_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s16_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17622,6 +17726,24 @@ void mal_pcm_s16_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s16_to_s32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_s32__optimized.
void mal_pcm_s16_to_s32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s16_to_s32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_s32__optimized.
void mal_pcm_s16_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s16_to_s32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_s32__optimized.
void mal_pcm_s16_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s16_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17669,6 +17791,24 @@ void mal_pcm_s16_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s16_to_f32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_f32__optimized.
void mal_pcm_s16_to_f32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s16_to_f32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_f32__optimized.
void mal_pcm_s16_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s16_to_f32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s16_to_f32__optimized.
void mal_pcm_s16_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s16_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17781,6 +17921,24 @@ void mal_pcm_s24_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s24_to_u8__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_u8__optimized.
void mal_pcm_s24_to_u8__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s24_to_u8__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_u8__optimized.
void mal_pcm_s24_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s24_to_u8__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_u8__optimized.
void mal_pcm_s24_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s24_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17834,6 +17992,24 @@ void mal_pcm_s24_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s24_to_s16__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_s16__optimized.
void mal_pcm_s24_to_s16__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s24_to_s16__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_s16__optimized.
void mal_pcm_s24_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s24_to_s16__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_s16__optimized.
void mal_pcm_s24_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s24_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17877,6 +18053,24 @@ void mal_pcm_s24_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s24_to_s32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_s32__optimized.
void mal_pcm_s24_to_s32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s24_to_s32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_s32__optimized.
void mal_pcm_s24_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s24_to_s32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_s32__optimized.
void mal_pcm_s24_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s24_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -17924,6 +18118,24 @@ void mal_pcm_s24_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s24_to_f32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_f32__optimized.
void mal_pcm_s24_to_f32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s24_to_f32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_f32__optimized.
void mal_pcm_s24_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s24_to_f32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s24_to_f32__optimized.
void mal_pcm_s24_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s24_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18043,6 +18255,24 @@ void mal_pcm_s32_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s32_to_u8__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_u8__optimized.
void mal_pcm_s32_to_u8__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s32_to_u8__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_u8__optimized.
void mal_pcm_s32_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s32_to_u8__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_u8__optimized.
void mal_pcm_s32_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18096,6 +18326,24 @@ void mal_pcm_s32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s32_to_s16__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_s16__optimized.
void mal_pcm_s32_to_s16__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s32_to_s16__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_s16__optimized.
void mal_pcm_s32_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s32_to_s16__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_s16__optimized.
void mal_pcm_s32_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18134,6 +18382,24 @@ void mal_pcm_s32_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s32_to_s24__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_s24__optimized.
void mal_pcm_s32_to_s24__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s32_to_s24__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_s24__optimized.
void mal_pcm_s32_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s32_to_s24__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_s24__optimized.
void mal_pcm_s32_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18187,6 +18453,24 @@ void mal_pcm_s32_to_f32__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_s32_to_f32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_f32__optimized.
void mal_pcm_s32_to_f32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_s32_to_f32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_f32__optimized.
void mal_pcm_s32_to_f32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_s32_to_f32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_s32_to_f32__optimized.
void mal_pcm_s32_to_f32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_s32_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18292,6 +18576,24 @@ void mal_pcm_f32_to_u8__sse2(void* dst, const void* src, mal_uint64 count, mal_d
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_f32_to_u8__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_u8__optimized.
void mal_pcm_f32_to_u8__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_f32_to_u8__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_u8__optimized.
void mal_pcm_f32_to_u8__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_f32_to_u8__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_u8__optimized.
void mal_pcm_f32_to_u8__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_f32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18399,7 +18701,6 @@ void mal_pcm_f32_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
#if defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
#if 1
mal_int16* dst_s16 = (mal_int16*)dst;
const float* src_f32 = (const float*)src;
...
...
@@ -18457,7 +18758,7 @@ void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_
x0 = _mm_mul_ps(x0, _mm_set1_ps(32767.0f));
x1 = _mm_mul_ps(x1, _mm_set1_ps(32767.0f));
*((__m128i*)(dst_s16 + i)) = _mm_packs_epi32(_mm_cvt
ps_epi32(x0), _mm_cv
tps_epi32(x1));
*((__m128i*)(dst_s16 + i)) = _mm_packs_epi32(_mm_cvt
tps_epi32(x0), _mm_cvt
tps_epi32(x1));
i += 8;
}
...
...
@@ -18472,9 +18773,118 @@ void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_
dst_s16[i] = (mal_int16)x;
}
#else
mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// Converts `count` floats read from `src` into 16-bit integers written to `dst`, adding dither
// selected by `ditherMode` before scaling by 32767.
//
// The main loop handles 16 samples per iteration with 256-bit vectors; the remaining
// (count % 16) samples go through the scalar loop at the end.
//
// Implementation notes:
//  - Loads and stores use the unaligned forms. Nothing in this function establishes that `src`
//    or `dst` is 32-byte aligned, and dereferencing a cast `__m256*` requires that alignment.
//  - The float -> s16 narrowing only uses AVX and SSE2 operations (128-bit saturating packs on
//    each half). `_mm256_permute2x128_si256` and `_mm256_packs_epi32` are AVX2 instructions and
//    would fault on a CPU that has AVX but not AVX2.
//  - The vector path relies on the saturating pack for out-of-range input, whereas the scalar
//    tail clips to [-1, 1] before scaling; the two can therefore differ by one at the negative
//    extreme (-32768 vs -32767).
void mal_pcm_f32_to_s16__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
    mal_int16* dst_s16 = (mal_int16*)dst;
    const float* src_f32 = (const float*)src;

    float ditherMin = 0;
    float ditherMax = 0;
    if (ditherMode != mal_dither_mode_none) {
        ditherMin = 1.0f / -32768;
        ditherMax = 1.0f /  32767;
    }

    const __m256 scale = _mm256_set1_ps(32767.0f);

    mal_uint64 i = 0;

    // AVX. 16 s16's are produced per iteration.
    mal_uint64 count16 = count >> 4;
    for (mal_uint64 i16 = 0; i16 < count16; i16 += 1) {
        __m256 d0;
        __m256 d1;
        if (ditherMode == mal_dither_mode_none) {
            d0 = _mm256_setzero_ps();
            d1 = _mm256_setzero_ps();
        } else {
            // One independent dither value per output sample (16 generator calls per iteration).
            float dither[16];
            for (int k = 0; k < 16; k += 1) {
                if (ditherMode == mal_dither_mode_rectangle) {
                    dither[k] = mal_dither_f32_rectangle(ditherMin, ditherMax);
                } else {
                    dither[k] = mal_dither_f32_triangle(ditherMin, ditherMax);
                }
            }
            d0 = _mm256_loadu_ps(dither + 0);
            d1 = _mm256_loadu_ps(dither + 8);
        }

        __m256 x0 = _mm256_loadu_ps(src_f32 + i + 0);
        __m256 x1 = _mm256_loadu_ps(src_f32 + i + 8);

        x0 = _mm256_mul_ps(_mm256_add_ps(x0, d0), scale);
        x1 = _mm256_mul_ps(_mm256_add_ps(x1, d1), scale);

        // Truncate to 32-bit integers, then narrow each 8-sample group to s16 with signed saturation.
        __m256i i0 = _mm256_cvttps_epi32(x0);
        __m256i i1 = _mm256_cvttps_epi32(x1);
        __m128i lo = _mm_packs_epi32(_mm256_castsi256_si128(i0), _mm256_extractf128_si256(i0, 1));
        __m128i hi = _mm_packs_epi32(_mm256_castsi256_si128(i1), _mm256_extractf128_si256(i1, 1));

        _mm_storeu_si128((__m128i*)(dst_s16 + i + 0), lo);
        _mm_storeu_si128((__m128i*)(dst_s16 + i + 8), hi);

        i += 16;
    }

    // Leftover.
    for (; i < count; i += 1) {
        float x = src_f32[i];
        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
        x = x * 32767.0f;                           // -1..1 to -32767..32767

        dst_s16[i] = (mal_int16)x;
    }
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_f32_to_s16__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code yet; all four arguments are passed straight through to
// mal_pcm_f32_to_s16__avx, so that function must be compiled in as well.
// NOTE(review): presumably MAL_SUPPORT_AVX512 always implies MAL_SUPPORT_AVX - confirm.
void mal_pcm_f32_to_s16__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
// TODO: Convert this from AVX to AVX-512.
mal_pcm_f32_to_s16__avx(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_f32_to_s16__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s16__optimized.
void mal_pcm_f32_to_s16__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
}
#endif
...
...
@@ -18528,6 +18938,24 @@ void mal_pcm_f32_to_s24__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_f32_to_s24__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s24__optimized.
void mal_pcm_f32_to_s24__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_f32_to_s24__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s24__optimized.
void mal_pcm_f32_to_s24__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_f32_to_s24__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s24__optimized.
void mal_pcm_f32_to_s24__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_f32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
@@ -18576,6 +19004,24 @@ void mal_pcm_f32_to_s32__sse2(void* dst, const void* src, mal_uint64 count, mal_
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX)
// mal_pcm_f32_to_s32__avx: compiled only when MAL_SUPPORT_AVX is defined.
// There is no AVX-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s32__optimized.
void mal_pcm_f32_to_s32__avx(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// mal_pcm_f32_to_s32__avx512: compiled only when MAL_SUPPORT_AVX512 is defined.
// There is no AVX-512-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s32__optimized.
void mal_pcm_f32_to_s32__avx512(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// mal_pcm_f32_to_s32__neon: compiled only when MAL_SUPPORT_NEON is defined.
// There is no NEON-specific code here; all four arguments are passed straight through to mal_pcm_f32_to_s32__optimized.
void mal_pcm_f32_to_s32__neon(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
#endif
void mal_pcm_f32_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
...
...
tests/mal_profiling.c
View file @
ef2ad300
...
...
@@ -269,21 +269,210 @@ void pcm_convert__sse2(void* pOut, mal_format formatOut, const void* pIn, mal_fo
#if defined(MAL_SUPPORT_AVX)
// Dispatches a format conversion to the matching mal_pcm_*__avx routine, selected by the
// (formatIn, formatOut) pair.
//
// Each handled pair converts exactly once and returns. pcm_convert__sse2 is only used as a
// fallback for pairs the switch does not handle; running it unconditionally first would convert
// the whole buffer twice and inflate the timings this harness measures.
void pcm_convert__avx(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    switch (formatIn)
    {
        case mal_format_u8:
        {
            switch (formatOut)
            {
                case mal_format_s16: mal_pcm_u8_to_s16__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_u8_to_s24__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_u8_to_s32__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_u8_to_f32__avx(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s16:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s16_to_u8__avx( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_s16_to_s24__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_s16_to_s32__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s16_to_f32__avx(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s24:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s24_to_u8__avx( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_s24_to_s16__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_s24_to_s32__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s24_to_f32__avx(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s32_to_u8__avx( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_s32_to_s16__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_s32_to_s24__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s32_to_f32__avx(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_f32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_f32_to_u8__avx( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_f32_to_s16__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_f32_to_s24__avx(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_f32_to_s32__avx(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        default: break;
    }

    // No AVX routine for this format pair: defer to the SSE2 dispatcher.
    pcm_convert__sse2(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// Dispatches a format conversion to the matching mal_pcm_*__avx512 routine, selected by the
// (formatIn, formatOut) pair.
//
// Each handled pair converts exactly once and returns. pcm_convert__avx is only used as a
// fallback for pairs the switch does not handle; running it unconditionally first would convert
// the whole buffer twice and inflate the timings this harness measures.
void pcm_convert__avx512(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    switch (formatIn)
    {
        case mal_format_u8:
        {
            switch (formatOut)
            {
                case mal_format_s16: mal_pcm_u8_to_s16__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_u8_to_s24__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_u8_to_s32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_u8_to_f32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s16:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s16_to_u8__avx512( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_s16_to_s24__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_s16_to_s32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s16_to_f32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s24:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s24_to_u8__avx512( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_s24_to_s16__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_s24_to_s32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s24_to_f32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s32_to_u8__avx512( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_s32_to_s16__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_s32_to_s24__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s32_to_f32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_f32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_f32_to_u8__avx512( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_f32_to_s16__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_f32_to_s24__avx512(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_f32_to_s32__avx512(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        default: break;
    }

    // No AVX-512 routine for this format pair: defer to the AVX dispatcher.
    pcm_convert__avx(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// Dispatches a format conversion to the matching mal_pcm_*__neon routine, selected by the
// (formatIn, formatOut) pair.
//
// Each handled pair converts exactly once and returns. pcm_convert__reference is only used as a
// fallback for pairs the switch does not handle; running it unconditionally first would convert
// the whole buffer twice and inflate the timings this harness measures.
void pcm_convert__neon(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    switch (formatIn)
    {
        case mal_format_u8:
        {
            switch (formatOut)
            {
                case mal_format_s16: mal_pcm_u8_to_s16__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_u8_to_s24__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_u8_to_s32__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_u8_to_f32__neon(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s16:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s16_to_u8__neon( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_s16_to_s24__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_s16_to_s32__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s16_to_f32__neon(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s24:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s24_to_u8__neon( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_s24_to_s16__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_s24_to_s32__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s24_to_f32__neon(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_s32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s32_to_u8__neon( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_s32_to_s16__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_s32_to_s24__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_f32: mal_pcm_s32_to_f32__neon(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        case mal_format_f32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_f32_to_u8__neon( pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s16: mal_pcm_f32_to_s16__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s24: mal_pcm_f32_to_s24__neon(pOut, pIn, sampleCount, ditherMode); return;
                case mal_format_s32: mal_pcm_f32_to_s32__neon(pOut, pIn, sampleCount, ditherMode); return;
                default: break;
            }
        } break;

        default: break;
    }

    // No NEON routine for this format pair: defer to the reference dispatcher.
    pcm_convert__reference(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
...
...
@@ -359,7 +548,7 @@ int do_profiling__format_conversion__profile_individual(mal_format formatIn, mal
{
mal_int16
a
=
((
const
mal_int16
*
)
pReferenceData
)[
iSample
];
mal_int16
b
=
((
const
mal_int16
*
)
pTestData
)[
iSample
];
if
(
abs
(
a
-
b
)
>
1
)
{
if
(
abs
(
a
-
b
)
>
0
)
{
printf
(
"Incorrect Sample: (%d) %d != %d
\n
"
,
(
int
)
iSample
,
a
,
b
);
passed
=
MAL_FALSE
;
}
...
...
@@ -900,9 +1089,19 @@ int do_profiling__src()
// Converts two 4xf32 vectors to one 8xi16 vector with signed saturation.
//
// Each float is truncated toward zero to a 32-bit integer (_mm_cvttps_epi32), then the eight
// integers are narrowed to 16 bits with saturation to [-32768, 32767]. The four values from
// f32_0 occupy the low four lanes of the result and the four from f32_1 the high four lanes.
//
// The single definition below replaces a duplicated signature and an unreachable second
// `return`; truncation is used so the result agrees with drmath_vf32_to_vi16__avx.
static inline __m128i drmath_vf32_to_vi16__sse2(__m128 f32_0, __m128 f32_1)
{
    return _mm_packs_epi32(_mm_cvttps_epi32(f32_0), _mm_cvttps_epi32(f32_1));
}
// Converts two 8xf32 vectors to one 16xi16 vector with signed saturation.
//
// Each float is truncated toward zero to a 32-bit integer, then narrowed to 16 bits with
// saturation. The eight values from f32_0 occupy the low eight lanes of the result, in order,
// and the eight from f32_1 the high eight lanes.
//
// Only AVX and SSE2 operations are used: each 256-bit integer vector is split into its two
// 128-bit halves, packed with the 128-bit saturating pack, and the two packed halves are
// joined again. The 256-bit integer permute/pack instructions belong to AVX2 and are not
// available on CPUs that only support AVX.
__m256i drmath_vf32_to_vi16__avx(__m256 f32_0, __m256 f32_1)
{
    __m256i i0 = _mm256_cvttps_epi32(f32_0);
    __m256i i1 = _mm256_cvttps_epi32(f32_1);

    __m128i lo = _mm_packs_epi32(_mm256_castsi256_si128(i0), _mm256_extractf128_si256(i0, 1));
    __m128i hi = _mm_packs_epi32(_mm256_castsi256_si128(i1), _mm256_extractf128_si256(i1, 1));

    return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
}
int
main
(
int
argc
,
char
**
argv
)
...
...
@@ -916,7 +1115,11 @@ int main(int argc, char** argv)
//__m128 f1 = _mm_set_ps(-32780, 6, 5, 4);
//__m128i r = drmath_vf32_to_vi16__sse2(f0, f1);
//int a = 5;
__m256
f0
=
_mm256_set_ps
(
7
,
6
,
5
,
4
,
3
,
2
,
1
,
0
);
__m256
f1
=
_mm256_set_ps
(
15
,
14
,
13
,
12
,
11
,
10
,
9
,
8
);
__m256i
r
=
drmath_vf32_to_vi16__avx
(
f0
,
f1
);
int
a
=
5
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment