Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
M
miniaudio
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
MyCard
miniaudio
Commits
b2815ccf
Commit
b2815ccf
authored
May 27, 2018
by
David Reid
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add SSE2 optimized implementation of f32 -> s16 conversion.
parent
073e89e4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
651 additions
and
140 deletions
+651
-140
mini_al.h
mini_al.h
+188
-124
tests/mal_profiling.c
tests/mal_profiling.c
+448
-3
tests/mal_test_0.vcxproj
tests/mal_test_0.vcxproj
+15
-13
No files found.
mini_al.h
View file @
b2815ccf
...
...
@@ -3306,16 +3306,25 @@ static MAL_INLINE mal_int32 mal_rand_range_s32(mal_int32 lo, mal_int32 hi)
}
// Produces one dither value by making a single draw from mal_rand_range_f32()
// over [ditherMin, ditherMax].
static MAL_INLINE float mal_dither_f32_rectangle(float ditherMin, float ditherMax)
{
    float noise = mal_rand_range_f32(ditherMin, ditherMax);
    return noise;
}
// Produces one dither value as the sum of two independent draws from
// mal_rand_range_f32(): the first over [ditherMin, 0], the second over
// [0, ditherMax]. The draws are made in that order.
static MAL_INLINE float mal_dither_f32_triangle(float ditherMin, float ditherMax)
{
    float lowerDraw = mal_rand_range_f32(ditherMin, 0);
    float upperDraw = mal_rand_range_f32(0, ditherMax);

    float noise = lowerDraw + upperDraw;
    return noise;
}
static MAL_INLINE float mal_dither_f32(mal_dither_mode ditherMode, float ditherMin, float ditherMax)
{
if (ditherMode == mal_dither_mode_rectangle) {
float a = mal_rand_range_f32(ditherMin, ditherMax);
return a;
return mal_dither_f32_rectangle(ditherMin, ditherMax);
}
if (ditherMode == mal_dither_mode_triangle) {
float a = mal_rand_range_f32(ditherMin, 0);
float b = mal_rand_range_f32(0, ditherMax);
return a + b;
return mal_dither_f32_triangle(ditherMin, ditherMax);
}
return 0;
...
...
@@ -17273,8 +17282,8 @@ void mal_pcm_u8_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17284,13 +17293,9 @@ void mal_pcm_u8_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_u8_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17317,8 +17322,8 @@ void mal_pcm_u8_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17329,8 +17334,8 @@ void mal_pcm_u8_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
#else
#if
def MAL_USE_SSE
mal_pcm_u8_to_s24__sse(dst, src, count, ditherMode);
#if
defined(MAL_SUPPORT_SSE2)
mal_pcm_u8_to_s24__sse
2
(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
#endif
...
...
@@ -17359,8 +17364,8 @@ void mal_pcm_u8_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17370,13 +17375,9 @@ void mal_pcm_u8_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_u8_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17402,8 +17403,8 @@ void mal_pcm_u8_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17413,13 +17414,9 @@ void mal_pcm_u8_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_u8_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17540,8 +17537,8 @@ void mal_pcm_s16_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17551,13 +17548,9 @@ void mal_pcm_s16_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17588,8 +17581,8 @@ void mal_pcm_s16_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17599,13 +17592,9 @@ void mal_pcm_s16_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_s24__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17627,8 +17616,8 @@ void mal_pcm_s16_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17638,13 +17627,9 @@ void mal_pcm_s16_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17678,8 +17663,8 @@ void mal_pcm_s16_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17689,13 +17674,9 @@ void mal_pcm_s16_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17794,8 +17775,8 @@ void mal_pcm_s24_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17805,13 +17786,9 @@ void mal_pcm_s24_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17851,8 +17828,8 @@ void mal_pcm_s24_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17862,13 +17839,9 @@ void mal_pcm_s24_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17898,8 +17871,8 @@ void mal_pcm_s24_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17909,13 +17882,9 @@ void mal_pcm_s24_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17949,8 +17918,8 @@ void mal_pcm_s24_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17960,13 +17929,9 @@ void mal_pcm_s24_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18072,8 +18037,8 @@ void mal_pcm_s32_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18083,13 +18048,9 @@ void mal_pcm_s32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18129,8 +18090,8 @@ void mal_pcm_s32_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18140,13 +18101,9 @@ void mal_pcm_s32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18171,8 +18128,8 @@ void mal_pcm_s32_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18182,13 +18139,9 @@ void mal_pcm_s32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_s24__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18228,8 +18181,8 @@ void mal_pcm_s32_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18239,13 +18192,9 @@ void mal_pcm_s32_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18337,8 +18286,8 @@ void mal_pcm_f32_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18348,13 +18297,9 @@ void mal_pcm_f32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18392,13 +18337,144 @@ void mal_pcm_f32_to_s16__reference(void* dst, const void* src, mal_uint64 count,
// Converts `count` f32 samples from `src` into s16 samples in `dst`, optionally adding dither.
//
// dst        - output buffer, written as `count` mal_int16 values.
// src        - input buffer, read as `count` floats.
// count      - number of samples to convert.
// ditherMode - forwarded to mal_dither_f32(); when it is mal_dither_mode_none the dither range is [0, 0].
//
// Each sample has its dither value added, is clipped to [-1, 1], scaled by 32767 and truncated to
// mal_int16. The main loop handles four samples per iteration; a scalar loop handles the remainder.
//
// This routine performs the conversion itself and does not call the reference implementation first,
// so the buffer is converted once and dither values are drawn exactly once per sample.
void mal_pcm_f32_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
    mal_int16* dst_s16 = (mal_int16*)dst;
    const float* src_f32 = (const float*)src;

    float ditherMin = 0;
    float ditherMax = 0;
    if (ditherMode != mal_dither_mode_none) {
        ditherMin = 1.0f / -32768;
        ditherMax = 1.0f /  32767;
    }

    mal_uint64 i = 0;

    // Unrolled.
    mal_uint64 count4 = count >> 2;
    for (mal_uint64 i4 = 0; i4 < count4; i4 += 1) {
        // Dither values are drawn in sample order so the sequence matches the per-sample leftover loop.
        float d0 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
        float d1 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
        float d2 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
        float d3 = mal_dither_f32(ditherMode, ditherMin, ditherMax);

        float x0 = src_f32[i+0];
        float x1 = src_f32[i+1];
        float x2 = src_f32[i+2];
        float x3 = src_f32[i+3];

        x0 = x0 + d0;
        x1 = x1 + d1;
        x2 = x2 + d2;
        x3 = x3 + d3;

        // clip
        x0 = ((x0 < -1) ? -1 : ((x0 > 1) ? 1 : x0));
        x1 = ((x1 < -1) ? -1 : ((x1 > 1) ? 1 : x1));
        x2 = ((x2 < -1) ? -1 : ((x2 > 1) ? 1 : x2));
        x3 = ((x3 < -1) ? -1 : ((x3 > 1) ? 1 : x3));

        // -1..1 to -32767..32767
        x0 = x0 * 32767.0f;
        x1 = x1 * 32767.0f;
        x2 = x2 * 32767.0f;
        x3 = x3 * 32767.0f;

        dst_s16[i+0] = (mal_int16)x0;
        dst_s16[i+1] = (mal_int16)x1;
        dst_s16[i+2] = (mal_int16)x2;
        dst_s16[i+3] = (mal_int16)x3;

        i += 4;
    }

    // Leftover.
    for (; i < count; i += 1) {
        float x = src_f32[i];
        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
        x = x * 32767.0f;                           // -1..1 to -32767..32767

        dst_s16[i] = (mal_int16)x;
    }
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
#if 1
mal_int16* dst_s16 = (mal_int16*)dst;
const float* src_f32 = (const float*)src;
float ditherMin = 0;
float ditherMax = 0;
if (ditherMode != mal_dither_mode_none) {
ditherMin = 1.0f / -32768;
ditherMax = 1.0f / 32767;
}
mal_uint64 i = 0;
// SSE2. SSE allows us to output 8 s16's at a time which means our loop is unrolled 8 times.
mal_uint64 count8 = count >> 3;
for (mal_uint64 i8 = 0; i8 < count8; i8 += 1) {
__m128 d0;
__m128 d1;
if (ditherMode == mal_dither_mode_none) {
d0 = _mm_set1_ps(0);
d1 = _mm_set1_ps(0);
} else if (ditherMode == mal_dither_mode_rectangle) {
d0 = _mm_set_ps(
mal_dither_f32_rectangle(ditherMin, ditherMax),
mal_dither_f32_rectangle(ditherMin, ditherMax),
mal_dither_f32_rectangle(ditherMin, ditherMax),
mal_dither_f32_rectangle(ditherMin, ditherMax)
);
d1 = _mm_set_ps(
mal_dither_f32_rectangle(ditherMin, ditherMax),
mal_dither_f32_rectangle(ditherMin, ditherMax),
mal_dither_f32_rectangle(ditherMin, ditherMax),
mal_dither_f32_rectangle(ditherMin, ditherMax)
);
} else {
d0 = _mm_set_ps(
mal_dither_f32_triangle(ditherMin, ditherMax),
mal_dither_f32_triangle(ditherMin, ditherMax),
mal_dither_f32_triangle(ditherMin, ditherMax),
mal_dither_f32_triangle(ditherMin, ditherMax)
);
d1 = _mm_set_ps(
mal_dither_f32_triangle(ditherMin, ditherMax),
mal_dither_f32_triangle(ditherMin, ditherMax),
mal_dither_f32_triangle(ditherMin, ditherMax),
mal_dither_f32_triangle(ditherMin, ditherMax)
);
}
__m128 x0 = *((__m128*)(src_f32 + i) + 0);
__m128 x1 = *((__m128*)(src_f32 + i) + 1);
x0 = _mm_add_ps(x0, d0);
x1 = _mm_add_ps(x1, d1);
x0 = _mm_mul_ps(x0, _mm_set1_ps(32767.0f));
x1 = _mm_mul_ps(x1, _mm_set1_ps(32767.0f));
*((__m128i*)(dst_s16 + i)) = _mm_packs_epi32(_mm_cvtps_epi32(x0), _mm_cvtps_epi32(x1));
i += 8;
}
// Leftover.
for (; i < count; i += 1) {
float x = src_f32[i];
x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
x = ((x < -1) ? -1 : ((x > 1) ? 1 : x)); // clip
x = x * 32767.0f; // -1..1 to -32767..32767
dst_s16[i] = (mal_int16)x;
}
#else
mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
#endif
}
#endif
...
...
@@ -18406,13 +18482,9 @@ void mal_pcm_f32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18450,8 +18522,8 @@ void mal_pcm_f32_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18461,13 +18533,9 @@ void mal_pcm_f32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_s24__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18502,8 +18570,8 @@ void mal_pcm_f32_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18513,13 +18581,9 @@ void mal_pcm_f32_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
tests/mal_profiling.c
View file @
b2815ccf
...
...
@@ -34,6 +34,429 @@ const char* mal_src_algorithm_to_string(mal_src_algorithm algorithm)
return
"Unknown"
;
}
// Returns a constant, human-readable name for a dither mode.
//
// ditherMode - the mode to describe.
//
// Returns "None", "Rectangle" or "Triangle" for the corresponding mode, and "Unknown" for any
// other value. The returned pointer refers to a string literal and must not be freed.
const char* mal_dither_mode_to_string(mal_dither_mode ditherMode)
{
    switch (ditherMode)
    {
        case mal_dither_mode_none:      return "None";
        case mal_dither_mode_rectangle: return "Rectangle";
        case mal_dither_mode_triangle:  return "Triangle";
    }

    return "Unknown";
}
///////////////////////////////////////////////////////////////////////////////
//
// Format Conversion
//
///////////////////////////////////////////////////////////////////////////////
// State record for the format conversion profiling code.
// NOTE(review): the field descriptions below are inferred from the names only; no use of this
// struct is visible here, so confirm them against the profiling routines.
typedef struct
{
    void* pBaseData;            // presumably the start of the sample buffer
    mal_uint64 sampleCount;     // presumably the number of samples in that buffer
    mal_uint64 iNextSample;     // presumably the index of the next sample to hand out
} format_conversion_data;
// Converts sampleCount samples from pIn (in formatIn) to pOut (in formatOut) by dispatching to
// the matching mal_pcm_*_to_*__reference() routine.
//
// Nothing is written when formatIn and formatOut are the same, or when either is not one of
// u8, s16, s24, s32 or f32.
void pcm_convert__reference(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    if (formatIn == mal_format_u8) {
        switch (formatOut) {
            case mal_format_s16: mal_pcm_u8_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_u8_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_u8_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_u8_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s16) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s16_to_u8__reference(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s24: mal_pcm_s16_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_s16_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s16_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s24) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s24_to_u8__reference(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_s24_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_s24_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s24_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s32) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s32_to_u8__reference(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_s32_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_s32_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s32_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_f32) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_f32_to_u8__reference(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_f32_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_f32_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_f32_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    }
}
// Converts sampleCount samples from pIn (in formatIn) to pOut (in formatOut) by dispatching to
// the matching mal_pcm_*_to_*__optimized() routine.
//
// Nothing is written when formatIn and formatOut are the same, or when either is not one of
// u8, s16, s24, s32 or f32.
void pcm_convert__optimized(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    if (formatIn == mal_format_u8) {
        switch (formatOut) {
            case mal_format_s16: mal_pcm_u8_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_u8_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_u8_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_u8_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s16) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s16_to_u8__optimized(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s24: mal_pcm_s16_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_s16_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s16_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s24) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s24_to_u8__optimized(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_s24_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_s24_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s24_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s32) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s32_to_u8__optimized(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_s32_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_s32_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s32_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_f32) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_f32_to_u8__optimized(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_f32_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_f32_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_f32_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    }
}
#if defined(MAL_SUPPORT_SSE2)
// Converts sampleCount samples from pIn (in formatIn) to pOut (in formatOut) by dispatching to
// the matching mal_pcm_*_to_*__sse2() routine. Only compiled when MAL_SUPPORT_SSE2 is defined.
//
// Nothing is written when formatIn and formatOut are the same, or when either is not one of
// u8, s16, s24, s32 or f32.
void pcm_convert__sse2(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    if (formatIn == mal_format_u8) {
        switch (formatOut) {
            case mal_format_s16: mal_pcm_u8_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_u8_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_u8_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_u8_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s16) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s16_to_u8__sse2(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s24: mal_pcm_s16_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_s16_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s16_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s24) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s24_to_u8__sse2(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_s24_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_s24_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s24_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_s32) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_s32_to_u8__sse2(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_s32_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_s32_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_f32: mal_pcm_s32_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    } else if (formatIn == mal_format_f32) {
        switch (formatOut) {
            case mal_format_u8:  mal_pcm_f32_to_u8__sse2(pOut, pIn, sampleCount, ditherMode);  break;
            case mal_format_s16: mal_pcm_f32_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s24: mal_pcm_f32_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
            case mal_format_s32: mal_pcm_f32_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
            default: break;
        }
    }
}
#endif
#if defined(MAL_SUPPORT_AVX)
// AVX entry point for the profiling harness.
//
// There is no AVX-specific conversion here; the call is forwarded unchanged to the SSE2 path.
void pcm_convert__avx(
    void* pOut,
    mal_format formatOut,
    const void* pIn,
    mal_format formatIn,
    mal_uint64 sampleCount,
    mal_dither_mode ditherMode)
{
    pcm_convert__sse2(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// AVX-512 entry point for the profiling harness.
//
// There is no AVX-512-specific conversion here; the call is forwarded unchanged to the AVX path.
void pcm_convert__avx512(
    void* pOut,
    mal_format formatOut,
    const void* pIn,
    mal_format formatIn,
    mal_uint64 sampleCount,
    mal_dither_mode ditherMode)
{
    pcm_convert__avx(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// NEON entry point for the profiling harness.
//
// There is no NEON-specific conversion here; the call is forwarded unchanged to the reference
// implementation.
void pcm_convert__neon(
    void* pOut,
    mal_format formatOut,
    const void* pIn,
    mal_format formatIn,
    mal_uint64 sampleCount,
    mal_dither_mode ditherMode)
{
    pcm_convert__reference(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
// Converts sampleCount samples from pIn (formatIn) to pOut (formatOut) using the implementation
// selected by `mode`.
//
// Only modes whose MAL_SUPPORT_* macro is defined at compile time are dispatched. For any other
// mode value no conversion routine is called and pOut is not written.
void pcm_convert(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode, simd_mode mode)
{
    // The dither seed is reset on every call so that each implementation produces results that
    // can be compared against each other.
    mal_seed(1234);

    if (mode == simd_mode_scalar) {
        pcm_convert__optimized(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }

#if defined(MAL_SUPPORT_SSE2)
    if (mode == simd_mode_sse2) {
        pcm_convert__sse2(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif

#if defined(MAL_SUPPORT_AVX)
    if (mode == simd_mode_avx) {
        pcm_convert__avx(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif

#if defined(MAL_SUPPORT_AVX512)
    if (mode == simd_mode_avx512) {
        pcm_convert__avx512(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif

#if defined(MAL_SUPPORT_NEON)
    if (mode == simd_mode_neon) {
        pcm_convert__neon(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif
}
// Profiles a single conversion (formatIn -> formatOut) using the implementation selected by `mode`,
// verifies the output against pReferenceData, and prints one result line.
//
// Parameters:
//   formatIn, formatOut - Sample formats of pBaseData and of the converted output.
//   ditherMode          - Dither mode passed through to the conversion.
//   pBaseData           - Input samples in formatIn.
//   sampleCount         - Number of samples to convert and compare.
//   mode                - Which implementation pcm_convert() should dispatch to.
//   pReferenceData      - Expected output in formatOut, sampleCount samples long.
//   referenceTime       - Time in seconds the reference conversion took; used for the printed percentage.
//
// Returns 0 once the result line has been printed (regardless of pass/fail), or -1 if the scratch
// output buffer could not be allocated.
int do_profiling__format_conversion__profile_individual(mal_format formatIn, mal_format formatOut, mal_dither_mode ditherMode, const void* pBaseData, mal_uint64 sampleCount, simd_mode mode, const void* pReferenceData, double referenceTime)
{
    // The output sample size depends only on formatOut, so it is looked up once here rather than
    // on every iteration of the comparison loop below.
    mal_uint32 bps = mal_get_bytes_per_sample(formatOut);

    void* pTestData = mal_aligned_malloc((size_t)(sampleCount * bps), MAL_SIMD_ALIGNMENT);
    if (pTestData == NULL) {
        printf("Out of memory.\n");
        return -1;
    }

    // Time only the conversion itself.
    mal_timer timer;
    mal_timer_init(&timer);
    double timeTaken = mal_timer_get_time_in_seconds(&timer);
    {
        pcm_convert(pTestData, formatOut, pBaseData, formatIn, sampleCount, ditherMode, mode);
    }
    timeTaken = mal_timer_get_time_in_seconds(&timer) - timeTaken;

    // Compare with the reference for correctness. Every mismatching sample is reported.
    mal_bool32 passed = MAL_TRUE;
    for (mal_uint64 iSample = 0; iSample < sampleCount; ++iSample) {
        // We need to compare on a format by format basis because we allow for very slight deviations
        // in results depending on the output format.
        switch (formatOut)
        {
            case mal_format_s16:
            {
                // s16 output may differ from the reference by at most 1.
                mal_int16 a = ((const mal_int16*)pReferenceData)[iSample];
                mal_int16 b = ((const mal_int16*)pTestData)[iSample];
                if (abs(a - b) > 1) {
                    printf("Incorrect Sample: (%d) %d != %d\n", (int)iSample, a, b);
                    passed = MAL_FALSE;
                }
            } break;

            default:
            {
                // All other formats must match the reference byte for byte.
                if (memcmp(mal_offset_ptr(pReferenceData, iSample*bps), mal_offset_ptr(pTestData, iSample*bps), bps) != 0) {
                    printf("Incorrect Sample: (%d)\n", (int)iSample);
                    passed = MAL_FALSE;
                }
            } break;
        }
    }

    if (passed) {
        printf(" [PASSED] ");
    } else {
        printf(" [FAILED] ");
    }

    // The percentage is the reference time relative to this run's time, so values above 100 mean
    // this implementation was faster than the reference.
    printf("(Dither = %s) %s -> %s (%s): %.4fms (%.2f%%)\n", mal_dither_mode_to_string(ditherMode), mal_get_format_name(formatIn), mal_get_format_name(formatOut), simd_mode_to_string(mode), timeTaken*1000, referenceTime/timeTaken*100);

    mal_aligned_free(pTestData);
    return 0;
}
// Profiles every available implementation of a single formatIn -> formatOut conversion.
//
// An f32 sine wave is generated and converted to formatIn to form the input data. The reference
// conversion is then run and timed, and each implementation (scalar, plus whichever SIMD paths the
// mal_has_*() checks report as available) is profiled and validated against the reference output.
//
// Returns 0 on success, or -1 if any buffer could not be allocated (including inside an individual
// profiling run).
int do_profiling__format_conversion__profile_set(mal_format formatIn, mal_format formatOut, mal_dither_mode ditherMode)
{
    // Generate our base data to begin with. This is generated from an f32 sine wave which is
    // converted to formatIn. That then becomes our base data.
    mal_uint32 sampleCount = 1000000;

    float* pSourceData = (float*)mal_aligned_malloc(sampleCount*sizeof(*pSourceData), MAL_SIMD_ALIGNMENT);
    if (pSourceData == NULL) {
        printf("Out of memory.\n");
        return -1;
    }

    mal_sine_wave sineWave;
    mal_sine_wave_init(1.0, 400, 48000, &sineWave);
    mal_sine_wave_read(&sineWave, sampleCount, pSourceData);

    void* pBaseData = mal_aligned_malloc(sampleCount * mal_get_bytes_per_sample(formatIn), MAL_SIMD_ALIGNMENT);
    if (pBaseData == NULL) {
        // Without this check the conversion below would write through a NULL pointer.
        printf("Out of memory.\n");
        mal_aligned_free(pSourceData);
        return -1;
    }
    mal_pcm_convert(pBaseData, formatIn, pSourceData, mal_format_f32, sampleCount, mal_dither_mode_none);

    // Reference first so we can get a benchmark.
    void* pReferenceData = mal_aligned_malloc(sampleCount * mal_get_bytes_per_sample(formatOut), MAL_SIMD_ALIGNMENT);
    if (pReferenceData == NULL) {
        printf("Out of memory.\n");
        mal_aligned_free(pBaseData);
        mal_aligned_free(pSourceData);
        return -1;
    }

    mal_timer timer;
    mal_timer_init(&timer);
    double referenceTime = mal_timer_get_time_in_seconds(&timer);
    {
        pcm_convert__reference(pReferenceData, formatOut, pBaseData, formatIn, sampleCount, ditherMode);
    }
    referenceTime = mal_timer_get_time_in_seconds(&timer) - referenceTime;

    // Here is where each optimized implementation is profiled. A failure in one run is remembered
    // but does not stop the remaining implementations from being profiled.
    int result = 0;

    if (do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_scalar, pReferenceData, referenceTime) != 0) {
        result = -1;
    }

    if (mal_has_sse2() && do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_sse2, pReferenceData, referenceTime) != 0) {
        result = -1;
    }

    if (mal_has_avx() && do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_avx, pReferenceData, referenceTime) != 0) {
        result = -1;
    }

    if (mal_has_avx512f() && do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_avx512, pReferenceData, referenceTime) != 0) {
        result = -1;
    }

    if (mal_has_neon() && do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_neon, pReferenceData, referenceTime) != 0) {
        result = -1;
    }

    mal_aligned_free(pReferenceData);
    mal_aligned_free(pBaseData);
    mal_aligned_free(pSourceData);
    return result;
}
// Runs the format conversion profiling suite.
//
// Currently a single set is profiled: f32 -> s16 with no dithering.
//
// Returns the result of the profiled set, i.e. whatever
// do_profiling__format_conversion__profile_set() reports, instead of unconditionally reporting
// success.
int do_profiling__format_conversion()
{
    return do_profiling__format_conversion__profile_set(mal_format_f32, mal_format_s16, mal_dither_mode_none);
}
///////////////////////////////////////////////////////////////////////////////
//
// Channel Routing
//
///////////////////////////////////////////////////////////////////////////////
// Two global 8 x 48000 float buffers for the channel routing profiling.
// NOTE(review): judging by the names, the first presumably holds the benchmark (reference) output
// and the second the output under test, indexed as [channel][frame] - confirm against the channel
// routing profiling code.
float g_ChannelRouterProfilingOutputBenchmark[8][48000];
float g_ChannelRouterProfilingOutput[8][48000];
...
...
@@ -416,6 +839,7 @@ int do_profiling__src__profile_set(src_data* pBaseData, mal_uint32 sampleRateIn,
// Now that we have the reference data to compare against we can go ahead and measure the SIMD optimizations.
do_profiling__src__profile_individual
(
pBaseData
,
sampleRateIn
,
sampleRateOut
,
algorithm
,
simd_mode_scalar
,
&
referenceData
);
if
(
mal_has_sse2
())
{
do_profiling__src__profile_individual
(
pBaseData
,
sampleRateIn
,
sampleRateOut
,
algorithm
,
simd_mode_sse2
,
&
referenceData
);
}
...
...
@@ -446,7 +870,7 @@ int do_profiling__src()
src_data
baseData
;
mal_zero_object
(
&
baseData
);
baseData
.
channels
=
8
;
baseData
.
frameCount
=
10000
;
baseData
.
frameCount
=
10000
0
;
for
(
mal_uint32
iChannel
=
0
;
iChannel
<
baseData
.
channels
;
++
iChannel
)
{
baseData
.
pFrameData
[
iChannel
]
=
(
float
*
)
mal_aligned_malloc
((
size_t
)(
baseData
.
frameCount
*
sizeof
(
float
)),
MAL_SIMD_ALIGNMENT
);
if
(
baseData
.
pFrameData
[
iChannel
]
==
NULL
)
{
...
...
@@ -475,16 +899,33 @@ int do_profiling__src()
}
// Packs two vectors of four f32 lanes into a single vector of eight i16 lanes.
//
// Each float is first converted to a 32-bit integer, then the two integer vectors are narrowed to
// 16 bits with signed saturation. The lanes of f32_0 end up in the low half of the result and the
// lanes of f32_1 in the high half.
static inline __m128i drmath_vf32_to_vi16__sse2(__m128 f32_0, __m128 f32_1)
{
    __m128i lowLanes  = _mm_cvtps_epi32(f32_0);
    __m128i highLanes = _mm_cvtps_epi32(f32_1);

    return _mm_packs_epi32(lowLanes, highLanes);
}
int
main
(
int
argc
,
char
**
argv
)
{
(
void
)
argc
;
(
void
)
argv
;
{
//__m128 f0 = _mm_set_ps(32780, 2, 1, 0);
//__m128 f1 = _mm_set_ps(-32780, 6, 5, 4);
//__m128i r = drmath_vf32_to_vi16__sse2(f0, f1);
//int a = 5;
}
// Summary.
if
(
mal_has_sse2
())
{
printf
(
"Has SSE
:
YES
\n
"
);
printf
(
"Has SSE
2:
YES
\n
"
);
}
else
{
printf
(
"Has SSE
:
NO
\n
"
);
printf
(
"Has SSE
2:
NO
\n
"
);
}
if
(
mal_has_avx
())
{
printf
(
"Has AVX: YES
\n
"
);
...
...
@@ -505,6 +946,10 @@ int main(int argc, char** argv)
printf
(
"
\n
"
);
// Format conversion.
do_profiling__format_conversion
();
printf
(
"
\n\n
"
);
// Channel routing.
do_profiling__channel_routing
();
printf
(
"
\n\n
"
);
...
...
tests/mal_test_0.vcxproj
View file @
b2815ccf
...
...
@@ -141,7 +141,7 @@
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<RuntimeLibrary>
MultiThreadedDebug
</RuntimeLibrary>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -162,7 +162,7 @@
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<RuntimeLibrary>
MultiThreadedDebug
</RuntimeLibrary>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -183,6 +183,7 @@
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<RuntimeLibrary>
MultiThreadedDebug
</RuntimeLibrary>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
NotSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -202,7 +203,7 @@
<SDLCheck>
true
</SDLCheck>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -226,7 +227,7 @@
<SDLCheck>
true
</SDLCheck>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -250,6 +251,7 @@
<SDLCheck>
true
</SDLCheck>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
NotSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -269,21 +271,21 @@
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
true
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_profiling.c"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
true
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_test_0.c"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|ARM'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
false
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_test_0.c"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
true
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_test_0.cpp"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
true
</ExcludedFromBuild>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment