Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
M
miniaudio
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
MyCard
miniaudio
Commits
b2815ccf
"...svn:/svn.code.sf.net/p/irrlicht/code/trunk@643" did not exist on "c00ce1d372a86762ca0fad9d3c6a342aa3feda89"
Commit
b2815ccf
authored
May 27, 2018
by
David Reid
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add SSE2 optimized implementation of f32 -> s16 conversion.
parent
073e89e4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
651 additions
and
140 deletions
+651
-140
mini_al.h
mini_al.h
+188
-124
tests/mal_profiling.c
tests/mal_profiling.c
+448
-3
tests/mal_test_0.vcxproj
tests/mal_test_0.vcxproj
+15
-13
No files found.
mini_al.h
View file @
b2815ccf
...
...
@@ -3306,16 +3306,25 @@ static MAL_INLINE mal_int32 mal_rand_range_s32(mal_int32 lo, mal_int32 hi)
}
// Produces one rectangle-mode dither value: a single draw from
// mal_rand_range_f32() using the bounds [ditherMin, ditherMax] as given.
static MAL_INLINE float mal_dither_f32_rectangle(float ditherMin, float ditherMax)
{
    const float noise = mal_rand_range_f32(ditherMin, ditherMax);
    return noise;
}
// Produces one triangle-mode dither value: the sum of two draws from
// mal_rand_range_f32(), the first over [ditherMin, 0] and the second over
// [0, ditherMax]. The draws are taken in that order, which matters because the
// generator is shared state.
static MAL_INLINE float mal_dither_f32_triangle(float ditherMin, float ditherMax)
{
    const float lowerDraw = mal_rand_range_f32(ditherMin, 0);
    const float upperDraw = mal_rand_range_f32(0, ditherMax);

    return lowerDraw + upperDraw;
}
static MAL_INLINE float mal_dither_f32(mal_dither_mode ditherMode, float ditherMin, float ditherMax)
{
if (ditherMode == mal_dither_mode_rectangle) {
float a = mal_rand_range_f32(ditherMin, ditherMax);
return a;
return mal_dither_f32_rectangle(ditherMin, ditherMax);
}
if (ditherMode == mal_dither_mode_triangle) {
float a = mal_rand_range_f32(ditherMin, 0);
float b = mal_rand_range_f32(0, ditherMax);
return a + b;
return mal_dither_f32_triangle(ditherMin, ditherMax);
}
return 0;
...
...
@@ -17273,8 +17282,8 @@ void mal_pcm_u8_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17284,13 +17293,9 @@ void mal_pcm_u8_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_u8_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17317,8 +17322,8 @@ void mal_pcm_u8_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17329,8 +17334,8 @@ void mal_pcm_u8_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither_
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_s24__reference(dst, src, count, ditherMode);
#else
#if
def MAL_USE_SSE
mal_pcm_u8_to_s24__sse(dst, src, count, ditherMode);
#if
defined(MAL_SUPPORT_SSE2)
mal_pcm_u8_to_s24__sse
2
(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_s24__optimized(dst, src, count, ditherMode);
#endif
...
...
@@ -17359,8 +17364,8 @@ void mal_pcm_u8_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17370,13 +17375,9 @@ void mal_pcm_u8_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_u8_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17402,8 +17403,8 @@ void mal_pcm_u8_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_u8_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_u8_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17413,13 +17414,9 @@ void mal_pcm_u8_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_u8_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_u8_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_u8_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17540,8 +17537,8 @@ void mal_pcm_s16_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17551,13 +17548,9 @@ void mal_pcm_s16_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17588,8 +17581,8 @@ void mal_pcm_s16_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17599,13 +17592,9 @@ void mal_pcm_s16_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_s24__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_s24__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_s24__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17627,8 +17616,8 @@ void mal_pcm_s16_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17638,13 +17627,9 @@ void mal_pcm_s16_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17678,8 +17663,8 @@ void mal_pcm_s16_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s16_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s16_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17689,13 +17674,9 @@ void mal_pcm_s16_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s16_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s16_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s16_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17794,8 +17775,8 @@ void mal_pcm_s24_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17805,13 +17786,9 @@ void mal_pcm_s24_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17851,8 +17828,8 @@ void mal_pcm_s24_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17862,13 +17839,9 @@ void mal_pcm_s24_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17898,8 +17871,8 @@ void mal_pcm_s24_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17909,13 +17882,9 @@ void mal_pcm_s24_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -17949,8 +17918,8 @@ void mal_pcm_s24_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s24_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s24_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -17960,13 +17929,9 @@ void mal_pcm_s24_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s24_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s24_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s24_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18072,8 +18037,8 @@ void mal_pcm_s32_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18083,13 +18048,9 @@ void mal_pcm_s32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18129,8 +18090,8 @@ void mal_pcm_s32_to_s16__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_s16__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_s16__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18140,13 +18101,9 @@ void mal_pcm_s32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18171,8 +18128,8 @@ void mal_pcm_s32_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18182,13 +18139,9 @@ void mal_pcm_s32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_s24__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_s24__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_s24__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18228,8 +18181,8 @@ void mal_pcm_s32_to_f32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_s32_to_f32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_s32_to_f32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18239,13 +18192,9 @@ void mal_pcm_s32_to_f32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_s32_to_f32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_s32_to_f32__sse(dst, src, count, ditherMode);
#else
mal_pcm_s32_to_f32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18337,8 +18286,8 @@ void mal_pcm_f32_to_u8__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_u8__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_u8__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18348,13 +18297,9 @@ void mal_pcm_f32_to_u8(void* dst, const void* src, mal_uint64 count, mal_dither_
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_u8__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_u8__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_u8__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18392,13 +18337,144 @@ void mal_pcm_f32_to_s16__reference(void* dst, const void* src, mal_uint64 count,
// Scalar f32 -> s16 conversion with the main loop unrolled four samples at a time.
//
//   dst        - output buffer; receives `count` mal_int16 samples.
//   src        - input buffer; `count` float samples are read from it.
//   count      - number of samples to convert.
//   ditherMode - dither to add to each sample before clipping.
//
// Each sample has dither added, is clipped to [-1, 1], scaled by 32767 and then
// truncated to mal_int16 by the cast.
//
// Note: this function performs the conversion itself. It must not also forward
// to mal_pcm_f32_to_s16__reference(), because that would convert everything
// twice and consume an extra `count` dither values from the random generator.
void mal_pcm_f32_to_s16__optimized(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
    mal_int16* dst_s16 = (mal_int16*)dst;
    const float* src_f32 = (const float*)src;

    // With dithering disabled both bounds stay at zero.
    float ditherMin = 0;
    float ditherMax = 0;
    if (ditherMode != mal_dither_mode_none) {
        ditherMin = 1.0f / -32768;
        ditherMax = 1.0f /  32767;
    }

    mal_uint64 i = 0;

    // Unrolled.
    mal_uint64 count4 = count >> 2;
    for (mal_uint64 i4 = 0; i4 < count4; i4 += 1) {
        float d0 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
        float d1 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
        float d2 = mal_dither_f32(ditherMode, ditherMin, ditherMax);
        float d3 = mal_dither_f32(ditherMode, ditherMin, ditherMax);

        float x0 = src_f32[i+0];
        float x1 = src_f32[i+1];
        float x2 = src_f32[i+2];
        float x3 = src_f32[i+3];

        x0 = x0 + d0;
        x1 = x1 + d1;
        x2 = x2 + d2;
        x3 = x3 + d3;

        // clip
        x0 = ((x0 < -1) ? -1 : ((x0 > 1) ? 1 : x0));
        x1 = ((x1 < -1) ? -1 : ((x1 > 1) ? 1 : x1));
        x2 = ((x2 < -1) ? -1 : ((x2 > 1) ? 1 : x2));
        x3 = ((x3 < -1) ? -1 : ((x3 > 1) ? 1 : x3));

        // -1..1 to -32767..32767
        x0 = x0 * 32767.0f;
        x1 = x1 * 32767.0f;
        x2 = x2 * 32767.0f;
        x3 = x3 * 32767.0f;

        dst_s16[i+0] = (mal_int16)x0;
        dst_s16[i+1] = (mal_int16)x1;
        dst_s16[i+2] = (mal_int16)x2;
        dst_s16[i+3] = (mal_int16)x3;

        i += 4;
    }

    // Leftover.
    for (; i < count; i += 1) {
        float x = src_f32[i];
        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
        x = x * 32767.0f;                           // -1..1 to -32767..32767

        dst_s16[i] = (mal_int16)x;
    }
}
#if defined(MAL_SUPPORT_SSE2)
// SSE2 f32 -> s16 conversion. Eight samples are processed per iteration (two
// __m128 of floats packed into one __m128i of eight s16's); any remainder is
// handled by a scalar loop.
//
//   dst        - output buffer; receives `count` mal_int16 samples. No alignment is required.
//   src        - input buffer; `count` float samples are read from it. No alignment is required.
//   count      - number of samples to convert.
//   ditherMode - dither to add to each sample before clipping.
//
// The vector path is written to produce the same values as the scalar leftover
// loop: dither values are generated in sample order, samples are clipped to
// [-1, 1] before scaling, and the float -> int conversion truncates.
void mal_pcm_f32_to_s16__sse2(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
    mal_int16* dst_s16 = (mal_int16*)dst;
    const float* src_f32 = (const float*)src;

    // With dithering disabled both bounds stay at zero.
    float ditherMin = 0;
    float ditherMax = 0;
    if (ditherMode != mal_dither_mode_none) {
        ditherMin = 1.0f / -32768;
        ditherMax = 1.0f /  32767;
    }

    const __m128 clipMin = _mm_set1_ps(-1.0f);
    const __m128 clipMax = _mm_set1_ps( 1.0f);
    const __m128 scale   = _mm_set1_ps(32767.0f);

    mal_uint64 i = 0;

    // SSE2. SSE allows us to output 8 s16's at a time which means our loop is unrolled 8 times.
    mal_uint64 count8 = count >> 3;
    for (mal_uint64 i8 = 0; i8 < count8; i8 += 1) {
        // Dither values are written to an array one at a time so that the order in which the
        // random generator is consumed is well defined (function argument evaluation order,
        // as with _mm_set_ps(), is not).
        float dither[8] = {0, 0, 0, 0, 0, 0, 0, 0};
        if (ditherMode == mal_dither_mode_rectangle) {
            for (int k = 0; k < 8; k += 1) {
                dither[k] = mal_dither_f32_rectangle(ditherMin, ditherMax);
            }
        } else if (ditherMode != mal_dither_mode_none) {
            for (int k = 0; k < 8; k += 1) {
                dither[k] = mal_dither_f32_triangle(ditherMin, ditherMax);
            }
        }

        __m128 d0 = _mm_loadu_ps(dither + 0);
        __m128 d1 = _mm_loadu_ps(dither + 4);

        // Unaligned loads: the caller's buffer is not guaranteed to be 16-byte aligned.
        __m128 x0 = _mm_loadu_ps(src_f32 + i + 0);
        __m128 x1 = _mm_loadu_ps(src_f32 + i + 4);

        x0 = _mm_add_ps(x0, d0);
        x1 = _mm_add_ps(x1, d1);

        // clip. Without this, out-of-range input overflows the float -> int32 conversion.
        x0 = _mm_min_ps(_mm_max_ps(x0, clipMin), clipMax);
        x1 = _mm_min_ps(_mm_max_ps(x1, clipMin), clipMax);

        // -1..1 to -32767..32767
        x0 = _mm_mul_ps(x0, scale);
        x1 = _mm_mul_ps(x1, scale);

        // Truncating conversion, matching the (mal_int16) cast used by the scalar code.
        _mm_storeu_si128((__m128i*)(dst_s16 + i), _mm_packs_epi32(_mm_cvttps_epi32(x0), _mm_cvttps_epi32(x1)));

        i += 8;
    }

    // Leftover.
    for (; i < count; i += 1) {
        float x = src_f32[i];
        x = x + mal_dither_f32(ditherMode, ditherMin, ditherMax);
        x = ((x < -1) ? -1 : ((x > 1) ? 1 : x));    // clip
        x = x * 32767.0f;                           // -1..1 to -32767..32767

        dst_s16[i] = (mal_int16)x;
    }
}
#endif
...
...
@@ -18406,13 +18482,9 @@ void mal_pcm_f32_to_s16(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_s16__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_s16__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_s16__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18450,8 +18522,8 @@ void mal_pcm_f32_to_s24__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_s24__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_s24__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18461,13 +18533,9 @@ void mal_pcm_f32_to_s24(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_s24__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_s24__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_s24__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
@@ -18502,8 +18570,8 @@ void mal_pcm_f32_to_s32__optimized(void* dst, const void* src, mal_uint64 count,
mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
}
#if
def MAL_USE_SSE
void mal_pcm_f32_to_s32__sse(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
#if
defined(MAL_SUPPORT_SSE2)
void mal_pcm_f32_to_s32__sse
2
(void* dst, const void* src, mal_uint64 count, mal_dither_mode ditherMode)
{
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
}
...
...
@@ -18513,13 +18581,9 @@ void mal_pcm_f32_to_s32(void* dst, const void* src, mal_uint64 count, mal_dither
{
#ifdef MAL_USE_REFERENCE_CONVERSION_APIS
mal_pcm_f32_to_s32__reference(dst, src, count, ditherMode);
#else
#ifdef MAL_USE_SSE
mal_pcm_f32_to_s32__sse(dst, src, count, ditherMode);
#else
mal_pcm_f32_to_s32__optimized(dst, src, count, ditherMode);
#endif
#endif
}
...
...
tests/mal_profiling.c
View file @
b2815ccf
...
...
@@ -34,6 +34,429 @@ const char* mal_src_algorithm_to_string(mal_src_algorithm algorithm)
return
"Unknown"
;
}
// Returns a human-readable name for a dither mode, for use in profiling output.
// Any value that is not one of the three handled modes yields "Unknown".
const char* mal_dither_mode_to_string(mal_dither_mode ditherMode)
{
    switch (ditherMode)
    {
        case mal_dither_mode_none:      return "None";
        case mal_dither_mode_rectangle: return "Rectangle";
        case mal_dither_mode_triangle:  return "Triangle";
    }

    return "Unknown";
}
///////////////////////////////////////////////////////////////////////////////
//
// Format Conversion
//
///////////////////////////////////////////////////////////////////////////////
// State for the format conversion profiling tests.
// NOTE(review): field meanings below are inferred from their names only; confirm against the users of this struct.
typedef struct
{
    void*      pBaseData;     // presumably the start of the sample data
    mal_uint64 sampleCount;   // presumably the total number of samples at pBaseData
    mal_uint64 iNextSample;   // presumably the index of the next sample to consume
} format_conversion_data;
// Converts sampleCount samples from formatIn (read from pIn) to formatOut (written to pOut)
// using the "__reference" conversion routines. Format pairs that have no case here (including
// formatIn == formatOut) do nothing.
void pcm_convert__reference(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    switch (formatIn)
    {
        case mal_format_u8:
        {
            switch (formatOut)
            {
                case mal_format_s16: mal_pcm_u8_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_u8_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_u8_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_u8_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s16:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s16_to_u8__reference( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_s16_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_s16_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s16_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s24:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s24_to_u8__reference( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_s24_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_s24_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s24_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s32_to_u8__reference( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_s32_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_s32_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s32_to_f32__reference(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_f32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_f32_to_u8__reference( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_f32_to_s16__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_f32_to_s24__reference(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_f32_to_s32__reference(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        default: break;
    }
}
// Converts sampleCount samples from formatIn (read from pIn) to formatOut (written to pOut)
// using the "__optimized" conversion routines. Format pairs that have no case here (including
// formatIn == formatOut) do nothing.
void pcm_convert__optimized(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    switch (formatIn)
    {
        case mal_format_u8:
        {
            switch (formatOut)
            {
                case mal_format_s16: mal_pcm_u8_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_u8_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_u8_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_u8_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s16:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s16_to_u8__optimized( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_s16_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_s16_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s16_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s24:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s24_to_u8__optimized( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_s24_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_s24_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s24_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s32_to_u8__optimized( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_s32_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_s32_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s32_to_f32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_f32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_f32_to_u8__optimized( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_f32_to_s16__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_f32_to_s24__optimized(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_f32_to_s32__optimized(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        default: break;
    }
}
#if defined(MAL_SUPPORT_SSE2)
// Converts sampleCount samples from formatIn (read from pIn) to formatOut (written to pOut)
// using the "__sse2" conversion routines. Only compiled when MAL_SUPPORT_SSE2 is defined.
// Format pairs that have no case here (including formatIn == formatOut) do nothing.
void pcm_convert__sse2(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    switch (formatIn)
    {
        case mal_format_u8:
        {
            switch (formatOut)
            {
                case mal_format_s16: mal_pcm_u8_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_u8_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_u8_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_u8_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s16:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s16_to_u8__sse2( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_s16_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_s16_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s16_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s24:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s24_to_u8__sse2( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_s24_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_s24_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s24_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_s32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_s32_to_u8__sse2( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_s32_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_s32_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_f32: mal_pcm_s32_to_f32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        case mal_format_f32:
        {
            switch (formatOut)
            {
                case mal_format_u8:  mal_pcm_f32_to_u8__sse2( pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s16: mal_pcm_f32_to_s16__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s24: mal_pcm_f32_to_s24__sse2(pOut, pIn, sampleCount, ditherMode); break;
                case mal_format_s32: mal_pcm_f32_to_s32__sse2(pOut, pIn, sampleCount, ditherMode); break;
                default: break;
            }
        } break;

        default: break;
    }
}
#endif
#if defined(MAL_SUPPORT_AVX)
// AVX entry point for the format conversion profiler. No AVX-specific conversion exists in this file; the request is
// handed to pcm_convert__sse2() with all arguments unchanged.
void pcm_convert__avx(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    pcm_convert__sse2(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_AVX512)
// AVX-512 entry point for the format conversion profiler. No AVX-512-specific conversion exists in this file; the
// request is handed to pcm_convert__avx() with all arguments unchanged.
void pcm_convert__avx512(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    pcm_convert__avx(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
#if defined(MAL_SUPPORT_NEON)
// NEON entry point for the format conversion profiler. No NEON-specific conversion exists in this file; the request
// is handed to pcm_convert__reference() with all arguments unchanged.
void pcm_convert__neon(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode)
{
    pcm_convert__reference(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
}
#endif
// Converts sampleCount samples from pIn (formatIn) into pOut (formatOut) using the implementation selected by mode.
//
// A SIMD implementation can only be selected when its MAL_SUPPORT_* macro is defined at compile time. For any mode
// that is not handled, nothing is written to pOut.
void pcm_convert(void* pOut, mal_format formatOut, const void* pIn, mal_format formatIn, mal_uint64 sampleCount, mal_dither_mode ditherMode, simd_mode mode)
{
    // The dithering seed is reset on every call so that results from different implementations can be compared.
    mal_seed(1234);

    if (mode == simd_mode_scalar) {
        pcm_convert__optimized(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }

#if defined(MAL_SUPPORT_SSE2)
    if (mode == simd_mode_sse2) {
        pcm_convert__sse2(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif

#if defined(MAL_SUPPORT_AVX)
    if (mode == simd_mode_avx) {
        pcm_convert__avx(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif

#if defined(MAL_SUPPORT_AVX512)
    if (mode == simd_mode_avx512) {
        pcm_convert__avx512(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif

#if defined(MAL_SUPPORT_NEON)
    if (mode == simd_mode_neon) {
        pcm_convert__neon(pOut, formatOut, pIn, formatIn, sampleCount, ditherMode);
        return;
    }
#endif
}
// Profiles and validates a single conversion implementation.
//
// Converts pBaseData (sampleCount samples in formatIn) into a freshly allocated buffer in formatOut using the
// implementation selected by mode, measures how long the conversion takes, compares the output against
// pReferenceData and prints a one-line report.
//
// Comparison rules:
//   - s16 output may differ from the reference by at most 1 per sample.
//   - Every other output format must match the reference byte-for-byte.
//
// referenceTime is the time taken by the reference conversion; it is only used to print the relative speed.
//
// Returns -1 if the test buffer could not be allocated, 0 otherwise (a failed comparison is reported on stdout but
// still returns 0).
int do_profiling__format_conversion__profile_individual(mal_format formatIn, mal_format formatOut, mal_dither_mode ditherMode, const void* pBaseData, mal_uint64 sampleCount, simd_mode mode, const void* pReferenceData, double referenceTime)
{
    // The size of an output sample is the same for every sample, so it is looked up once here rather than on each
    // iteration of the comparison loop.
    mal_uint32 bps = mal_get_bytes_per_sample(formatOut);

    void* pTestData = mal_aligned_malloc((size_t)(sampleCount * bps), MAL_SIMD_ALIGNMENT);
    if (pTestData == NULL) {
        printf("Out of memory.\n");
        return -1;
    }

    mal_timer timer;
    mal_timer_init(&timer);

    double timeTaken = mal_timer_get_time_in_seconds(&timer);
    {
        pcm_convert(pTestData, formatOut, pBaseData, formatIn, sampleCount, ditherMode, mode);
    }
    timeTaken = mal_timer_get_time_in_seconds(&timer) - timeTaken;

    // Compare with the reference for correctness.
    mal_bool32 passed = MAL_TRUE;
    for (mal_uint64 iSample = 0; iSample < sampleCount; ++iSample) {
        // We need to compare on a format by format basis because we allow for very slight deviations in results
        // depending on the output format.
        switch (formatOut)
        {
            case mal_format_s16:
            {
                mal_int16 a = ((const mal_int16*)pReferenceData)[iSample];
                mal_int16 b = ((const mal_int16*)pTestData)[iSample];
                if (abs(a - b) > 1) {
                    printf("Incorrect Sample: (%d) %d != %d\n", (int)iSample, a, b);
                    passed = MAL_FALSE;
                }
            } break;

            default:
            {
                if (memcmp(mal_offset_ptr(pReferenceData, iSample*bps), mal_offset_ptr(pTestData, iSample*bps), bps) != 0) {
                    printf("Incorrect Sample: (%d)\n", (int)iSample);
                    passed = MAL_FALSE;
                }
            } break;
        }
    }

    if (passed) {
        printf(" [PASSED] ");
    } else {
        printf(" [FAILED] ");
    }

    // The percentage is the reference time relative to this implementation's time.
    printf("(Dither = %s) %s -> %s (%s): %.4fms (%.2f%%)\n", mal_dither_mode_to_string(ditherMode), mal_get_format_name(formatIn), mal_get_format_name(formatOut), simd_mode_to_string(mode), timeTaken*1000, referenceTime/timeTaken*100);

    mal_aligned_free(pTestData);
    return 0;
}
// Profiles every available conversion implementation for one (formatIn -> formatOut, ditherMode) combination.
//
// Steps:
//   1. Generate 1000000 f32 samples from a sine wave.
//   2. Convert them to formatIn (no dithering); this becomes the base data fed to every implementation.
//   3. Run and time pcm_convert__reference() to produce the reference output and the reference time.
//   4. Run do_profiling__format_conversion__profile_individual() for the scalar path and for each SIMD path whose
//      mal_has_*() check succeeds.
//
// Returns 0 on success, or -1 if any buffer could not be allocated (here or in one of the individual runs).
int do_profiling__format_conversion__profile_set(mal_format formatIn, mal_format formatOut, mal_dither_mode ditherMode)
{
    // Generate our base data to begin with. This is generated from an f32 sine wave which is converted to formatIn.
    // That then becomes our base data.
    mal_uint32 sampleCount = 1000000;

    float* pSourceData = (float*)mal_aligned_malloc(sampleCount*sizeof(*pSourceData), MAL_SIMD_ALIGNMENT);
    if (pSourceData == NULL) {
        printf("Out of memory.\n");
        return -1;
    }

    mal_sine_wave sineWave;
    mal_sine_wave_init(1.0, 400, 48000, &sineWave);
    mal_sine_wave_read(&sineWave, sampleCount, pSourceData);

    // Each failure path below frees only the buffers that were successfully allocated; mal_aligned_free() is never
    // handed a NULL pointer.
    void* pBaseData = mal_aligned_malloc(sampleCount * mal_get_bytes_per_sample(formatIn), MAL_SIMD_ALIGNMENT);
    if (pBaseData == NULL) {
        printf("Out of memory.\n");
        mal_aligned_free(pSourceData);
        return -1;
    }

    mal_pcm_convert(pBaseData, formatIn, pSourceData, mal_format_f32, sampleCount, mal_dither_mode_none);

    // Reference first so we can get a benchmark.
    void* pReferenceData = mal_aligned_malloc(sampleCount * mal_get_bytes_per_sample(formatOut), MAL_SIMD_ALIGNMENT);
    if (pReferenceData == NULL) {
        printf("Out of memory.\n");
        mal_aligned_free(pBaseData);
        mal_aligned_free(pSourceData);
        return -1;
    }

    mal_timer timer;
    mal_timer_init(&timer);

    double referenceTime = mal_timer_get_time_in_seconds(&timer);
    {
        pcm_convert__reference(pReferenceData, formatOut, pBaseData, formatIn, sampleCount, ditherMode);
    }
    referenceTime = mal_timer_get_time_in_seconds(&timer) - referenceTime;

    // Here is where each optimized implementation is profiled. Every available implementation is always run; a
    // failure in one of them is remembered and reported through the return value.
    int result = 0;

    if (do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_scalar, pReferenceData, referenceTime) != 0) {
        result = -1;
    }

    if (mal_has_sse2()) {
        if (do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_sse2, pReferenceData, referenceTime) != 0) {
            result = -1;
        }
    }

    if (mal_has_avx()) {
        if (do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_avx, pReferenceData, referenceTime) != 0) {
            result = -1;
        }
    }

    if (mal_has_avx512f()) {
        if (do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_avx512, pReferenceData, referenceTime) != 0) {
            result = -1;
        }
    }

    if (mal_has_neon()) {
        if (do_profiling__format_conversion__profile_individual(formatIn, formatOut, ditherMode, pBaseData, sampleCount, simd_mode_neon, pReferenceData, referenceTime) != 0) {
            result = -1;
        }
    }

    mal_aligned_free(pReferenceData);
    mal_aligned_free(pBaseData);
    mal_aligned_free(pSourceData);

    return result;
}
// Entry point for the format conversion profiling. Currently profiles a single combination: f32 -> s16 with no
// dithering.
//
// Returns the result of the profiled set (0 on success, non-zero if it reported a failure).
int do_profiling__format_conversion()
{
    return do_profiling__format_conversion__profile_set(mal_format_f32, mal_format_s16, mal_dither_mode_none);
}
///////////////////////////////////////////////////////////////////////////////
//
// Channel Routing
//
///////////////////////////////////////////////////////////////////////////////
// Two file-scope buffers, each 8 rows of 48000 floats.
// NOTE(review): judging by the names these hold the benchmark output and the output under test for the channel
// routing profiling (presumably one row per channel) - the code that fills and reads them is not visible here; confirm.
float
g_ChannelRouterProfilingOutputBenchmark
[
8
][
48000
];
float
g_ChannelRouterProfilingOutput
[
8
][
48000
];
...
...
@@ -416,6 +839,7 @@ int do_profiling__src__profile_set(src_data* pBaseData, mal_uint32 sampleRateIn,
// Now that we have the reference data to compare against we can go ahead and measure the SIMD optimizations.
do_profiling__src__profile_individual
(
pBaseData
,
sampleRateIn
,
sampleRateOut
,
algorithm
,
simd_mode_scalar
,
&
referenceData
);
if
(
mal_has_sse2
())
{
do_profiling__src__profile_individual
(
pBaseData
,
sampleRateIn
,
sampleRateOut
,
algorithm
,
simd_mode_sse2
,
&
referenceData
);
}
...
...
@@ -446,7 +870,7 @@ int do_profiling__src()
src_data
baseData
;
mal_zero_object
(
&
baseData
);
baseData
.
channels
=
8
;
baseData
.
frameCount
=
10000
;
baseData
.
frameCount
=
10000
0
;
for
(
mal_uint32
iChannel
=
0
;
iChannel
<
baseData
.
channels
;
++
iChannel
)
{
baseData
.
pFrameData
[
iChannel
]
=
(
float
*
)
mal_aligned_malloc
((
size_t
)(
baseData
.
frameCount
*
sizeof
(
float
)),
MAL_SIMD_ALIGNMENT
);
if
(
baseData
.
pFrameData
[
iChannel
]
==
NULL
)
{
...
...
@@ -475,16 +899,33 @@ int do_profiling__src()
}
// Narrows eight floats, supplied as two 4xf32 vectors, into one 8xi16 vector. Each vector is first converted to
// 32-bit integers and the two results are then packed to 16 bits with signed saturation. Lanes from f32_0 end up in
// the low half of the result and lanes from f32_1 in the high half.
static inline __m128i drmath_vf32_to_vi16__sse2(__m128 f32_0, __m128 f32_1)
{
    __m128i lo = _mm_cvtps_epi32(f32_0);
    __m128i hi = _mm_cvtps_epi32(f32_1);

    return _mm_packs_epi32(lo, hi);
}
int
main
(
int
argc
,
char
**
argv
)
{
(
void
)
argc
;
(
void
)
argv
;
{
//__m128 f0 = _mm_set_ps(32780, 2, 1, 0);
//__m128 f1 = _mm_set_ps(-32780, 6, 5, 4);
//__m128i r = drmath_vf32_to_vi16__sse2(f0, f1);
//int a = 5;
}
// Summary.
if
(
mal_has_sse2
())
{
printf
(
"Has SSE
:
YES
\n
"
);
printf
(
"Has SSE
2:
YES
\n
"
);
}
else
{
printf
(
"Has SSE
:
NO
\n
"
);
printf
(
"Has SSE
2:
NO
\n
"
);
}
if
(
mal_has_avx
())
{
printf
(
"Has AVX: YES
\n
"
);
...
...
@@ -505,6 +946,10 @@ int main(int argc, char** argv)
printf
(
"
\n
"
);
// Format conversion.
do_profiling__format_conversion
();
printf
(
"
\n\n
"
);
// Channel routing.
do_profiling__channel_routing
();
printf
(
"
\n\n
"
);
...
...
tests/mal_test_0.vcxproj
View file @
b2815ccf
...
...
@@ -141,7 +141,7 @@
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<RuntimeLibrary>
MultiThreadedDebug
</RuntimeLibrary>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -162,7 +162,7 @@
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<RuntimeLibrary>
MultiThreadedDebug
</RuntimeLibrary>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -183,6 +183,7 @@
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<RuntimeLibrary>
MultiThreadedDebug
</RuntimeLibrary>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
NotSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -202,7 +203,7 @@
<SDLCheck>
true
</SDLCheck>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -226,7 +227,7 @@
<SDLCheck>
true
</SDLCheck>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
No
Extensions
</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet>
No
tSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -250,6 +251,7 @@
<SDLCheck>
true
</SDLCheck>
<AdditionalIncludeDirectories>
%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
<CompileAs>
Default
</CompileAs>
<EnableEnhancedInstructionSet>
NotSet
</EnableEnhancedInstructionSet>
</ClCompile>
<Link>
<SubSystem>
Console
</SubSystem>
...
...
@@ -269,21 +271,21 @@
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
true
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_profiling.c"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
true
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_test_0.c"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|ARM'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
false
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
false
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_test_0.c"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|ARM'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
true
</ExcludedFromBuild>
</ClCompile>
<ClCompile
Include=
"mal_test_0.cpp"
>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
true
</ExcludedFromBuild>
<ExcludedFromBuild
Condition=
"'$(Configuration)|$(Platform)'=='Debug|ARM'"
>
true
</ExcludedFromBuild>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment