Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
M
miniaudio
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
MyCard
miniaudio
Commits
22d7b740
Commit
22d7b740
authored
May 26, 2018
by
David Reid
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial work on SSE2 optimizations for sample rate conversion.
parent
5dafa54f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
430 additions
and
21 deletions
+430
-21
mini_al.h
mini_al.h
+130
-13
tests/mal_profiling.c
tests/mal_profiling.c
+300
-8
No files found.
mini_al.h
View file @
22d7b740
...
...
@@ -3211,9 +3211,20 @@ static MAL_INLINE float mal_mix_f32(float x, float y, float a)
}
// Linear interpolation between x and y by the factor a, computed as x + (y - x)*a.
//
// The computation is written as an explicit subtract, multiply, add sequence, which is the same
// order of operations used by mal_mix_f32_fast__sse2().
static MAL_INLINE float mal_mix_f32_fast(float x, float y, float a)
{
    float r0 = (y - x);
    float r1 = r0*a;
    return x + r1;
}
#if defined(MAL_SUPPORT_SSE2)
// Four-lane counterpart of mal_mix_f32_fast(): evaluates x + (y - x)*a on every lane.
static MAL_INLINE __m128 mal_mix_f32_fast__sse2(__m128 x, __m128 y, __m128 a)
{
    __m128 delta  = _mm_sub_ps(y, x);       // y - x
    __m128 scaled = _mm_mul_ps(delta, a);   // (y - x)*a
    return _mm_add_ps(x, scaled);           // x + (y - x)*a
}
#endif
static MAL_INLINE double mal_mix_f64(double x, double y, double a)
{
return x*(1-a) + y*a;
...
...
@@ -3384,7 +3395,7 @@ void mal_timer_init(mal_timer* pTimer)
LARGE_INTEGER counter;
QueryPerformanceCounter(&counter);
pTimer->counter =
(mal_uint64)
counter.QuadPart;
pTimer->counter = counter.QuadPart;
}
double mal_timer_get_time_in_seconds(mal_timer* pTimer)
...
...
@@ -3394,7 +3405,7 @@ double mal_timer_get_time_in_seconds(mal_timer* pTimer)
return 0;
}
return (
counter.QuadPart - pTimer->counter) / (double)
g_mal_TimerFrequency.QuadPart;
return (
double)(counter.QuadPart - pTimer->counter) /
g_mal_TimerFrequency.QuadPart;
}
#elif defined(MAL_APPLE) && (__MAC_OS_X_VERSION_MIN_REQUIRED < 101200)
uint64_t g_mal_TimerFrequency = 0;
...
...
@@ -19677,7 +19688,7 @@ void mal_src__build_sinc_table__sinc(mal_src* pSRC)
mal_assert(pSRC != NULL);
pSRC->sinc.table[0] = 1.0f;
for (
int
i = 1; i < mal_countof(pSRC->sinc.table); i += 1) {
for (
mal_uint32
i = 1; i < mal_countof(pSRC->sinc.table); i += 1) {
double x = i*MAL_PI_D / MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
pSRC->sinc.table[i] = (float)(sin(x)/x);
}
...
...
@@ -19693,7 +19704,7 @@ void mal_src__build_sinc_table__hann(mal_src* pSRC)
{
mal_src__build_sinc_table__sinc(pSRC);
for (
int
i = 0; i < mal_countof(pSRC->sinc.table); i += 1) {
for (
mal_uint32
i = 0; i < mal_countof(pSRC->sinc.table); i += 1) {
double x = pSRC->sinc.table[i];
double N = MAL_SRC_SINC_MAX_WINDOW_WIDTH*2;
double n = ((double)(i) / MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION) + MAL_SRC_SINC_MAX_WINDOW_WIDTH;
...
...
@@ -20070,7 +20081,7 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC,
float xabs = (float)fabs(x);
if (xabs >= MAL_SRC_SINC_MAX_WINDOW_WIDTH /*pSRC->config.sinc.windowWidth*/) {
return 0;
xabs = 1; // <-- A non-zero integer will always return 0.
}
xabs = xabs * MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION;
...
...
@@ -20084,6 +20095,60 @@ static MAL_INLINE float mal_src_sinc__interpolation_factor(const mal_src* pSRC,
#endif
}
#if defined(MAL_SUPPORT_SSE2)
// Returns the absolute value of each of the four floats in x.
//
// This is done by clearing the top (sign) bit of every 32-bit lane. The mask is built directly
// with SSE2 intrinsics so that no writable static table or pointer cast between integer and vector
// types is required.
static MAL_INLINE __m128 mal_fabsf_sse2(__m128 x)
{
    const __m128 signClearMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    return _mm_and_ps(signClearMask, x);
}
// Drops the fractional part of each lane of x by converting it to a 32-bit integer using the
// truncating conversion and then converting that integer back to a float.
static MAL_INLINE __m128 mal_truncf_sse2(__m128 x)
{
    __m128i whole = _mm_cvttps_epi32(x);
    return _mm_cvtepi32_ps(whole);
}
// SSE2 version of the sinc interpolation factor lookup, processing four positions at once.
//
// pSRC - The converter whose sinc.table is used as the lookup table.
// x    - Pointer to the four positions to evaluate. It is only read from.
//
// For each lane the absolute position is scaled by MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION, the two
// neighbouring table entries are fetched, and the result is the linear interpolation between them.
static MAL_INLINE __m128 mal_src_sinc__interpolation_factor__sse2(const mal_src* pSRC, __m128* x)
{
    __m128 windowWidth128 = _mm_set1_ps(MAL_SRC_SINC_MAX_WINDOW_WIDTH);
    __m128 resolution128  = _mm_set1_ps(MAL_SRC_SINC_LOOKUP_TABLE_RESOLUTION);
    __m128 one            = _mm_set1_ps(1);

    __m128 xabs = mal_fabsf_sse2(*x);

    // if (MAL_SRC_SINC_MAX_WINDOW_WIDTH <= xabs) xabs = 1 else xabs = xabs;
    //
    // _mm_cmple_ps() is used instead of _mm_cmp_ps(). The latter is an AVX intrinsic and must not be
    // used in a code path that is only guaranteed to have SSE2 available.
    __m128 xcmp = _mm_cmple_ps(windowWidth128, xabs);
    xabs = _mm_or_ps(_mm_and_ps(one, xcmp), _mm_andnot_ps(xcmp, xabs));    // xabs = (xcmp) ? 1 : xabs;

    xabs = _mm_mul_ps(xabs, resolution128);
    __m128i ixabs = _mm_cvttps_epi32(xabs);

    // SSE2 has no gather instruction, so the four table indices are written out to memory and the
    // lookups are done one lane at a time.
    int tableIndex[4];
    _mm_storeu_si128((__m128i*)tableIndex, ixabs);

    __m128 lo = _mm_set_ps(
        pSRC->sinc.table[tableIndex[3]],
        pSRC->sinc.table[tableIndex[2]],
        pSRC->sinc.table[tableIndex[1]],
        pSRC->sinc.table[tableIndex[0]]
    );

    __m128 hi = _mm_set_ps(
        pSRC->sinc.table[tableIndex[3]+1],
        pSRC->sinc.table[tableIndex[2]+1],
        pSRC->sinc.table[tableIndex[1]+1],
        pSRC->sinc.table[tableIndex[0]+1]
    );

    // The fractional part of the scaled position is the blend factor between lo and hi.
    __m128 a = _mm_sub_ps(xabs, _mm_cvtepi32_ps(ixabs));
    __m128 r = mal_mix_f32_fast__sse2(lo, hi, a);

    return r;
}
#endif
mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount, void** ppSamplesOut, void* pUserData)
{
mal_assert(pSRC != NULL);
...
...
@@ -20122,21 +20187,66 @@ mal_uint64 mal_src_read_deinterleaved__sinc(mal_src* pSRC, mal_uint64 frameCount
outputFramesToRead = maxOutputFramesToRead;
}
float _windowSamplesUnaligned[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 + MAL_SIMD_ALIGNMENT];
float* windowSamples = (float*)(((mal_uintptr)_windowSamplesUnaligned + MAL_SIMD_ALIGNMENT-1) & ~(MAL_SIMD_ALIGNMENT-1));
float _iWindowFUnaligned[MAL_SRC_SINC_MAX_WINDOW_WIDTH*2 + MAL_SIMD_ALIGNMENT];
float* iWindowF = (float*)(((mal_uintptr)_iWindowFUnaligned + MAL_SIMD_ALIGNMENT-1) & ~(MAL_SIMD_ALIGNMENT-1));
for (mal_int32 i = 0; i < windowWidth2; ++i) {
iWindowF[i] = (float)(i - windowWidth);
}
for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) {
// Do SRC.
float timeIn = timeInBeg;
for (mal_uint32 iSample = 0; iSample < outputFramesToRead; iSample += 1) {
mal_int32 iTimeIn = (mal_int32)timeIn;
float sampleOut = 0;
for (mal_int32 iWindow = -windowWidth+1; iWindow < windowWidth; iWindow += 1) {
float t = (timeIn - iTimeIn);
float w = (float)(iWindow);
float iTimeInF = mal_floorf(timeIn);
mal_uint32 iTimeIn = (mal_uint32)iTimeInF;
//mal_int32 iWindowBeg = -windowWidth+1;
//mal_int32 iWindowEnd = windowWidth;
mal_int32 iWindow = 0;
// Pre-load the window samples into an aligned buffer to begin with. Need to put these into an aligned buffer to make SIMD easier.
windowSamples[0] = 0; // <-- The first sample is always zero.
for (mal_int32 i = 1; i < windowWidth2; ++i) {
windowSamples[i] = mal_src_sinc__get_input_sample_from_window(pSRC, iChannel, iTimeIn, i - windowWidth);
}
#if defined(MAL_SUPPORT_SSE2)
if (pSRC->useSSE2) {
__m128 t = _mm_set1_ps((timeIn - iTimeInF));
mal_int32 windowWidth4 = windowWidth2 >> 2;
for (mal_int32 iWindow4 = 0; iWindow4 < windowWidth4; iWindow4 += 1) {
__m128* s = (__m128*)windowSamples + iWindow4;
__m128* w = (__m128*)iWindowF + iWindow4;
__m128 x = _mm_sub_ps(t, *w);
__m128 a = mal_src_sinc__interpolation_factor__sse2(pSRC, &x);
__m128 r = _mm_mul_ps(*s, a);
sampleOut += ((float*)(&r))[0];
sampleOut += ((float*)(&r))[1];
sampleOut += ((float*)(&r))[2];
sampleOut += ((float*)(&r))[3];
}
iWindow += windowWidth4 * 4;
}
#endif
// Non-SIMD/Reference implementation.
for (; iWindow < windowWidth2; iWindow += 1) {
float s = windowSamples[iWindow];
float t = (timeIn - iTimeIn);
float w = iWindowF[iWindow];
float a = mal_src_sinc__interpolation_factor(pSRC, (t - w));
float
s = mal_src_sinc__get_input_sample_from_window(pSRC, iChannel, iTimeIn, iWindow)
;
float
r = s * a
;
sampleOut +=
s * a
;
sampleOut +=
r
;
}
ppNextSamplesOut[iChannel][iSample] = (float)sampleOut;
...
...
@@ -21902,6 +22012,13 @@ mal_result mal_decoder_init__internal(mal_decoder_read_proc onRead, mal_decoder_
mal_assert(pConfig != NULL);
mal_assert(pDecoder != NULL);
// Silence some warnings in the case that we don't have any decoder backends enabled.
(void)onRead;
(void)onSeek;
(void)pUserData;
(void)pConfig;
(void)pDecoder;
// We use trial and error to open a decoder.
mal_result result = MAL_NO_BACKEND;
...
...
tests/mal_profiling.c
View file @
22d7b740
#define MINI_AL_IMPLEMENTATION
#include "../mini_al.h"
// Identifies which implementation is being profiled. simd_mode_scalar is the non-SIMD reference
// path; the remaining values each select one SIMD instruction set.
typedef enum
{
    simd_mode_scalar = 0,
    simd_mode_sse2   = 1,
    simd_mode_avx    = 2,
    simd_mode_avx512 = 3,
    simd_mode_neon   = 4
} simd_mode;
// Returns a printable name for the given SIMD mode. Any value that is not one of the simd_mode
// enumerators yields "Unknown".
const char* simd_mode_to_string(simd_mode mode)
{
    static const char* const modeNames[] = {
        [simd_mode_scalar] = "Reference",
        [simd_mode_sse2]   = "SSE2",
        [simd_mode_avx]    = "AVX",
        [simd_mode_avx512] = "AVX-512",
        [simd_mode_neon]   = "NEON"
    };

    // Converting to unsigned first means a negative value also fails the range check.
    unsigned int index = (unsigned int)mode;
    if (index < sizeof(modeNames) / sizeof(modeNames[0])) {
        return modeNames[index];
    }

    return "Unknown";
}
// Returns a printable name for the given sample rate conversion algorithm, or "Unknown" when the
// value is not one of the algorithms handled here.
const char* mal_src_algorithm_to_string(mal_src_algorithm algorithm)
{
    if (algorithm == mal_src_algorithm_none) {
        return "Passthrough";
    }
    if (algorithm == mal_src_algorithm_linear) {
        return "Linear";
    }
    if (algorithm == mal_src_algorithm_sinc) {
        return "Sinc";
    }

    return "Unknown";
}
float
g_ChannelRouterProfilingOutputBenchmark
[
8
][
48000
];
float
g_ChannelRouterProfilingOutput
[
8
][
48000
];
double
g_ChannelRouterTime_Reference
=
0
;
...
...
@@ -9,7 +43,7 @@ double g_ChannelRouterTime_AVX = 0;
double
g_ChannelRouterTime_AVX512
=
0
;
double
g_ChannelRouterTime_NEON
=
0
;
mal_sine_wave
sineWave
;
mal_sine_wave
g_
sineWave
;
mal_bool32
channel_router_test
(
mal_uint32
channels
,
mal_uint64
frameCount
,
float
**
ppFramesA
,
float
**
ppFramesB
)
{
...
...
@@ -32,8 +66,8 @@ mal_uint32 channel_router_on_read(mal_channel_router* pRouter, mal_uint32 frameC
float
**
ppSamplesOutF
=
(
float
**
)
ppSamplesOut
;
for
(
mal_uint32
iChannel
=
0
;
iChannel
<
pRouter
->
config
.
channelsIn
;
++
iChannel
)
{
mal_sine_wave_init
(
1
/
(
iChannel
+
1
),
400
,
48000
,
&
sineWave
);
mal_sine_wave_read
(
&
sineWave
,
frameCount
,
ppSamplesOutF
[
iChannel
]);
mal_sine_wave_init
(
1
/
(
iChannel
+
1
),
400
,
48000
,
&
g_
sineWave
);
mal_sine_wave_read
(
&
g_
sineWave
,
frameCount
,
ppSamplesOutF
[
iChannel
]);
}
return
frameCount
;
...
...
@@ -75,7 +109,7 @@ int do_profiling__channel_routing()
ppOutBenchmark
[
i
]
=
(
void
*
)
g_ChannelRouterProfilingOutputBenchmark
[
i
];
}
mal_sine_wave_init
(
1
,
400
,
48000
,
&
sineWave
);
mal_sine_wave_init
(
1
,
400
,
48000
,
&
g_
sineWave
);
mal_uint64
framesRead
=
mal_channel_router_read_deinterleaved
(
&
router
,
framesToRead
,
ppOutBenchmark
,
NULL
);
if
(
framesRead
!=
framesToRead
)
{
printf
(
"Channel Router: An error occurred while reading benchmark data.
\n
"
);
...
...
@@ -183,9 +217,263 @@ int do_profiling__channel_routing()
printf
(
"NEON: %.4fms (%.2f%%)
\n
"
,
g_ChannelRouterTime_NEON
*
1000
,
g_ChannelRouterTime_Reference
/
g_ChannelRouterTime_NEON
*
100
);
}
return
1
;
return
0
;
}
///////////////////////////////////////////////////////////////////////////////
//
// SRC
//
///////////////////////////////////////////////////////////////////////////////
// Output of the scalar/reference conversion which optimized implementations are compared against.
typedef struct
{
    float*     pFrameData[MAL_MAX_CHANNELS];    // One buffer of converted samples per channel.
    mal_uint64 frameCount;                      // Number of frames in each channel buffer.
    mal_uint32 channels;                        // Number of entries of pFrameData that are in use.
    double     timeTaken;                       // Time taken by the reference conversion, in seconds.
} src_reference_data;
// Input data that is fed to the sample rate converter through its read callback.
typedef struct
{
    float*     pFrameData[MAL_MAX_CHANNELS];    // One buffer of input samples per channel.
    mal_uint64 frameCount;                      // Number of frames in each channel buffer.
    mal_uint64 iNextFrame;                      // Read cursor. Advanced by the read callback.
    mal_uint32 channels;                        // Number of entries of pFrameData that are in use.
} src_data;
// Read callback for the sample rate converter. Supplies deinterleaved input frames from the
// src_data object passed in through pUserData.
//
// pSRC         - The converter requesting data. Its configured channel count determines how many
//                channel buffers are written.
// frameCount   - The number of frames being requested.
// ppSamplesOut - One output buffer per channel.
// pUserData    - Pointer to the src_data to read from. Must not be NULL.
//
// Returns the number of frames actually copied, which is less than frameCount once the end of the
// input is reached.
mal_uint32 do_profiling__src__on_read(mal_src* pSRC, mal_uint32 frameCount, void** ppSamplesOut, void* pUserData)
{
    src_data* pBaseData = (src_data*)pUserData;
    mal_assert(pBaseData != NULL);
    mal_assert(pBaseData->iNextFrame <= pBaseData->frameCount);

    // Clamp the request to whatever is left in the input.
    mal_uint64 framesToRead = frameCount;
    mal_uint64 framesAvailable = pBaseData->frameCount - pBaseData->iNextFrame;
    if (framesToRead > framesAvailable) {
        framesToRead = framesAvailable;
    }

    if (framesToRead > 0) {
        for (mal_uint32 iChannel = 0; iChannel < pSRC->config.channels; iChannel += 1) {
            // Start copying from the read cursor rather than the start of the buffer, otherwise
            // every call would hand back the same leading frames.
            const float* pNextFrames = pBaseData->pFrameData[iChannel] + pBaseData->iNextFrame;
            mal_copy_memory(ppSamplesOut[iChannel], pNextFrames, (size_t)(framesToRead * sizeof(float)));
        }
    }

    pBaseData->iNextFrame += framesToRead;
    return (mal_uint32)framesToRead;
}
// Initializes pSRC to convert pBaseData from sampleRateIn to sampleRateOut using the given
// algorithm, with only the code path selected by mode left enabled.
//
// Returns the result of mal_src_init(). A message is printed when initialization fails.
mal_result init_src(src_data* pBaseData, mal_uint32 sampleRateIn, mal_uint32 sampleRateOut, mal_src_algorithm algorithm, simd_mode mode, mal_src* pSRC)
{
    mal_assert(pBaseData != NULL);
    mal_assert(pSRC != NULL);

    mal_src_config srcConfig = mal_src_config_init(sampleRateIn, sampleRateOut, pBaseData->channels, do_profiling__src__on_read, pBaseData);
    srcConfig.sinc.windowWidth = 17;    // <-- Make this an odd number to test unaligned section in the SIMD implementations.
    srcConfig.algorithm = algorithm;

    // Each SIMD path is disabled unless it is the one that was asked for. With simd_mode_scalar,
    // or any unrecognized mode, all of them end up disabled.
    srcConfig.noSSE2   = (mode == simd_mode_sse2)   ? MAL_FALSE : MAL_TRUE;
    srcConfig.noAVX    = (mode == simd_mode_avx)    ? MAL_FALSE : MAL_TRUE;
    srcConfig.noAVX512 = (mode == simd_mode_avx512) ? MAL_FALSE : MAL_TRUE;
    srcConfig.noNEON   = (mode == simd_mode_neon)   ? MAL_FALSE : MAL_TRUE;

    mal_result result = mal_src_init(&srcConfig, pSRC);
    if (result != MAL_SUCCESS) {
        printf("Failed to initialize sample rate converter.\n");
    }

    return result;
}
// Runs a single sample rate conversion using the implementation selected by mode, times it, and
// compares the output sample-for-sample against the reference output.
//
// pBaseData      - The input data. Its read cursor is reset to the start before converting.
// sampleRateIn   - Input sample rate.
// sampleRateOut  - Output sample rate.
// algorithm      - The conversion algorithm to use.
// mode           - Which implementation to enable.
// pReferenceData - The reference output and timing to compare against.
//
// Returns the init_src() result when initialization fails, -2 when an output buffer cannot be
// allocated, and the (successful) init_src() result otherwise. A failed comparison is reported on
// stdout only; it does not affect the return value.
int do_profiling__src__profile_individual(src_data* pBaseData, mal_uint32 sampleRateIn, mal_uint32 sampleRateOut, mal_src_algorithm algorithm, simd_mode mode, src_reference_data* pReferenceData)
{
    mal_assert(pBaseData != NULL);
    mal_assert(pReferenceData != NULL);

    mal_result result = MAL_ERROR;

    // Make sure the base data is moved back to the start.
    pBaseData->iNextFrame = 0;

    mal_src src;
    result = init_src(pBaseData, sampleRateIn, sampleRateOut, algorithm, mode, &src);
    if (result != MAL_SUCCESS) {
        return (int)result;
    }


    // Profiling.
    mal_uint64 sz = pReferenceData->frameCount * sizeof(float);
    mal_assert(sz <= SIZE_MAX);

    float* pFrameData[MAL_MAX_CHANNELS];
    for (mal_uint32 iChannel = 0; iChannel < pBaseData->channels; iChannel += 1) {
        pFrameData[iChannel] = (float*)mal_aligned_malloc((size_t)sz, MAL_SIMD_ALIGNMENT);
        if (pFrameData[iChannel] == NULL) {
            printf("Out of memory.\n");

            // Release the buffers of the channels that were successfully allocated before this one.
            for (mal_uint32 iAllocated = 0; iAllocated < iChannel; iAllocated += 1) {
                mal_aligned_free(pFrameData[iAllocated]);
            }

            return -2;
        }

        mal_zero_memory(pFrameData[iChannel], (size_t)sz);
    }

    mal_timer timer;
    mal_timer_init(&timer);
    double startTime = mal_timer_get_time_in_seconds(&timer);
    {
        mal_src_read_deinterleaved(&src, pReferenceData->frameCount, (void**)pFrameData, pBaseData);
    }
    double timeTaken = mal_timer_get_time_in_seconds(&timer) - startTime;


    // Correctness test. The output must match the reference exactly.
    //
    // NOTE(review): this walks pReferenceData->channels while the buffers above were allocated for
    // pBaseData->channels. The two are presumably always equal - confirm against callers.
    mal_bool32 passed = MAL_TRUE;
    for (mal_uint32 iChannel = 0; iChannel < pReferenceData->channels; iChannel += 1) {
        for (mal_uint32 iFrame = 0; iFrame < pReferenceData->frameCount; iFrame += 1) {
            float s0 = pReferenceData->pFrameData[iChannel][iFrame];
            float s1 = pFrameData[iChannel][iFrame];
            if (s0 != s1) {
                printf("(Channel %u, Sample %u) %f != %f\n", (unsigned int)iChannel, (unsigned int)iFrame, s0, s1);
                passed = MAL_FALSE;
            }
        }
    }


    // Print results. The percentage is the reference time relative to the time taken here.
    if (passed) {
        printf("  [PASSED] ");
    } else {
        printf("  [FAILED] ");
    }
    printf("%s %u -> %u (%s): %.4fms (%.2f%%)\n", mal_src_algorithm_to_string(algorithm), (unsigned int)sampleRateIn, (unsigned int)sampleRateOut, simd_mode_to_string(mode), timeTaken*1000, pReferenceData->timeTaken/timeTaken*100);


    for (mal_uint32 iChannel = 0; iChannel < pBaseData->channels; iChannel += 1) {
        mal_aligned_free(pFrameData[iChannel]);
    }

    return (int)result;
}
// Profiles one sample rate conversion configuration. The conversion is first run with the
// scalar/reference implementation to produce reference output and timing, and then once for every
// SIMD implementation that is reported as available.
//
// pBaseData     - The input data. Its read cursor is reset to the start before converting.
// sampleRateIn  - Input sample rate.
// sampleRateOut - Output sample rate.
// algorithm     - The conversion algorithm to use.
//
// Returns 0 on success, -1 when the output frame count comes out as zero, -2 when a reference
// buffer cannot be allocated, or the init_src() result when the reference converter fails to
// initialize.
int do_profiling__src__profile_set(src_data* pBaseData, mal_uint32 sampleRateIn, mal_uint32 sampleRateOut, mal_src_algorithm algorithm)
{
    mal_assert(pBaseData != NULL);

    // Make sure the base data is back at the start.
    pBaseData->iNextFrame = 0;

    src_reference_data referenceData;
    mal_zero_object(&referenceData);
    referenceData.channels = pBaseData->channels;

    // The first thing to do is to perform a sample rate conversion using the scalar/reference implementation. This reference is used to compare
    // the results of the optimized implementation.
    referenceData.frameCount = mal_calculate_frame_count_after_src(sampleRateOut, sampleRateIn, pBaseData->frameCount);
    if (referenceData.frameCount == 0) {
        printf("Failed to calculate output frame count.\n");
        return -1;
    }

    mal_uint64 sz = referenceData.frameCount * sizeof(float);
    mal_assert(sz <= SIZE_MAX);

    for (mal_uint32 iChannel = 0; iChannel < referenceData.channels; iChannel += 1) {
        referenceData.pFrameData[iChannel] = (float*)mal_aligned_malloc((size_t)sz, MAL_SIMD_ALIGNMENT);
        if (referenceData.pFrameData[iChannel] == NULL) {
            printf("Out of memory.\n");

            // Release the buffers of the channels that were successfully allocated before this one.
            for (mal_uint32 iAllocated = 0; iAllocated < iChannel; iAllocated += 1) {
                mal_aligned_free(referenceData.pFrameData[iAllocated]);
            }

            return -2;
        }

        mal_zero_memory(referenceData.pFrameData[iChannel], (size_t)sz);
    }


    // Generate the reference data.
    mal_src src;
    mal_result result = init_src(pBaseData, sampleRateIn, sampleRateOut, algorithm, simd_mode_scalar, &src);
    if (result != MAL_SUCCESS) {
        // The reference buffers are owned by this function, so they must not outlive it.
        for (mal_uint32 iChannel = 0; iChannel < referenceData.channels; iChannel += 1) {
            mal_aligned_free(referenceData.pFrameData[iChannel]);
        }

        return (int)result;
    }

    mal_timer timer;
    mal_timer_init(&timer);
    double startTime = mal_timer_get_time_in_seconds(&timer);
    {
        mal_src_read_deinterleaved(&src, referenceData.frameCount, (void**)referenceData.pFrameData, pBaseData);
    }
    referenceData.timeTaken = mal_timer_get_time_in_seconds(&timer) - startTime;


    // Now that we have the reference data to compare against we can go ahead and measure the SIMD optimizations.
    if (mal_has_sse2()) {
        do_profiling__src__profile_individual(pBaseData, sampleRateIn, sampleRateOut, algorithm, simd_mode_sse2, &referenceData);
    }
    if (mal_has_avx()) {
        do_profiling__src__profile_individual(pBaseData, sampleRateIn, sampleRateOut, algorithm, simd_mode_avx, &referenceData);
    }
    if (mal_has_avx512f()) {
        do_profiling__src__profile_individual(pBaseData, sampleRateIn, sampleRateOut, algorithm, simd_mode_avx512, &referenceData);
    }
    if (mal_has_neon()) {
        do_profiling__src__profile_individual(pBaseData, sampleRateIn, sampleRateOut, algorithm, simd_mode_neon, &referenceData);
    }


    for (mal_uint32 iChannel = 0; iChannel < referenceData.channels; iChannel += 1) {
        mal_aligned_free(referenceData.pFrameData[iChannel]);
    }

    return 0;
}
// Entry point for the sample rate conversion profiling. Generates 8 channels of 10000 frames of
// sine wave input, one frequency per channel, and profiles sinc conversion in both directions
// between 44100 and 48000.
//
// Returns 0 on success, or -1 when an input buffer cannot be allocated.
int do_profiling__src()
{
    printf("Sample Rate Conversion\n");
    printf("======================\n");

    // Set up base data.
    src_data baseData;
    mal_zero_object(&baseData);
    baseData.channels = 8;
    baseData.frameCount = 10000;
    for (mal_uint32 iChannel = 0; iChannel < baseData.channels; ++iChannel) {
        baseData.pFrameData[iChannel] = (float*)mal_aligned_malloc((size_t)(baseData.frameCount * sizeof(float)), MAL_SIMD_ALIGNMENT);
        if (baseData.pFrameData[iChannel] == NULL) {
            printf("Out of memory.\n");

            // Release the buffers of the channels that were successfully allocated before this one.
            for (mal_uint32 iAllocated = 0; iAllocated < iChannel; iAllocated += 1) {
                mal_aligned_free(baseData.pFrameData[iAllocated]);
            }

            return -1;
        }

        // Each channel gets its own frequency: 400, 450, 500 and so on.
        mal_sine_wave sineWave;
        mal_sine_wave_init(1.0f, 400 + (iChannel*50), 48000, &sineWave);
        mal_sine_wave_read(&sineWave, baseData.frameCount, baseData.pFrameData[iChannel]);
    }


    // Upsampling.
    do_profiling__src__profile_set(&baseData, 44100, 48000, mal_src_algorithm_sinc);

    // Downsampling.
    do_profiling__src__profile_set(&baseData, 48000, 44100, mal_src_algorithm_sinc);


    for (mal_uint32 iChannel = 0; iChannel < baseData.channels; iChannel += 1) {
        mal_aligned_free(baseData.pFrameData[iChannel]);
    }

    return 0;
}
int
main
(
int
argc
,
char
**
argv
)
{
(
void
)
argc
;
...
...
@@ -197,19 +485,16 @@ int main(int argc, char** argv)
}
else
{
printf
(
"Has SSE: NO
\n
"
);
}
if
(
mal_has_avx
())
{
printf
(
"Has AVX: YES
\n
"
);
}
else
{
printf
(
"Has AVX: NO
\n
"
);
}
if
(
mal_has_avx512f
())
{
printf
(
"Has AVX-512F: YES
\n
"
);
}
else
{
printf
(
"Has AVX-512F: NO
\n
"
);
}
if
(
mal_has_neon
())
{
printf
(
"Has NEON: YES
\n
"
);
}
else
{
...
...
@@ -221,7 +506,14 @@ int main(int argc, char** argv)
// Channel routing.
do_profiling__channel_routing
();
printf
(
"
\n\n
"
);
// Sample rate conversion.
do_profiling__src
();
printf
(
"
\n\n
"
);
printf
(
"Press any key to quit...
\n
"
);
getchar
();
return
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment