Commit 8a1858eb authored by Steven Noonan

use MA_ASSUME for channel counts before loops

The range of the value isn't obvious to any compiler, as it could go for
one iteration or 4 billion iterations. Adding MA_ASSUME in these places
helps the compiler understand the range of possible values, and know how
heavily to vectorize (or not vectorize) these loops.
Signed-off-by: Steven Noonan <steven@uplinklabs.net>
parent c1451b30
...@@ -36324,6 +36324,7 @@ static MA_INLINE void ma_biquad_process_pcm_frame_f32__direct_form_2_transposed( ...@@ -36324,6 +36324,7 @@ static MA_INLINE void ma_biquad_process_pcm_frame_f32__direct_form_2_transposed(
const float a1 = pBQ->a1.f32; const float a1 = pBQ->a1.f32;
const float a2 = pBQ->a2.f32; const float a2 = pBQ->a2.f32;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
float r1 = pBQ->r1[c].f32; float r1 = pBQ->r1[c].f32;
float r2 = pBQ->r2[c].f32; float r2 = pBQ->r2[c].f32;
...@@ -36355,6 +36356,7 @@ static MA_INLINE void ma_biquad_process_pcm_frame_s16__direct_form_2_transposed( ...@@ -36355,6 +36356,7 @@ static MA_INLINE void ma_biquad_process_pcm_frame_s16__direct_form_2_transposed(
const ma_int32 a1 = pBQ->a1.s32; const ma_int32 a1 = pBQ->a1.s32;
const ma_int32 a2 = pBQ->a2.s32; const ma_int32 a2 = pBQ->a2.s32;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
ma_int32 r1 = pBQ->r1[c].s32; ma_int32 r1 = pBQ->r1[c].s32;
ma_int32 r2 = pBQ->r2[c].s32; ma_int32 r2 = pBQ->r2[c].s32;
...@@ -36523,6 +36525,7 @@ static MA_INLINE void ma_lpf1_process_pcm_frame_f32(ma_lpf1* pLPF, float* pY, co ...@@ -36523,6 +36525,7 @@ static MA_INLINE void ma_lpf1_process_pcm_frame_f32(ma_lpf1* pLPF, float* pY, co
const float a = pLPF->a.f32; const float a = pLPF->a.f32;
const float b = 1 - a; const float b = 1 - a;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
float r1 = pLPF->r1[c].f32; float r1 = pLPF->r1[c].f32;
float x = pX[c]; float x = pX[c];
...@@ -36542,6 +36545,7 @@ static MA_INLINE void ma_lpf1_process_pcm_frame_s16(ma_lpf1* pLPF, ma_int16* pY, ...@@ -36542,6 +36545,7 @@ static MA_INLINE void ma_lpf1_process_pcm_frame_s16(ma_lpf1* pLPF, ma_int16* pY,
const ma_int32 a = pLPF->a.s32; const ma_int32 a = pLPF->a.s32;
const ma_int32 b = ((1 << MA_BIQUAD_FIXED_POINT_SHIFT) - a); const ma_int32 b = ((1 << MA_BIQUAD_FIXED_POINT_SHIFT) - a);
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
ma_int32 r1 = pLPF->r1[c].s32; ma_int32 r1 = pLPF->r1[c].s32;
ma_int32 x = pX[c]; ma_int32 x = pX[c];
...@@ -37032,6 +37036,7 @@ static MA_INLINE void ma_hpf1_process_pcm_frame_f32(ma_hpf1* pHPF, float* pY, co ...@@ -37032,6 +37036,7 @@ static MA_INLINE void ma_hpf1_process_pcm_frame_f32(ma_hpf1* pHPF, float* pY, co
const float a = 1 - pHPF->a.f32; const float a = 1 - pHPF->a.f32;
const float b = 1 - a; const float b = 1 - a;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
float r1 = pHPF->r1[c].f32; float r1 = pHPF->r1[c].f32;
float x = pX[c]; float x = pX[c];
...@@ -37051,6 +37056,7 @@ static MA_INLINE void ma_hpf1_process_pcm_frame_s16(ma_hpf1* pHPF, ma_int16* pY, ...@@ -37051,6 +37056,7 @@ static MA_INLINE void ma_hpf1_process_pcm_frame_s16(ma_hpf1* pHPF, ma_int16* pY,
const ma_int32 a = ((1 << MA_BIQUAD_FIXED_POINT_SHIFT) - pHPF->a.s32); const ma_int32 a = ((1 << MA_BIQUAD_FIXED_POINT_SHIFT) - pHPF->a.s32);
const ma_int32 b = ((1 << MA_BIQUAD_FIXED_POINT_SHIFT) - a); const ma_int32 b = ((1 << MA_BIQUAD_FIXED_POINT_SHIFT) - a);
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
ma_int32 r1 = pHPF->r1[c].s32; ma_int32 r1 = pHPF->r1[c].s32;
ma_int32 x = pX[c]; ma_int32 x = pX[c];
...@@ -38396,6 +38402,7 @@ static void ma_linear_resampler_interpolate_frame_s16(ma_linear_resampler* pResa ...@@ -38396,6 +38402,7 @@ static void ma_linear_resampler_interpolate_frame_s16(ma_linear_resampler* pResa
a = (pResampler->inTimeFrac << shift) / pResampler->config.sampleRateOut; a = (pResampler->inTimeFrac << shift) / pResampler->config.sampleRateOut;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
ma_int16 s = ma_linear_resampler_mix_s16(pResampler->x0.s16[c], pResampler->x1.s16[c], a, shift); ma_int16 s = ma_linear_resampler_mix_s16(pResampler->x0.s16[c], pResampler->x1.s16[c], a, shift);
pFrameOut[c] = s; pFrameOut[c] = s;
...@@ -38414,6 +38421,7 @@ static void ma_linear_resampler_interpolate_frame_f32(ma_linear_resampler* pResa ...@@ -38414,6 +38421,7 @@ static void ma_linear_resampler_interpolate_frame_f32(ma_linear_resampler* pResa
a = (float)pResampler->inTimeFrac / pResampler->config.sampleRateOut; a = (float)pResampler->inTimeFrac / pResampler->config.sampleRateOut;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
for (c = 0; c < channels; c += 1) { for (c = 0; c < channels; c += 1) {
float s = ma_mix_f32_fast(pResampler->x0.f32[c], pResampler->x1.f32[c], a); float s = ma_mix_f32_fast(pResampler->x0.f32[c], pResampler->x1.f32[c], a);
pFrameOut[c] = s; pFrameOut[c] = s;
...@@ -48723,6 +48731,7 @@ static MA_INLINE ma_uint64 ma_noise_read_pcm_frames__white(ma_noise* pNoise, voi ...@@ -48723,6 +48731,7 @@ static MA_INLINE ma_uint64 ma_noise_read_pcm_frames__white(ma_noise* pNoise, voi
ma_uint64 iFrame; ma_uint64 iFrame;
ma_uint32 iChannel; ma_uint32 iChannel;
const ma_uint32 channels = pNoise->config.channels; const ma_uint32 channels = pNoise->config.channels;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
if (pNoise->config.format == ma_format_f32) { if (pNoise->config.format == ma_format_f32) {
float* pFramesOutF32 = (float*)pFramesOut; float* pFramesOutF32 = (float*)pFramesOut;
...@@ -48841,6 +48850,7 @@ static MA_INLINE ma_uint64 ma_noise_read_pcm_frames__pink(ma_noise* pNoise, void ...@@ -48841,6 +48850,7 @@ static MA_INLINE ma_uint64 ma_noise_read_pcm_frames__pink(ma_noise* pNoise, void
ma_uint64 iFrame; ma_uint64 iFrame;
ma_uint32 iChannel; ma_uint32 iChannel;
const ma_uint32 channels = pNoise->config.channels; const ma_uint32 channels = pNoise->config.channels;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
if (pNoise->config.format == ma_format_f32) { if (pNoise->config.format == ma_format_f32) {
float* pFramesOutF32 = (float*)pFramesOut; float* pFramesOutF32 = (float*)pFramesOut;
...@@ -48922,6 +48932,7 @@ static MA_INLINE ma_uint64 ma_noise_read_pcm_frames__brownian(ma_noise* pNoise, ...@@ -48922,6 +48932,7 @@ static MA_INLINE ma_uint64 ma_noise_read_pcm_frames__brownian(ma_noise* pNoise,
ma_uint64 iFrame; ma_uint64 iFrame;
ma_uint32 iChannel; ma_uint32 iChannel;
const ma_uint32 channels = pNoise->config.channels; const ma_uint32 channels = pNoise->config.channels;
MA_ASSUME(channels >= MA_MIN_CHANNELS && channels <= MA_MAX_CHANNELS);
if (pNoise->config.format == ma_format_f32) { if (pNoise->config.format == ma_format_f32) {
float* pFramesOutF32 = (float*)pFramesOut; float* pFramesOutF32 = (float*)pFramesOut;
...@@ -9323,6 +9323,9 @@ MA_API ma_result ma_spatializer_process_pcm_frames(ma_spatializer* pSpatializer, ...@@ -9323,6 +9323,9 @@ MA_API ma_result ma_spatializer_process_pcm_frames(ma_spatializer* pSpatializer,
const ma_uint32 channelsOut = pSpatializer->config.channelsOut; const ma_uint32 channelsOut = pSpatializer->config.channelsOut;
const ma_uint32 channelsIn = pSpatializer->config.channelsIn; const ma_uint32 channelsIn = pSpatializer->config.channelsIn;
MA_ASSUME(channelsOut >= MA_MIN_CHANNELS && channelsOut <= MA_MAX_CHANNELS);
MA_ASSUME(channelsIn >= MA_MIN_CHANNELS && channelsIn <= MA_MAX_CHANNELS);
/* /*
We'll need the listener velocity for doppler pitch calculations. The speed of sound is We'll need the listener velocity for doppler pitch calculations. The speed of sound is
defined by the listener, so we'll grab that here too. defined by the listener, so we'll grab that here too.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment