Commit 7e1bdf77 authored by David Reid's avatar David Reid

Update dr_flac and dr_mp3.

parent 1f295eba
/*
FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
dr_flac - v0.12.11 - TBD
dr_flac - v0.12.11 - 2020-04-19
David Reid - mackron@gmail.com
......@@ -151,10 +151,7 @@ samples as it can, up to the amount requested. Later on when you need the next b
}
```
You can seek to a specific sample with drflac_seek_to_sample(). The given sample is based on interleaving. So for example, if you were to seek to the sample at
index 0 in a stereo stream, you'll be seeking to the first sample of the left channel. The sample at index 1 will be the first sample of the right channel. The
sample at index 2 will be the second sample of the left channel, etc.
You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
If you just want to quickly decode an entire FLAC file in one go you can do something like this:
......@@ -4124,6 +4121,12 @@ static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
vst1q_s32(p+4, x.val[1]);
}
static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
{
vst1q_u32(p+0, x.val[0]);
vst1q_u32(p+4, x.val[1]);
}
static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x)
{
vst1q_f32(p+0, x.val[0]);
......@@ -4135,6 +4138,11 @@ static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
}
static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
{
vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
}
static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
{
drflac_int32 x[4];
......@@ -8716,15 +8724,15 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drf
shift1_4 = vdupq_n_s32(shift1);
for (i = 0; i < frameCount4; ++i) {
int32x4_t left;
int32x4_t side;
int32x4_t right;
uint32x4_t left;
uint32x4_t side;
uint32x4_t right;
left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
right = vsubq_s32(left, side);
left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
right = vsubq_u32(left, side);
drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
}
for (i = (frameCount4 << 2); i < frameCount; ++i) {
......@@ -8870,15 +8878,15 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(dr
shift1_4 = vdupq_n_s32(shift1);
for (i = 0; i < frameCount4; ++i) {
int32x4_t side;
int32x4_t right;
int32x4_t left;
uint32x4_t side;
uint32x4_t right;
uint32x4_t left;
side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
left = vaddq_s32(right, side);
side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
left = vaddq_u32(right, side);
drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
}
for (i = (frameCount4 << 2); i < frameCount; ++i) {
......@@ -9121,30 +9129,30 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drfl
const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
drflac_int32 shift = unusedBitsPerSample;
int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
int32x4_t one4;
int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
uint32x4_t one4;
DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
one4 = vdupq_n_s32(1);
one4 = vdupq_n_u32(1);
if (shift == 0) {
for (i = 0; i < frameCount4; ++i) {
int32x4_t mid;
int32x4_t side;
uint32x4_t mid;
uint32x4_t side;
int32x4_t left;
int32x4_t right;
mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4));
mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
left = vshrq_n_s32(vaddq_s32(mid, side), 1);
right = vshrq_n_s32(vsubq_s32(mid, side), 1);
left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
}
......@@ -9165,18 +9173,18 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drfl
shift4 = vdupq_n_s32(shift);
for (i = 0; i < frameCount4; ++i) {
int32x4_t mid;
int32x4_t side;
uint32x4_t mid;
uint32x4_t side;
int32x4_t left;
int32x4_t right;
mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, one4));
mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
left = vshlq_s32(vaddq_s32(mid, side), shift4);
right = vshlq_s32(vsubq_s32(mid, side), shift4);
left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
}
......@@ -9304,8 +9312,8 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo_
int32x4_t left;
int32x4_t right;
left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift4_0);
right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift4_1);
left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
}
......@@ -9552,18 +9560,18 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drf
shift1_4 = vdupq_n_s32(shift1);
for (i = 0; i < frameCount4; ++i) {
int32x4_t left;
int32x4_t side;
int32x4_t right;
uint32x4_t left;
uint32x4_t side;
uint32x4_t right;
left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
right = vsubq_s32(left, side);
left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
right = vsubq_u32(left, side);
left = vshrq_n_s32(left, 16);
right = vshrq_n_s32(right, 16);
left = vshrq_n_u32(left, 16);
right = vshrq_n_u32(right, 16);
drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
}
for (i = (frameCount4 << 2); i < frameCount; ++i) {
......@@ -9733,18 +9741,18 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(dr
shift1_4 = vdupq_n_s32(shift1);
for (i = 0; i < frameCount4; ++i) {
int32x4_t side;
int32x4_t right;
int32x4_t left;
uint32x4_t side;
uint32x4_t right;
uint32x4_t left;
side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
left = vaddq_s32(right, side);
side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
left = vaddq_u32(right, side);
left = vshrq_n_s32(left, 16);
right = vshrq_n_s32(right, 16);
left = vshrq_n_u32(left, 16);
right = vshrq_n_u32(right, 16);
drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
}
for (i = (frameCount4 << 2); i < frameCount; ++i) {
......@@ -10024,18 +10032,18 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drfl
if (shift == 0) {
for (i = 0; i < frameCount4; ++i) {
int32x4_t mid;
int32x4_t side;
uint32x4_t mid;
uint32x4_t side;
int32x4_t left;
int32x4_t right;
mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
left = vshrq_n_s32(vaddq_s32(mid, side), 1);
right = vshrq_n_s32(vsubq_s32(mid, side), 1);
left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
left = vshrq_n_s32(left, 16);
right = vshrq_n_s32(right, 16);
......@@ -10059,18 +10067,18 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drfl
shift4 = vdupq_n_s32(shift);
for (i = 0; i < frameCount4; ++i) {
int32x4_t mid;
int32x4_t side;
uint32x4_t mid;
uint32x4_t side;
int32x4_t left;
int32x4_t right;
mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbpsShift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbpsShift1_4);
mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
left = vshlq_s32(vaddq_s32(mid, side), shift4);
right = vshlq_s32(vsubq_s32(mid, side), shift4);
left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
left = vshrq_n_s32(left, 16);
right = vshrq_n_s32(right, 16);
......@@ -10214,8 +10222,8 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo_
int32x4_t left;
int32x4_t right;
left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
left = vshrq_n_s32(left, 16);
right = vshrq_n_s32(right, 16);
......@@ -10453,17 +10461,17 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drf
shift1_4 = vdupq_n_s32(shift1);
for (i = 0; i < frameCount4; ++i) {
int32x4_t left;
int32x4_t side;
int32x4_t right;
uint32x4_t left;
uint32x4_t side;
uint32x4_t right;
float32x4_t leftf;
float32x4_t rightf;
left = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
right = vsubq_s32(left, side);
leftf = vmulq_f32(vcvtq_f32_s32(left), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(right), factor4);
left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
right = vsubq_u32(left, side);
leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
}
......@@ -10619,17 +10627,17 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(dr
shift1_4 = vdupq_n_s32(shift1);
for (i = 0; i < frameCount4; ++i) {
int32x4_t side;
int32x4_t right;
int32x4_t left;
uint32x4_t side;
uint32x4_t right;
uint32x4_t left;
float32x4_t leftf;
float32x4_t rightf;
side = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
right = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
left = vaddq_s32(right, side);
leftf = vmulq_f32(vcvtq_f32_s32(left), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(right), factor4);
side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
left = vaddq_u32(right, side);
leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
}
......@@ -10910,13 +10918,13 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drfl
float32x4_t leftf;
float32x4_t rightf;
int32x4_t mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4);
int32x4_t side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4);
uint32x4_t mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
lefti = vshrq_n_s32(vaddq_s32(mid, side), 1);
righti = vshrq_n_s32(vsubq_s32(mid, side), 1);
lefti = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
......@@ -10937,20 +10945,20 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drfl
shift -= 1;
shift4 = vdupq_n_s32(shift);
for (i = 0; i < frameCount4; ++i) {
int32x4_t mid;
int32x4_t side;
uint32x4_t mid;
uint32x4_t side;
int32x4_t lefti;
int32x4_t righti;
float32x4_t leftf;
float32x4_t rightf;
mid = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), wbps0_4);
side = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), wbps1_4);
mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
mid = vorrq_s32(vshlq_n_s32(mid, 1), vandq_s32(side, vdupq_n_s32(1)));
mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
lefti = vshlq_s32(vaddq_s32(mid, side), shift4);
righti = vshlq_s32(vsubq_s32(mid, side), shift4);
lefti = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
......@@ -11096,8 +11104,8 @@ static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo_
float32x4_t leftf;
float32x4_t rightf;
lefti = vshlq_s32(vld1q_s32(pInputSamples0 + i*4), shift0_4);
righti = vshlq_s32(vld1q_s32(pInputSamples1 + i*4), shift1_4);
lefti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4);
rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
......@@ -11697,7 +11705,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
/*
REVISION HISTORY
================
v0.12.11 - TBD
v0.12.11 - 2020-04-19
- Fix some pedantic warnings.
- Fix some undefined behaviour warnings.
......
/*
MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
dr_mp3 - v0.6.3 - 2020-04-13
dr_mp3 - v0.6.4 - 2020-04-19
David Reid - mackron@gmail.com
......@@ -669,6 +669,17 @@ static int drmp3_have_simd(void)
#endif
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__)
#define DRMP3_HAVE_ARMV6 1
static __inline__ __attribute__((always_inline)) drmp32_int32 drmp3_clip_int16_arm(int32_t a)
{
drmp3_int32 x = 0;
__asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
return x;
}
#endif
typedef struct
{
const drmp3_uint8 *buf;
......@@ -1888,11 +1899,17 @@ typedef drmp3_int16 drmp3d_sample_t;
static drmp3_int16 drmp3d_scale_pcm(float sample)
{
drmp3_int16 s;
#if DRMP3_HAVE_ARMV6
drmp3_int32 s32 = (drmp3_int32)(sample + .5f);
s32 -= (s32 < 0);
s = (drmp3_int16)drmp3_clip_int16_arm(s32);
#else
if (sample >= 32766.5) return (drmp3_int16) 32767;
if (sample <= -32767.5) return (drmp3_int16)-32768;
s = (drmp3_int16)(sample + .5f);
s -= (s < 0); /* away from zero, to be compliant */
return (drmp3_int16)s;
#endif
return s;
}
#else
typedef float drmp3d_sample_t;
......@@ -4343,6 +4360,9 @@ counts rather than sample counts.
/*
REVISION HISTORY
================
v0.6.4 - 2020-04-19
- Bring up to date with changes to minimp3.
v0.6.3 - 2020-04-13
- Fix some pedantic warnings.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment