Optimized SDL_Convert_*_to_*_Scalar

They are now all branch-less and avoid float multiplication/conversion where possible
This commit is contained in:
Brick
2023-08-13 14:36:48 +01:00
committed by Ryan C. Gordon
parent f97b920b31
commit c6c1e673c0

View File

@@ -39,50 +39,155 @@
#define NEED_SCALAR_CONVERTER_FALLBACKS 1
#endif
#define DIVBY128 0.0078125f
#define DIVBY32768 0.000030517578125f
#define DIVBY8388607 0.00000011920930376163766f
#if NEED_SCALAR_CONVERTER_FALLBACKS
/* these all convert backwards because (currently) float32 is >= to the size of anything it converts to, so it lets us safely convert in-place. */
#define AUDIOCVT_TOFLOAT_SCALAR(from, fromtype, equation) \
static void SDL_Convert_##from##_to_F32_Scalar(float *dst, const fromtype *src, int num_samples) { \
int i; \
LOG_DEBUG_AUDIO_CONVERT(#from, "F32"); \
for (i = num_samples - 1; i >= 0; --i) { \
dst[i] = equation; \
} \
/* This code requires that floats are in the IEEE-754 binary32 format */
SDL_COMPILE_TIME_ASSERT(float_bits, sizeof(float) == sizeof(Uint32));
union float_bits {
Uint32 u32;
float f32;
};
static void SDL_Convert_S8_to_F32_Scalar(float *dst, const Sint8 *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("S8", "F32");
for (i = num_samples - 1; i >= 0; --i) {
/* 1) Construct a float in the range [65536.0, 65538.0)
* 2) Shift the float range to [-1.0, 1.0) */
union float_bits x = { .u32 = (Uint8)src[i] ^ 0x47800080u };
dst[i] = x.f32 - 65537.0f;
}
}
AUDIOCVT_TOFLOAT_SCALAR(S8, Sint8, ((float)src[i]) * DIVBY128)
AUDIOCVT_TOFLOAT_SCALAR(U8, Uint8, (((float)src[i]) * DIVBY128) - 1.0f)
AUDIOCVT_TOFLOAT_SCALAR(S16, Sint16, ((float)src[i]) * DIVBY32768)
AUDIOCVT_TOFLOAT_SCALAR(S32, Sint32, ((float)(src[i] >> 8)) * DIVBY8388607)
#undef AUDIOCVT_FROMFLOAT_SCALAR
static void SDL_Convert_U8_to_F32_Scalar(float *dst, const Uint8 *src, int num_samples)
{
int i;
/* these all convert forwards because (currently) float32 is >= to the size of anything it converts from, so it lets us safely convert in-place. */
#define AUDIOCVT_FROMFLOAT_SCALAR(to, totype, clampmin, clampmax, equation) \
static void SDL_Convert_F32_to_##to##_Scalar(totype *dst, const float *src, int num_samples) { \
int i; \
LOG_DEBUG_AUDIO_CONVERT("F32", #to); \
for (i = 0; i < num_samples; i++) { \
const float sample = src[i]; \
if (sample >= 1.0f) { \
dst[i] = (totype) (clampmax); \
} else if (sample <= -1.0f) { \
dst[i] = (totype) (clampmin); \
} else { \
dst[i] = (totype) (equation); \
} \
} \
LOG_DEBUG_AUDIO_CONVERT("U8", "F32");
for (i = num_samples - 1; i >= 0; --i) {
/* 1) Construct a float in the range [65536.0, 65538.0)
* 2) Shift the float range to [-1.0, 1.0) */
union float_bits x = { .u32 = (Uint8)src[i] ^ 0x47800000u };
dst[i] = x.f32 - 65537.0f;
}
}
AUDIOCVT_FROMFLOAT_SCALAR(S8, Sint8, -128, 127, sample * 127.0f);
AUDIOCVT_FROMFLOAT_SCALAR(U8, Uint8, 0, 255, (sample + 1.0f) * 127.0f);
AUDIOCVT_FROMFLOAT_SCALAR(S16, Sint16, -32768, 32767, sample * 32767.0f);
AUDIOCVT_FROMFLOAT_SCALAR(S32, Sint32, -2147483648LL, 2147483647, ((Sint32)(sample * 8388607.0f)) << 8);
#undef AUDIOCVT_FROMFLOAT_SCALAR
static void SDL_Convert_S16_to_F32_Scalar(float *dst, const Sint16 *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("S16", "F32");
for (i = num_samples - 1; i >= 0; --i) {
/* 1) Construct a float in the range [256.0, 258.0)
* 2) Shift the float range to [-1.0, 1.0) */
union float_bits x = { .u32 = (Uint16)src[i] ^ 0x43808000u };
dst[i] = x.f32 - 257.0f;
}
}
static void SDL_Convert_S32_to_F32_Scalar(float *dst, const Sint32 *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("S32", "F32");
for (i = num_samples - 1; i >= 0; --i) {
dst[i] = (float)src[i] * 0x1p-31f;
}
}
/* Create a bit-mask based on the sign-bit. Should optimize to a single arithmetic-shift-right */
#define SIGNMASK(x) (Uint32)(0u - ((Uint32)(x) >> 31))
static void SDL_Convert_F32_to_S8_Scalar(Sint8 *dst, const float *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("F32", "S8");
for (i = 0; i < num_samples; ++i) {
/* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]
* 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128]
* 3) Clamp the value to [-128, 127] */
union float_bits x = { .f32 = src[i] + 98304.0f };
Uint32 y = x.u32 - 0x47C00000u;
Uint32 z = 0x7Fu - (y ^ SIGNMASK(y));
y = y ^ (z & SIGNMASK(z));
dst[i] = (Sint8)(y & 0xFF);
}
}
static void SDL_Convert_F32_to_U8_Scalar(Uint8 *dst, const float *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("F32", "U8");
for (i = 0; i < num_samples; ++i) {
union float_bits x = { .f32 = src[i] + 98304.0f };
/* 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]
* 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128]
* 3) Clamp the value to [-128, 127]
* 4) Shift the integer range from [-128, 127] to [0, 255] */
Uint32 y = x.u32 - 0x47C00000u;
Uint32 z = 0x7Fu - (y ^ SIGNMASK(y));
y = (y ^ 0x80u) ^ (z & SIGNMASK(z));
dst[i] = (Uint8)(y & 0xFF);
}
}
static void SDL_Convert_F32_to_S16_Scalar(Sint16 *dst, const float *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("F32", "S16");
for (i = 0; i < num_samples; ++i) {
/* 1) Shift the float range from [-1.0, 1.0] to [383.0, 385.0]
* 2) Shift the integer range from [0x43BF8000, 0x43C08000] to [-32768, 32768]
* 3) Clamp values outside the [-32768, 32767] range */
union float_bits x = { .f32 = src[i] + 384.0f };
Uint32 y = x.u32 - 0x43C00000u;
Uint32 z = 0x7FFFu - (y ^ SIGNMASK(y));
y = y ^ (z & SIGNMASK(z));
dst[i] = (Sint16)(y & 0xFFFF);
}
}
static void SDL_Convert_F32_to_S32_Scalar(Sint32 *dst, const float *src, int num_samples)
{
int i;
LOG_DEBUG_AUDIO_CONVERT("F32", "S32");
for (i = 0; i < num_samples; ++i) {
union float_bits x = { .f32 = src[i] };
/* 1) Shift the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0]
* 2) Set values outside the [-2147483648.0, 2147483647.0] range to -2147483648.0
* 3) Convert the float to an integer, and fixup values outside the valid range */
Uint32 y = x.u32 + 0x0F800000u;
Uint32 z = y - 0xCF000000u;
z &= SIGNMASK(y ^ z);
x.u32 = y - z;
dst[i] = (Sint32)((Uint32)(Sint32)x.f32 ^ SIGNMASK(z));
}
}
#undef SIGNMASK
#endif /* NEED_SCALAR_CONVERTER_FALLBACKS */
@@ -426,6 +531,10 @@ static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 *dst, const
#endif
#ifdef SDL_NEON_INTRINSICS
#define DIVBY128 0x1p-7f
#define DIVBY32768 0x1p-15f
#define DIVBY8388607 0x1.000002p-23f
static void SDL_Convert_S8_to_F32_NEON(float *dst, const Sint8 *src, int num_samples)
{
int i;