mirror of
https://github.com/libsdl-org/SDL.git
synced 2025-10-01 15:38:29 +00:00
Allow optimizing memcpy and memset where possible
Modern C runtimes have well optimized memset and memcpy, so use those instead of dispatching into SDL's versions. In addition, some compilers can analyze memset and memcpy calls and directly turn them into optimized assembly.
This commit is contained in:
@@ -510,147 +510,6 @@ int SDL_toupper(int x) { return ((x) >= 'a') && ((x) <= 'z') ? ('A' + ((x) - 'a'
|
||||
/* Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
int SDL_tolower(int x)
{
    if (x < 'A' || x > 'Z') {
        return x;
    }
    return 'a' + (x - 'A');
}
|
||||
#endif
|
||||
|
||||
/* Portable memory copy and fill functions for SDL */
|
||||
|
||||
/*
 * Copy `len` bytes from `src` to `dst` and return `dst`.
 *
 * The implementation is chosen at compile time, in this order: the GCC
 * builtin, the C runtime's memcpy(), bcopy(), and finally a portable C loop.
 */
void *SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
{
#ifdef __GNUC__
    /* Presumably this is well tuned for speed.
       On my machine this is twice as fast as the C code below.
     */
    return __builtin_memcpy(dst, src, len);
#elif defined(HAVE_MEMCPY)
    return memcpy(dst, src, len);
#elif defined(HAVE_BCOPY)
    /* bcopy() takes the source first and returns nothing. */
    bcopy(src, dst, len);
    return dst;
#else
    /* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
       using Uint32* pointers, so we need to make sure the pointers are
       aligned before we loop using them.
     */
    if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) {
        /* Do an unaligned byte copy */
        const Uint8 *srcp1 = (const Uint8 *)src;
        Uint8 *dstp1 = (Uint8 *)dst;

        while (len--) {
            *dstp1++ = *srcp1++;
        }
    } else {
        /* Both pointers are 4-byte aligned: copy whole 32-bit words first,
           then the 0..3 bytes that remain. */
        size_t left = (len % 4);
        const Uint32 *srcp4 = (const Uint32 *)src;
        Uint32 *dstp4 = (Uint32 *)dst;
        const Uint8 *srcp1;
        Uint8 *dstp1;

        len /= 4;
        while (len--) {
            *dstp4++ = *srcp4++;
        }

        srcp1 = (const Uint8 *)srcp4;
        dstp1 = (Uint8 *)dstp4;
        /* A bounded loop rather than a switch that falls through its cases. */
        while (left--) {
            *dstp1++ = *srcp1++;
        }
    }
    return dst;
#endif /* __GNUC__ */
}
|
||||
|
||||
/*
 * Set `len` bytes starting at `dst` to the low 8 bits of `c` and return `dst`.
 *
 * Uses the C runtime's memset() when HAVE_MEMSET is defined; otherwise falls
 * back to a portable loop that writes 32 bits at a time once the destination
 * is 4-byte aligned.
 */
void *SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
{
#ifdef HAVE_MEMSET
    return memset(dst, c, len);
#else
    size_t left;
    Uint32 *dstp4;
    Uint8 *dstp1 = (Uint8 *)dst;
    Uint8 value1;
    Uint32 value4;

    /* The value used in memset() is a byte, passed as an int */
    c &= 0xff;

    /* The destination pointer needs to be aligned on a 4-byte boundary to
     * execute a 32-bit set. Set first bytes manually if needed until it is
     * aligned. */
    value1 = (Uint8)c;
    while ((uintptr_t)dstp1 & 0x3) {
        if (len--) {
            *dstp1++ = value1;
        } else {
            /* Ran out of bytes before reaching alignment: nothing left to do. */
            return dst;
        }
    }

    /* Replicate the byte into all four lanes of a 32-bit word. */
    value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
    dstp4 = (Uint32 *)dstp1;
    left = (len % 4);
    len /= 4;
    while (len--) {
        *dstp4++ = value4;
    }

    /* Write the 0..3 trailing bytes. A bounded loop is used rather than a
     * switch that falls through its cases. */
    dstp1 = (Uint8 *)dstp4;
    while (left--) {
        *dstp1++ = value1;
    }

    return dst;
#endif /* HAVE_MEMSET */
}
|
||||
|
||||
/* Note that memset() is a byte assignment and this is a 32-bit assignment, so they're not directly equivalent. */
|
||||
/*
 * Store `val` into `dwords` consecutive 32-bit slots starting at `dst`,
 * then return `dst`.
 */
void *SDL_memset4(void *dst, Uint32 val, size_t dwords)
{
#if defined(__APPLE__) && defined(HAVE_STRING_H)
    /* The length argument here is in bytes: four per 32-bit slot. */
    memset_pattern4(dst, &val, dwords * 4);
#elif defined(__GNUC__) && defined(__i386__)
    /* Scratch outputs tied to the three inputs through matching constraints,
       so the compiler knows the instruction modifies those registers. */
    int out_dst, out_val, out_count;
    __asm__ __volatile__(
        "cld \n\t"
        "rep ; stosl \n\t"
        : "=&D"(out_dst), "=&a"(out_val), "=&c"(out_count)
        : "0"(dst), "1"(val), "2"(SDL_static_cast(Uint32, dwords))
        : "memory");
#else
    Uint32 *out = SDL_static_cast(Uint32 *, dst);
    size_t head = dwords % 4;   /* stores that do not fill a group of four */
    size_t groups = dwords / 4; /* full groups of four stores */

    /* Write the odd leading stores first, then the unrolled groups. */
    while (head--) {
        *out++ = val;
    }
    while (groups--) {
        *out++ = val;
        *out++ = val;
        *out++ = val;
        *out++ = val;
    }
#endif
    return dst;
}
|
||||
|
||||
#if defined(HAVE_CTYPE_H) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
|
||||
int SDL_isblank(int x)
|
||||
{
|
||||
|
Reference in New Issue
Block a user