mirror of
https://github.com/ARM-software/arm-trusted-firmware.git
synced 2025-04-23 21:44:15 +00:00

Currently our memset() implementation is safe, but slow. The main reason for that seems to be the single byte writes that it issues, which can show horrible performance, depending on the implementation of the load/store subsystem. Improve the algorithm by trying to issue 64-bit writes. As this only works with aligned pointers, have a head and a tail section which covers unaligned pointers, and leave the bulk of the work to the middle section that does use 64-bit writes. Put through some unit tests, which exercise all combinations of nasty input parameters (pointers with various alignments, various odd and even sizes, corner cases of content to write (-1, 256)). Change-Id: I28ddd3d388cc4989030f1a70447581985368d5bb Signed-off-by: Andre Przywara <andre.przywara@arm.com>
48 lines
966 B
C
48 lines
966 B
C
/*
|
|
* Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
|
|
*
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
*/
|
|
|
|
#include <stddef.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
|
|
/*
 * Fill the first `count` bytes of the memory starting at `dst` with the byte
 * value (unsigned char)val.
 *
 * The work is done in three phases:
 *   1. head: single byte stores until the write pointer is 8-byte aligned,
 *   2. bulk: 64-bit stores of the fill byte replicated into all eight lanes,
 *   3. tail: single byte stores for the remaining (fewer than 8) bytes.
 *
 * dst:   start of the region to fill.
 * val:   fill value; only its low 8 bits are used.
 * count: number of bytes to write; 0 writes nothing.
 *
 * Returns `dst` unchanged.
 */
void *memset(void *dst, int val, size_t count)
{
	/*
	 * Work on unsigned bytes so that narrowing `val` is well-defined
	 * regardless of whether plain char is signed on this target.
	 */
	const unsigned char byte = (unsigned char)val;
	unsigned char *ptr = dst;
	uint64_t *ptr64;
	uint64_t fill = byte;

	/* Simplify code below by making sure we write at least one byte. */
	if (count == 0U) {
		return dst;
	}

	/* Handle the first part, until the pointer becomes 64-bit aligned. */
	while (((uintptr_t)ptr & 7U) != 0U) {
		*ptr++ = byte;
		if (--count == 0U) {
			return dst;
		}
	}

	/* Duplicate the fill byte to the rest of the 64-bit word. */
	fill |= fill << 8;
	fill |= fill << 16;
	fill |= fill << 32;

	/*
	 * Use 64-bit writes for as long as possible. The head loop above
	 * guarantees that ptr is 8-byte aligned at this point.
	 * NOTE(review): this stores through a uint64_t lvalue into
	 * caller-supplied memory of unknown effective type - confirm the
	 * build options make this acceptable for the toolchain in use.
	 */
	ptr64 = (uint64_t *)(void *)ptr;
	for (; count >= 8U; count -= 8U) {
		*ptr64++ = fill;
	}

	/* Handle the remaining part byte-per-byte. */
	ptr = (unsigned char *)ptr64;
	while (count-- > 0U) {
		*ptr++ = byte;
	}

	return dst;
}
|