From 3229779aff7b18b4e1d1e3ae471958fa07cef7f4 Mon Sep 17 00:00:00 2001
From: mykola2312 <49044616+mykola2312@users.noreply.github.com>
Date: Wed, 11 May 2022 19:51:24 +0300
Subject: [PATCH] implement x86-64 assembly-optimized functions _cu_memcpy, _cu_memset, cu_memzero

---
Review notes (text between the "---" line and the diff is ignored by git-am):
 * _cu_memset took its byte count from RSI (val) and its fill pattern from
   RDX (size). It now writes RDX bytes, each equal to the low byte of val.
 * _cu_memcpy and _cu_memset now return the destination pointer in RAX, as
   their void* prototypes in cutil.h promise.
 * The second #include context line of the first cutil.h hunk carries no
   header name in this copy of the patch; restore it from the tree before
   applying.
 * Leading whitespace and blank context lines were reconstructed, and the
   index blob ids were not recomputed for the changed assembly.

 arch/x86_64.S | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 cutil.h       |  4 ++++
 test.c        | 11 +++++++++++
 3 files changed, 66 insertions(+)

diff --git a/arch/x86_64.S b/arch/x86_64.S
index 207caa0..070d0c2 100644
--- a/arch/x86_64.S
+++ b/arch/x86_64.S
@@ -1,6 +1,9 @@
 .globl hw_bswap16
 .globl hw_bswap32
 .globl hw_bswap64
+.globl _cu_memcpy
+.globl _cu_memset
+.globl cu_memzero
 .globl cu_memtest
 
 .text
@@ -20,6 +23,54 @@ hw_bswap64:
 	bswap %rax
 	ret
 
+
+_cu_memcpy:
+	// RDI - dst, RSI - src, RDX - size; returns dst in RAX
+	mov %rdi, %rax
+	mov %rdx, %rcx
+	shr $3, %rcx
+	rep movsq
+
+	mov %rdx, %rcx
+	and $7, %rcx
+	rep movsb
+	ret
+
+_cu_memset:
+	// RDI - ptr, RSI - val, RDX - size; returns ptr in RAX
+	// stos advances RDI, so keep the original ptr for the return value
+	mov %rdi, %r8
+
+	// replicate the low byte of val into all 8 bytes of RAX for stosq
+	movzbl %sil, %eax
+	movabs $0x0101010101010101, %rcx
+	imul %rcx, %rax
+
+	mov %rdx, %rcx
+	shr $3, %rcx
+	rep stosq
+
+	mov %rdx, %rcx
+	and $7, %rcx
+	rep stosb
+
+	mov %r8, %rax
+	ret
+
+cu_memzero:
+	// RDI - dst, RSI - size
+	xor %rax, %rax
+
+	mov %rsi, %rcx
+	shr $3, %rcx
+	rep stosq
+
+	mov %rsi, %rcx
+	and $7, %rcx
+	rep stosb
+
+	ret
+
 cu_memtest:
 	// RDI - ptr, RSI - size
 	xchg %rsi, %rdi
diff --git a/cutil.h b/cutil.h
index f6e2b43..b86838c 100644
--- a/cutil.h
+++ b/cutil.h
@@ -4,6 +4,9 @@
 #include "cutypes.h"
 #include
 
+extern void* _cu_memset(void* dst, int val, size_t size);
+extern void* _cu_memcpy(void* dst, const void* src, size_t size);
+
 #define CUTIL_MALLOC malloc
 #define CUTIL_REALLOC realloc
 #define CUTIL_FREE free
@@ -22,5 +25,6 @@ void cutil_init();
 void cutil_exit();
 
 extern const void* cu_memtest(const void* mem, uint size);
+extern void cu_memzero(void* dst, size_t size);
 
 #endif
\ No newline at end of file
diff --git a/test.c b/test.c
index 78bd65d..ff11b9f 100644
--- a/test.c
+++ b/test.c
@@ -109,6 +109,17 @@ int main()
 	char val2[] = {0,0,0,1,0,0,0,0,0,0,4,0,0};
 	printf("val2\tcu_memtest\t%p\n", cu_memtest(val2, sizeof(val2)));
 
+
uint val3[4] = {1, 2, 3, 4}; + uint val4[4]; + + _cu_memcpy(val4, val3, sizeof(val3)); + cu_memzero(val3, sizeof(val3)); + printf("cu_memzero test\t%p\n", cu_memtest(val3, sizeof(val3))); + printf("val4 values\n"); + for (unsigned i = 0; i < 4; i++) + printf("\t%u\n", val4[i]); + printf("\n"); + printf("[endian]\n"); printf("cutil_endian\t%u\n", cu_endian); printf("%x\t%x\n", 0x1234, cu_bswap16(0x1234));