From 4dc9fe3a550f66ccfca225f59c62d8abe9fc685a Mon Sep 17 00:00:00 2001
From: mykola2312 <49044616+mykola2312@users.noreply.github.com>
Date: Sun, 15 May 2022 21:19:38 +0300
Subject: [PATCH] try assembly memmove, doesn't work because granularity of move

---
Review notes (text between the "---" line and the first "diff --git" line is
ignored by git-am):

 * _cu_memmove, dst > src path: after the descending "rep movsq" both
   pointers sit 8 bytes below the last quadword copied, so the highest
   remaining tail byte is at +7, not +8. The original "add $8" copied the
   tail one byte too high. This is probably the failure that the subject
   attributes to move granularity.
 * "jl" became "jb": dst and src are addresses, so the ordering test has to
   be unsigned.
 * "mov %rdi, %rax" was added so the routine returns dst, matching the
   void* prototype this patch adds to cutil.h. Hunk and diffstat counts
   were adjusted for that one extra line.
 * This patch text was rebuilt from a copy whose line breaks and
   indentation had been collapsed. Tabs, spaces and the position of blank
   context lines are best-effort and should be checked against the tree.
   The index hashes are the original ones and do not reflect the changes
   listed above.

 arch/x86_64.S | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
 cutil.c       |  2 +-
 cutil.h       |  4 ++--
 test.c        |  8 ++++----
 4 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/arch/x86_64.S b/arch/x86_64.S
index 8f3a74f..55f7b79 100644
--- a/arch/x86_64.S
+++ b/arch/x86_64.S
@@ -2,6 +2,7 @@
 .globl hw_bswap32
 .globl hw_bswap64
 .globl _cu_memcpy
+.globl _cu_memmove
 .globl _cu_memset
 .globl cu_memzero
 .globl cu_memtest
@@ -28,21 +29,67 @@ _cu_memcpy:
 	// RDI - dst, RSI - src, RDX - size
 	mov %rdx, %rcx
 	shr $3, %rcx
+	je .cpy1
 	rep movsq
-
+.cpy1:
 	mov %rdx, %rcx
 	and $7, %rcx
 	rep movsb
 	ret
 
+_cu_memmove:
+	// RDI - dst, RSI - src, RDX - size; returns dst in RAX
+	mov %rdi, %rax	// keep dst for the return value before RDI is advanced
+	cmp %rsi, %rdi
+	je .retmove	// dst == src: nothing to move
+	jb .lessmove	// unsigned compare, the operands are addresses
+.greatermove: // dst > src, copy from the end downwards
+	add %rdx, %rdi
+	add %rdx, %rsi
+	std	// DF=1: movs decrements RSI/RDI
+	mov %rdx, %rcx
+	shr $3, %rcx
+	je .greater1_	// fewer than 8 bytes: byte copy only
+	sub $8, %rdi	// point at the last quadword
+	sub $8, %rsi
+	rep movsq
+.greater1:
+	add $7, %rdi	// pointers are 8 below the last quadword; last tail byte is at +7
+	add $7, %rsi
+	mov %rdx, %rcx
+	and $7, %rcx
+	rep movsb
+	cld	// restore DF=0 for the caller
+	jmp .retmove
+.greater1_:
+	dec %rdi	// point at the last byte
+	dec %rsi
+	mov %rdx, %rcx
+	and $7, %rcx
+	rep movsb
+	cld	// restore DF=0 for the caller
+	jmp .retmove
+.lessmove: // dst < src, plain ascending copy
+	mov %rdx, %rcx
+	shr $3, %rcx
+	je .less1
+	rep movsq
+.less1:
+	mov %rdx, %rcx
+	and $7, %rcx
+	rep movsb
+.retmove:
+	ret
+
 
 _cu_memset:
 	// RDI - ptr, RSI - val, RDX - size
 	mov %rdx, %rax
 	mov %rsi, %rcx
 	shr $3, %rcx
+	je .set1
 	rep stosq
-
+.set1:
 	mov %rsi, %rcx
 	and $7, %rcx
 	rep stosb
diff --git a/cutil.c b/cutil.c
index b8170cd..0128301 100644
--- a/cutil.c
+++ b/cutil.c
@@ -25,7 +25,7 @@ void cutil_exit()
 {
 }
 
-void _cu_memmove(void* dst, void* src, size_t size)
+void __cu_memmove(void* dst, void* src, size_t size)
 {
     if (dst > src)
     {
diff --git a/cutil.h b/cutil.h
index c212a49..01cdc8d 100644
--- a/cutil.h
+++ b/cutil.h
@@ -6,6 +6,8 @@
 
 extern void* _cu_memset(void* dst, int val, size_t size);
 extern void* _cu_memcpy(void* dst, const void* src, size_t size);
+extern void* _cu_memmove(void* dst, void* src, size_t size);
+void __cu_memmove(void* dst, void* src, size_t size);
 
 #define CUTIL_MALLOC malloc
 #define CUTIL_REALLOC realloc
@@ -27,6 +29,4 @@ void cutil_exit();
 extern const void* cu_memtest(const void* mem, uint size);
 extern void cu_memzero(void* dst, size_t size);
 
-void _cu_memmove(void* dst, void* src, size_t size);
-
 #endif
\ No newline at end of file
diff --git a/test.c b/test.c
index f9f3556..0e63860 100644
--- a/test.c
+++ b/test.c
@@ -110,20 +110,20 @@ int main()
     char val2[] = {0,0,0,1,0,0,0,0,0,0,4,0,0};
     printf("val2\tcu_memtest\t%p\n", cu_memtest(val2, sizeof(val2)));
 
-    uint val3[4] = {1, 2, 3, 4};
-    uint val4[4];
+    uint val3[5] = {1, 2, 3, 4, 0};
+    uint val4[5];
     _cu_memcpy(val4, val3, sizeof(val3));
     cu_memzero(val3, sizeof(val3));
     printf("cu_memzero test\t%p\n", cu_memtest(val3, sizeof(val3)));
 
     printf("val4 values\n");
-    for (unsigned i = 0; i < 4; i++)
+    for (unsigned i = 0; i < 5; i++)
         printf("\t%u\n", val4[i]);
     printf("\n");
 
     _cu_memmove(&val4[1], &val4[0], sizeof(uint) * 3);
     printf("val4 memmove ->\n");
-    for (unsigned i = 0; i < 4; i++)
+    for (unsigned i = 0; i < 5; i++)
         printf("\t%u\n", val4[i]);
     printf("\n");
 