From 44112dc4a79452999f80c1e8491a5093fc56d2e9 Mon Sep 17 00:00:00 2001 From: mykola2312 <49044616+mykola2312@users.noreply.github.com> Date: Sun, 25 Aug 2024 13:10:11 +0300 Subject: [PATCH] implement program and section header parsing --- src/relf/Makefile | 2 +- src/relf/relf.c | 168 ++++++++++++++++++++++++++++++++++++++---- src/relf/relf.h | 27 +++++-- src/relf/relf_debug.h | 28 +++++++ 4 files changed, 206 insertions(+), 19 deletions(-) create mode 100644 src/relf/relf_debug.h diff --git a/src/relf/Makefile b/src/relf/Makefile index a0ecdf2..9c20c9b 100644 --- a/src/relf/Makefile +++ b/src/relf/Makefile @@ -6,7 +6,7 @@ LDFLAGS = -z noexecstack SRC = relf.c OBJ := $(addprefix $(OBJ_DIR)/,$(patsubst %.s,%.o,$(patsubst %.c,%.o,$(SRC)))) -DEPS = relf.h +DEPS = relf.h relf_debug.h $(OBJ_DIR)/%.o: %.c @mkdir -p $(OBJ_DIR) diff --git a/src/relf/relf.c b/src/relf/relf.c index b682613..69f1842 100644 --- a/src/relf/relf.c +++ b/src/relf/relf.c @@ -1,4 +1,5 @@ #include "relf/relf.h" +#include "relf/relf_debug.h" #include "blackjack/debug.h" #include #include @@ -6,8 +7,28 @@ #include #include #include +#include #include +// returns 1 if size not suitable for 32 bit host +static int check_32bit_limit(off_t st_size) +{ +#if __x86_64__ + return 0; +#else + // on 32bit hosts we need to check if 64bit file size + // does not exceed 32bit size_t, which mmap/munmap uses + return (st_size > SIZE_MAX); +#endif +} + +static void relf_unmap(relf_t* relf) +{ + munmap(relf->image, relf->image_size); + relf->image = NULL; + relf->image_size = 0; +} + relf_value_t relf_open(relf_t* relf, const char* path) { // reset struct @@ -17,20 +38,19 @@ relf_value_t relf_open(relf_t* relf, const char* path) struct stat st = {0}; if (stat(path, &st)) return RELF_ERROR(RELF_FAILED_OPEN); - - TRACE("st_size %lu\n", st.st_size); + if (check_32bit_limit(st.st_size)) + return RELF_ERROR(RELF_TOO_BIG); + + relf->image_size = (size_t)st.st_size; + TRACE("image_size %lu\n", relf->image_size); // open file and read ELF header int fd = open(path, O_RDONLY); if (fd < 0) return RELF_ERROR(RELF_FAILED_OPEN); - union { - Elf64_Ehdr hdr64; - Elf32_Ehdr hdr32; - } e; - - // read biggest value by default - if (read(fd, &e.hdr64, sizeof(e.hdr64)) < sizeof(e.hdr64)) + // read ELF's ident header, which contains magic and type + uint8_t e_ident[EI_NIDENT]; + if (read(fd, e_ident, EI_NIDENT) < EI_NIDENT) { close(fd); return RELF_ERROR(RELF_FAILED_OPEN); @@ -38,7 +58,7 @@ relf_value_t relf_open(relf_t* relf, const char* path) // check magic and decide ELF type // we operate here ELF64 variant since it same as in ELF32 - if (!memcmp(e.hdr64.e_ident, ELFMAG, sizeof(ELFMAG))) + if (!memcmp(e_ident, ELFMAG, sizeof(ELFMAG))) { // not an ELF file at all close(fd); @@ -46,7 +66,7 @@ relf_value_t relf_open(relf_t* relf, const char* path) } // 32 bit or 64 bit - switch (e.hdr64.e_ident[EI_CLASS]) + switch (e_ident[EI_CLASS]) { case ELFCLASS32: relf->type = RELF_32BIT; break; case ELFCLASS64: relf->type = RELF_64BIT; break; @@ -55,7 +75,7 @@ relf_value_t relf_open(relf_t* relf, const char* path) return RELF_ERROR(RELF_UNSUPPORTED); } - if (e.hdr64.e_ident[EI_DATA] != ELFDATA2LSB) + if (e_ident[EI_DATA] != ELFDATA2LSB) { // not little endian, we can't work with that close(fd); @@ -65,7 +85,7 @@ relf_value_t relf_open(relf_t* relf, const char* path) // we don't care about ABI, OS, machine type or ELF type, // as long as we got little endian we're good to go, so // let's map file to memory - relf->image = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + relf->image = mmap(NULL, relf->image_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); // we can close file after mmap // but still check for errors if (relf->image == MAP_FAILED) @@ -75,6 +95,128 @@ relf_value_t relf_open(relf_t* relf, const char* path) } // now we need to parse segments and section headers + + // get segment and section numbers + if (relf->type == RELF_64BIT) + { + Elf64_Ehdr* elf = (Elf64_Ehdr*)relf->image; + relf->segment_num = elf->e_phnum; + relf->section_num = elf->e_shnum; + } + else + { + Elf32_Ehdr* elf = (Elf32_Ehdr*)relf->image; + relf->segment_num = elf->e_phnum; + relf->section_num = elf->e_shnum; + } + TRACE("segment_num %u section_num %u\n", relf->segment_num, relf->section_num); + + if (relf->segment_num) + relf->segments = (relf_segment_t*)calloc(relf->segment_num, sizeof(relf_segment_t)); + if (relf->section_num) + relf->sections = (relf_section_t*)calloc(relf->section_num, sizeof(relf_section_t)); + + // load segment info + if (relf->type == RELF_64BIT) + { + Elf64_Ehdr* elf = (Elf64_Ehdr*)relf->image; + for (unsigned i = 0; i < relf->segment_num; i++) + { + const Elf64_Phdr* hdr = (const Elf64_Phdr*) + ((uint8_t*)relf->image + elf->e_phoff + elf->e_phentsize * i); + relf_segment_t* segment = &relf->segments[i]; + + segment->type = hdr->p_type; + segment->flags = hdr->p_flags; + + segment->f_offset = hdr->p_offset; + segment->f_size = hdr->p_filesz; + + segment->v_addr = hdr->p_vaddr; + segment->v_size = hdr->p_memsz; + + TRACE_SEGMENT(segment); + } + } + else + { + Elf32_Ehdr* elf = (Elf32_Ehdr*)relf->image; + for (unsigned i = 0; i < relf->segment_num; i++) + { + const Elf32_Phdr* hdr = (const Elf32_Phdr*) + ((uint8_t*)relf->image + elf->e_phoff + elf->e_phentsize * i); + relf_segment_t* segment = &relf->segments[i]; + + segment->type = hdr->p_type; + segment->flags = hdr->p_flags; + + segment->f_offset = hdr->p_offset; + segment->f_size = hdr->p_filesz; + + segment->v_addr = hdr->p_vaddr; + segment->v_size = hdr->p_memsz; + + TRACE_SEGMENT(segment); + } + } + + // load section info + if (relf->type == RELF_64BIT) + { + Elf64_Ehdr* elf = (Elf64_Ehdr*)relf->image; + for (unsigned i = 0; i < relf->section_num; i++) + { + const Elf64_Shdr* hdr = (const Elf64_Shdr*) + ((uint8_t*)relf->image + elf->e_shoff + elf->e_shentsize * i); + relf_section_t* section = &relf->sections[i]; + + section->type = hdr->sh_type; + section->flags = hdr->sh_flags; + + // we will resolve names when string table is resolved + section->name = NULL; + + section->f_offset = hdr->sh_offset; + section->f_size = hdr->sh_size; + + section->v_addr = hdr->sh_addr; + + section->link = hdr->sh_link; + section->info = hdr->sh_info; + + section->entsize = hdr->sh_entsize; + + TRACE_SECTION(section); + } + } + else + { + Elf32_Ehdr* elf = (Elf32_Ehdr*)relf->image; + for (unsigned i = 0; i < relf->section_num; i++) + { + const Elf32_Shdr* hdr = (const Elf32_Shdr*) + ((uint8_t*)relf->image + elf->e_shoff + elf->e_shentsize * i); + relf_section_t* section = &relf->sections[i]; + + section->type = hdr->sh_type; + section->flags = hdr->sh_flags; + + // we will resolve names when string table is resolved + section->name = NULL; + + section->f_offset = hdr->sh_offset; + section->f_size = hdr->sh_size; + + section->v_addr = hdr->sh_addr; + + section->link = hdr->sh_link; + section->info = hdr->sh_info; + + section->entsize = hdr->sh_entsize; + + TRACE_SECTION(section); + } + } return RELF_ERROR(RELF_OK); } diff --git a/src/relf/relf.h b/src/relf/relf.h index 2111cd4..e056c19 100644 --- a/src/relf/relf.h +++ b/src/relf/relf.h @@ -2,12 +2,14 @@ #define __RELF_H #include +#include // composite error type typedef enum { - RELF_MMAP_FAILED = -4, // file memory mapping failed - RELF_UNSUPPORTED = -3, // big endian or not x86/x86-64 architecture - RELF_NOT_AN_ELF = -2, // wrong magic + RELF_MMAP_FAILED = -5, // file memory mapping failed + RELF_UNSUPPORTED = -4, // big endian or not x86/x86-64 architecture + RELF_NOT_AN_ELF = -3, // wrong magic + RELF_TOO_BIG = -2, // file is over size_t limit RELF_FAILED_OPEN = -1, // failed to stat or open file RELF_OK = 0, } relf_error_t; @@ -26,8 +28,8 @@ typedef union { #define RELF_ERROR(e) ((relf_value_t) {.error = e}) typedef enum { - RELF_64BIT, - RELF_32BIT + RELF_32BIT, + RELF_64BIT } relf_type_t; // we're using our own structures so parsing @@ -60,14 +62,29 @@ typedef struct { uint64_t f_offset; uint64_t f_size; + + uint64_t v_addr; + + uint32_t link; + uint32_t info; + + // for symbol table will tell size of symbol entry + uint64_t entsize; } relf_section_t; // relf instance typedef struct { void* image; + size_t image_size; // is it 64 or 32 bit mode relf_type_t type; + + unsigned segment_num; + unsigned section_num; + + relf_segment_t* segments; + relf_section_t* sections; } relf_t; // opens ELF file, checks ELF magic and maps it into memory diff --git a/src/relf/relf_debug.h b/src/relf/relf_debug.h new file mode 100644 index 0000000..05607f8 --- /dev/null +++ b/src/relf/relf_debug.h @@ -0,0 +1,28 @@ +#ifndef __RELF_DEBUG_H +#define __RELF_DEBUG_H + +#include "blackjack/debug.h" + +#ifdef DEBUG +#define TRACE_SEGMENT(segment) \ + TRACE("segment type %u flags %u f_offset 0x%lx f_size %lu v_addr 0x%lx v_size %lu\n", \ + segment->type, segment->flags, \ + segment->f_offset, segment->f_size, \ + segment->v_addr, segment->v_size \ + ) + +#define TRACE_SECTION(section) \ + TRACE("section type %x flags %x name %s f_offset 0x%lx f_size %lu v_addr 0x%lx link %x info %x entsize %lu\n", \ + section->type, section->flags, \ + section->name, \ + section->f_offset, section->f_size, \ + section->v_addr, \ + section->link, section->info, \ + section->entsize \ + ) +#else +#define TRACE_SEGMENT(segment) +#define TRACE_SECTION(section) +#endif + +#endif \ No newline at end of file