implement opcode matching as well as opreg

2024-08-16 12:21:58 +03:00 · 2024-08-16 12:21:58 +03:00 · 64f44d9a44
commit 64f44d9a44
parent b636bc8891
2 changed files with 40 additions and 7 deletions
--- a/include/rtdisasm.h
+++ b/include/rtdisasm.h
@ -6,7 +6,7 @@
 // code should point to a place with machine instructions, and size
 // limits the area to analyze, so that no segfaults are triggered on
 // page boundaries.
-// returns -1 when no instruction was found, 0 when size limit reached
+// returns 0 when no instruction was found, -1 when size limit reached
 // and otherwise a positive number equal to the actual instruction size
 int rtdisasm_analyze_single(const uint8_t* code, uint8_t size);
--- a/src/rtdisasm.c
+++ b/src/rtdisasm.c
@ -80,7 +80,10 @@ static int test_rex_prefix(const uint8_t rex)
    return -1;
 }
-const instruction_t* find_instruction(const uint8_t* cur, unsigned type, int vex, int rex)
+// clears the low 3 bits of an opcode byte so we can ignore the register encoded in it
 #define OPREG_MASK      0b11111000
 static const instruction_t* find_instruction(const uint8_t* cur, unsigned type, int vex, int rex)
 {
    for (unsigned i = 0; i < rtdisasm_table_len; i++)
    {
@ -95,11 +98,32 @@ const instruction_t* find_instruction(const uint8_t* cur, unsigned type, int vex
            continue;
        }
-        // compare opcodes
+        if (ins->config.has_modrm)
        if (memcmp(cur, &ins->opcode, ins->opcode_len))
        {
-            // opcodes don't match up, skip
+            // instruction encoding employs register embedded into last opcode byte
-            continue;
+            // so we need to apply bit mask
            // plain means opcode bytes that are not affected
            // by opcode register encoding
            uint16_t plain_len = ins->opcode_len - 1;
            if (plain_len)
            {
                if (memcmp(cur, &ins->opcode, plain_len))
                    continue;
            }
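            // now let's match the opreg-encoded byte
            // NOTE(review): in C `!=` binds tighter than `&`, so the check below
            // evaluates cur[plain_len] & (OPREG_MASK != ins->opcode[plain_len]);
            // it needs parentheses: (cur[plain_len] & OPREG_MASK) != ins->opcode[plain_len]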
            if (cur[plain_len] & OPREG_MASK != ins->opcode[plain_len])
                continue;
        }
        else
        {
            // just compare opcodes
            if (memcmp(cur, &ins->opcode, ins->opcode_len))
            {
                // opcodes don't match up, skip
                continue;
            }
        }
        // for now, everything looks good, so that's our instruction
@ -113,7 +137,7 @@ int rtdisasm_analyze_single(const uint8_t* code, uint8_t size)
 {
    const uint8_t* cur = code;
    const uint8_t* const end = code + size;
-    if (cur == end) return 0;
+    if (cur == end) return -1;
    // skip standard prefixes
    while (is_std_prefix(*cur))
@ -143,4 +167,13 @@ int rtdisasm_analyze_single(const uint8_t* code, uint8_t size)
    }
    const instruction_t* ins = find_instruction(cur, type, vex, rex);
    if (!ins) return 0; // no instruction
    // since we now know the instruction, we need to advance past the opcode bytes
    cur += ins->opcode_len;
    if (cur >= end) return -1;
    // if instruction has ModRM, we need to analyze it,
    // since it can lead to SIB byte
    // if (ins->config.has_modrm)
 }