294 lines
8.1 KiB
C
294 lines
8.1 KiB
C
#include "rtdisasm.h"
|
|
#include "rtdisasm_table.h"
|
|
#include <string.h>
|
|
|
|
// Legacy prefix byte values. They are kept as macros so the very same
// names can be listed in the std_prefixes table further down.

// lock / repeat group
#define PREFIX_LOCK  0xF0
#define PREFIX_REPNZ 0xF2 // also BND prefix
#define PREFIX_REPZ  0xF3

// segment override group
#define PREFIX_CS_OVERRIDE 0x2E // also branch-not-taken hint
#define PREFIX_SS_OVERRIDE 0x36
#define PREFIX_DS_OVERRIDE 0x3E // also branch-taken hint
#define PREFIX_ES_OVERRIDE 0x26
#define PREFIX_FS_OVERRIDE 0x64
#define PREFIX_GS_OVERRIDE 0x65

// operand / address size group
#define PREFIX_OPERAND_OVERRIDE 0x66
#define PREFIX_ADDRESS_OVERRIDE 0x67

// every byte value that is_std_prefix() accepts
static const uint8_t std_prefixes[] = {
    PREFIX_LOCK,
    PREFIX_REPNZ,
    PREFIX_REPZ,

    PREFIX_CS_OVERRIDE,
    PREFIX_SS_OVERRIDE,
    PREFIX_DS_OVERRIDE,
    PREFIX_ES_OVERRIDE,
    PREFIX_FS_OVERRIDE,
    PREFIX_GS_OVERRIDE,

    PREFIX_OPERAND_OVERRIDE,
    PREFIX_ADDRESS_OVERRIDE
};

// number of entries in std_prefixes
static const unsigned std_prefixes_len = sizeof(std_prefixes) / sizeof(std_prefixes[0]);

// Returns 1 if `prefix` is one of the bytes listed in std_prefixes,
// 0 otherwise.
static int is_std_prefix(const uint8_t prefix)
{
    // the table holds single bytes, so a byte search covers it exactly
    return memchr(std_prefixes, prefix, std_prefixes_len) != NULL;
}
|
|
|
|
// first byte of the two VEX prefix forms recognised here
#define VEX_2BYTE 0xC5
#define VEX_3BYTE 0xC4

// Classifies `vex_first`: returns 2 for VEX_2BYTE, 3 for VEX_3BYTE and
// 0 for any other byte. The caller uses the non-zero result as the number
// of bytes to skip.
static int test_vex_prefix(const uint8_t vex_first)
{
    switch (vex_first)
    {
        case VEX_2BYTE: return 2;
        case VEX_3BYTE: return 3;
        default:        return 0;
    }
}
|
|
|
|
#define REX_SIG 0b01000000
|
|
#define REX_MASK 0b11110000
|
|
#define REX_VALUE_MASK 0b00001111
|
|
#define REX_B_VALUE (1<<0)
|
|
#define REX_X_VALUE (1<<1)
|
|
#define REX_R_VALUE (1<<2)
|
|
#define REX_W_VALUE (1<<3)
|
|
|
|
// returns -1 if not rex, and non-negative is REX_* define
|
|
static int test_rex_prefix(const uint8_t rex)
|
|
{
|
|
if ((rex & REX_MASK) != REX_SIG) return -1;
|
|
|
|
const uint8_t rex_value = rex & REX_VALUE_MASK;
|
|
switch (rex_value)
|
|
{
|
|
case REX_B_VALUE: return REX_B;
|
|
case REX_X_VALUE: return REX_X;
|
|
case REX_R_VALUE: return REX_R;
|
|
case REX_W_VALUE: return REX_W;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
// so we can ignore register encoded in opcode
|
|
#define OPREG_MASK 0b11111000
|
|
|
|
static const instruction_t* find_instruction(const uint8_t* cur, unsigned type, int vex, int rex)
|
|
{
|
|
for (unsigned i = 0; i < rtdisasm_table_len; i++)
|
|
{
|
|
const instruction_t* ins = &rtdisasm_table[i];
|
|
|
|
if (ins->config.type != type) continue;
|
|
// check rex if instruction does rex, and if provided rex is not -1
|
|
if (rex != -1 && type == INSTRUCTION_STD
|
|
&& ins->config.has_rex && ins->std.rex != rex)
|
|
{
|
|
// rex doesn't match, skip instruction
|
|
continue;
|
|
}
|
|
|
|
if (ins->config.has_modrm)
|
|
{
|
|
// instruction encoding employs register embedded into last opcode byte
|
|
// so we need to apply bit mask
|
|
|
|
// plain means opcode bytes that are not affected
|
|
// by opcode register encoding
|
|
uint16_t plain_len = ins->opcode_len - 1;
|
|
if (plain_len)
|
|
{
|
|
if (memcmp(cur, &ins->opcode, plain_len))
|
|
continue;
|
|
}
|
|
|
|
// now let's match the opreg encoded byte
|
|
if ((cur[plain_len] & OPREG_MASK) != ins->opcode[plain_len])
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// just compare opcodes
|
|
if (memcmp(cur, &ins->opcode, ins->opcode_len))
|
|
{
|
|
// opcodes don't match up, skip
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// for now, everything looks good, so that's our instruction
|
|
return ins;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
// One row of the ModRM lookup table: for a given mod / rm pair, whether a
// SIB byte follows and how many displacement bytes follow.
typedef struct {
    uint8_t mod;
    uint8_t rm;
    uint8_t has_sib;
    uint8_t disp_len;
} modrm_encoding_t;

// Only mod / rm pairs that need a SIB byte or a displacement are listed;
// every other pair keeps the defaults set in analyze_modrm().
static const modrm_encoding_t modrm_encodings[] = {
    { .mod = 0b00, .rm = 0b100, .has_sib = 1, .disp_len = 0 },
    { .mod = 0b00, .rm = 0b101, .has_sib = 0, .disp_len = 4 },

    { .mod = 0b01, .rm = 0b000, .has_sib = 0, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b001, .has_sib = 0, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b010, .has_sib = 0, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b011, .has_sib = 0, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b100, .has_sib = 1, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b101, .has_sib = 0, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b110, .has_sib = 0, .disp_len = 1 },
    { .mod = 0b01, .rm = 0b111, .has_sib = 0, .disp_len = 1 },

    { .mod = 0b10, .rm = 0b000, .has_sib = 0, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b001, .has_sib = 0, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b010, .has_sib = 0, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b011, .has_sib = 0, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b100, .has_sib = 1, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b101, .has_sib = 0, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b110, .has_sib = 0, .disp_len = 4 },
    { .mod = 0b10, .rm = 0b111, .has_sib = 0, .disp_len = 4 },
};
static const unsigned modrm_encodings_len = sizeof(modrm_encodings) / sizeof(modrm_encodings[0]);

// Splits `modrm` into its mod (top two bits) and rm (low three bits) fields
// and reports, through the out parameters, whether a SIB byte follows and
// how many displacement bytes follow. Pairs absent from modrm_encodings
// yield *has_sib = 0 and *disp_len = 0.
static void analyze_modrm(const uint8_t modrm, uint8_t* has_sib, uint8_t* disp_len)
{
    const uint8_t mod_bits = modrm >> 6;
    const uint8_t rm_bits = modrm & 0x07;

    // result for pairs that are not in the table
    *has_sib = 0;
    *disp_len = 0;

    const modrm_encoding_t* const last = modrm_encodings + modrm_encodings_len;
    for (const modrm_encoding_t* row = modrm_encodings; row != last; row++)
    {
        if (row->mod != mod_bits || row->rm != rm_bits) continue;

        // each mod / rm pair appears at most once, so the first hit is final
        *has_sib = row->has_sib;
        *disp_len = row->disp_len;
        return;
    }
}
|
|
|
|
static unsigned imm2length(uint8_t imm)
|
|
{
|
|
switch (imm)
|
|
{
|
|
case IMM_B: return 1;
|
|
case IMM_W: return 2;
|
|
case IMM_D: return 4;
|
|
case IMM_O: return 8;
|
|
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
static unsigned value2length(uint8_t value)
|
|
{
|
|
switch (value)
|
|
{
|
|
case VALUE_B: return 1;
|
|
case VALUE_W: return 2;
|
|
case VALUE_D: return 4;
|
|
case VALUE_P: return 6;
|
|
case VALUE_O: return 8;
|
|
case VALUE_T: return 10;
|
|
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
int rtdisasm_analyze_single(const uint8_t* code, uint8_t size)
|
|
{
|
|
const uint8_t* cur = code;
|
|
const uint8_t* const end = code + size;
|
|
if (cur == end) return -1;
|
|
|
|
// skip standard prefixes
|
|
while (is_std_prefix(*cur))
|
|
{
|
|
if (++cur == end) return -1;
|
|
}
|
|
|
|
unsigned type = INSTRUCTION_STD;
|
|
|
|
// first, we need to test vex prefix, because only then comes the rex
|
|
int vex = test_vex_prefix(*cur);
|
|
if (vex)
|
|
{
|
|
// it's vex, lets advance 2 or 3 bytes
|
|
cur += vex;
|
|
if (cur >= end) return -1;
|
|
type = INSTRUCTION_VEX;
|
|
}
|
|
|
|
// test if its rex prefix, if so we will look specifically for
|
|
// instructions with rex prefix
|
|
int rex = test_rex_prefix(*cur);
|
|
if (rex != -1)
|
|
{
|
|
// it's rex, so advance 1 byte
|
|
if (++cur >= end) return -1;
|
|
}
|
|
|
|
const instruction_t* ins = find_instruction(cur, type, vex, rex);
|
|
if (!ins) return 0; // no instruction
|
|
|
|
// since we now instruction, we need advance past opcode bytes
|
|
cur += ins->opcode_len;
|
|
if (cur >= end) return -1;
|
|
|
|
// if instruction has ModRM, we need to analyze it,
|
|
// since it can lead to SIB byte
|
|
if (ins->config.has_modrm)
|
|
{
|
|
// consume ModRM byte
|
|
uint8_t modrm = *cur++;
|
|
if (cur >= end) return -1;
|
|
|
|
uint8_t has_sib, disp_len;
|
|
analyze_modrm(modrm, &has_sib, &disp_len);
|
|
|
|
if (has_sib)
|
|
{
|
|
// consume SIB byte
|
|
if (++cur >= end) return -1;
|
|
}
|
|
|
|
// add displacement
|
|
cur += disp_len;
|
|
if (cur >= end) return -1;
|
|
}
|
|
|
|
// now we need to skip the immediate values
|
|
if (type == INSTRUCTION_STD)
|
|
{
|
|
if (ins->config.has_imm)
|
|
cur += imm2length(ins->std.imm);
|
|
else if (ins->config.has_value)
|
|
cur += value2length(ins->std.value);
|
|
|
|
if (cur >= end) return -1;
|
|
}
|
|
|
|
// return length of entire decoded instruction
|
|
return (int)((uintptr_t)cur-(uintptr_t)code);
|
|
}
|