implement VEX parser

This commit is contained in:
mykola2312 2024-08-13 18:10:37 +03:00
parent db68712b3e
commit ce12de6fc3
2 changed files with 58 additions and 6 deletions

61
genc.py
View file

@ -7,6 +7,11 @@ class InstructionType(Enum):
VEX = 1
EVEX = 2
def __str__(self):
    """Return the short lowercase tag used when printing this type."""
    short_names = {
        InstructionType.STANDARD: "std",
        InstructionType.VEX: "vex",
        InstructionType.EVEX: "evex",
    }
    # .get() keeps the fall-through result (None) for any member not listed.
    return short_names.get(self)
class Instruction:
def __init__(self, ins):
self._opc = ins.find("opc").text
@ -23,7 +28,7 @@ class Instruction:
pass
def __str__(self):
    """Return the fields common to every instruction kind.

    The result is prefixed with the type tag from ``get_type()`` and lists
    the mnemonic, the opcode bytes and whether a ModRM byte is present.
    Encoding-specific fields are not included here.
    """
    return f"<{self.get_type()}> {self.mnemonic} bytes {self.bytes} has_modrm {self.has_modrm()}"
class InstructionCommon:
REX_REGEX = re.compile("^REX\\.(.)")
@ -60,8 +65,6 @@ class StandardInstruction(Instruction):
if imm: self.imm = imm.group(1)
if value: self.value = value.group(1)
if opreg: self.opreg = opreg.group(1)
print(self)
def get_type(self):
    """Return the encoding type of this instruction (always STANDARD)."""
    return InstructionType.STANDARD
@ -70,11 +73,59 @@ class StandardInstruction(Instruction):
return self.modrm or self.digit is not None
def __str__(self):
    """Return the base description followed by the standard-encoding fields.

    Appends rex, digit, modrm, imm, value and opreg to whatever the parent
    class's ``__str__`` produces.
    """
    return f"{super().__str__()} rex {self.rex} digit {self.digit} modrm {self.modrm} imm {self.imm} value {self.value} opreg {self.opreg}"
class VEXInstruction(Instruction):
    """Instruction whose opcode string begins with a dotted VEX descriptor.

    Attributes set by the constructor:
        l:     False when the descriptor contains 128, L0 or LZ (or LIG),
               True when it contains 256 or L1.
        lig:   True only when the descriptor contains LIG.
        w:     False for W0 (or WIG), True for W1.
        wig:   True only when the descriptor contains WIG.
        bytes: all matches of InstructionCommon.BYTES_REGEX in the opcode text.
        modrm: True when InstructionCommon.MODRM_REGEX matches the opcode text.
        imm:   first capture group of InstructionCommon.IMM_REGEX, or None.
    """

    def __init__(self, ins):
        """Parse the VEX descriptor and opcode text of ``ins``.

        Args:
            ins: element handed to ``Instruction.__init__``, which reads the
                opcode string from its ``opc`` child.

        Raises:
            RuntimeError: if the descriptor carries no recognised L field
                (128/L0/LZ/256/L1/LIG) or no recognised W field (W0/W1/WIG).
        """
        super().__init__(ins)

        # The source opcode strings are inconsistently typed: a space may
        # follow a '.', which would break the descriptor into two tokens.
        # Collapse ". " into "." before splitting on spaces.
        self._opc = re.sub(r"\. ", ".", self._opc)

        # First token is the VEX descriptor; the remaining tokens are glued
        # together (no separator) and scanned by the shared regexes below.
        parts = self._opc.split(" ")
        vex, opc = parts[0], "".join(parts[1:])
        print(vex, opc)  # debug trace of the split

        vex_fields = set(vex.split("."))

        # Vector length field.
        self.lig = False
        if not vex_fields.isdisjoint({"128", "L0", "LZ"}):
            self.l = False
        elif not vex_fields.isdisjoint({"256", "L1"}):
            self.l = True
        elif "LIG" in vex_fields:
            # Length ignored: stored as l = False with the lig flag raised.
            self.l = False
            self.lig = True
        else:
            raise RuntimeError("VEX.L is unknown!")

        # W field.
        self.wig = False
        if "W0" in vex_fields:
            self.w = False
        elif "W1" in vex_fields:
            self.w = True
        elif "WIG" in vex_fields:
            # W ignored: stored as w = False with the wig flag raised.
            self.wig = True
            self.w = False
        else:
            raise RuntimeError("VEX.W is unknown!")

        # Opcode bytes, ModRM presence and immediate come from the opcode text.
        self.bytes = InstructionCommon.BYTES_REGEX.findall(opc)
        modrm = InstructionCommon.MODRM_REGEX.search(opc)
        imm = InstructionCommon.IMM_REGEX.search(opc)

        self.modrm = modrm is not None
        self.imm = imm.group(1) if imm else None

        print(self)  # debug trace of the parsed instruction

    def get_type(self):
        """Return the encoding type of this instruction (always VEX)."""
        return InstructionType.VEX

    def has_modrm(self):
        """Return True when the opcode text matched the ModRM pattern."""
        return self.modrm

    def __str__(self):
        """Return the base description followed by the l/lig/w/wig fields."""
        return f"{super().__str__()} l {self.l} lig {self.lig} w {self.w} wig {self.wig}"
class EVEXInstruction(Instruction):
def __init__(self, ins):

View file

@ -2,4 +2,5 @@ The Intel OpCode Syntax can tell if there is ModRM byte, as well as if registers
and can tell the size of the immediate (ib, iw, id for example). The /digit can also indicate the presence of a ModRM byte.
The size of displacement is also dictated by cw/cd.
ModRM can also tell whether there is a SIB byte or not.
VEX prefixes. 0xC5 for 2-byte VEX and 0xC4 for 3-byte prefix
VEX prefixes. 0xC5 for 2-byte VEX and 0xC4 for 3-byte prefix
For VEX instructions all we need to know is VEX prefix length, opcode bytes, ModRM presence and immediates