154 lines
No EOL
4.6 KiB
Python
154 lines
No EOL
4.6 KiB
Python
import re
|
|
import xml.etree.ElementTree as ET
|
|
from enum import Enum
|
|
|
|
class InstructionType(Enum):
    """Encoding family an instruction belongs to."""

    STANDARD = 0
    VEX = 1
    EVEX = 2

    def __str__(self):
        """Return the short lowercase tag ("std", "vex" or "evex")."""
        # Built inside the method: a dict in the class body would itself be
        # turned into an enum member.
        tags = {
            InstructionType.STANDARD: "std",
            InstructionType.VEX: "vex",
            InstructionType.EVEX: "evex",
        }
        return tags.get(self)
|
|
|
|
class Instruction:
    """Base record built from one ``<ins>`` XML element.

    Subclasses are expected to fill in ``bytes`` and to override
    ``get_type`` and ``has_modrm``; the versions here return ``None``.
    """

    def __init__(self, ins):
        """Copy the raw fields out of *ins*.

        Args:
            ins: an element with ``opc`` and ``mnem`` children and
                ``x32m`` / ``x64m`` attributes.

        Raises:
            AttributeError: if the ``opc`` or ``mnem`` child is missing.
            KeyError: if the ``x32m`` or ``x64m`` attribute is missing.
        """
        opcode_node = ins.find("opc")
        self._opc = opcode_node.text  # raw opcode string, parsed by subclasses

        attributes = ins.attrib
        # presumably per-mode validity flags (32-bit / 64-bit) - TODO confirm
        self.x32m = attributes["x32m"]
        self.x64m = attributes["x64m"]

        mnemonic_node = ins.find("mnem")
        self.mnemonic = mnemonic_node.text

        # Left unset here; subclasses assign the parsed opcode bytes.
        self.bytes = None

    def get_type(self):
        """Return the instruction's type; the base class returns None."""
        return None

    def has_modrm(self):
        """Return whether a ModRM byte is used; the base class returns None."""
        return None

    def __str__(self):
        """Return a one-line summary of type, mnemonic, bytes and ModRM use."""
        template = "<{}> {} bytes {} has_modrm {}"
        return template.format(
            self.get_type(), self.mnemonic, self.bytes, self.has_modrm()
        )
|
|
|
|
class InstructionCommon:
    """Compiled regular expressions shared by the opcode-string parsers."""

    # "REX." anchored at the start of the string; group 1 is the next character.
    REX_REGEX = re.compile(r"^REX\.(.)")
    # Each pair of uppercase hexadecimal digits.
    BYTES_REGEX = re.compile(r"([0-9A-F][0-9A-F])")
    # "/" followed by one decimal digit; group 1 is the digit.
    DIGIT_REGEX = re.compile(r"\/(\d)")
    # The literal "/r" marker.
    MODRM_REGEX = re.compile(r"\/r")
    # "i" followed by any character; group 1 is that character.
    IMM_REGEX = re.compile(r"i(.)")
    # "c" followed by any character; group 1 is that character.
    VALUE_REGEX = re.compile(r"c(.)")
    # "r" followed by any character; group 1 is that character.
    OPREG_REGEX = re.compile(r"r(.)")
|
|
|
|
class StandardInstruction(Instruction):
    """Instruction whose opcode string carries no VEX/EVEX descriptor.

    Attributes set by the constructor (all derived from the opcode string):
        bytes: list of every two-character uppercase hex pair found.
        rex: character following a leading ``REX.``, or None.
        digit: int taken from the first ``/<digit>``, or None.
        modrm: True when ``/r`` appears, else False.
        imm: character following the first ``i``, or None.
        value: character following the first ``c``, or None.
        opreg: character following the first ``r``, or None.
    """

    def __init__(self, ins):
        """Parse the opcode string of *ins* with the shared patterns."""
        super().__init__(ins)

        opcode = self._opc

        def captured(pattern):
            # Group 1 of the first match of *pattern*, or None without a match.
            found = pattern.search(opcode)
            return found.group(1) if found else None

        self.rex = captured(InstructionCommon.REX_REGEX)
        self.bytes = InstructionCommon.BYTES_REGEX.findall(opcode)

        digit_text = captured(InstructionCommon.DIGIT_REGEX)
        self.digit = None if digit_text is None else int(digit_text)

        self.modrm = InstructionCommon.MODRM_REGEX.search(opcode) is not None
        self.imm = captured(InstructionCommon.IMM_REGEX)
        self.value = captured(InstructionCommon.VALUE_REGEX)
        # NOTE(review): OPREG_REGEX is "r(.)", so it also matches the "r" of a
        # "/r" marker when another character follows it - confirm intended.
        self.opreg = captured(InstructionCommon.OPREG_REGEX)

    def get_type(self):
        """Return ``InstructionType.STANDARD``."""
        return InstructionType.STANDARD

    def has_modrm(self):
        """Return a truthy value when ``/r`` or a ``/<digit>`` was found."""
        if self.modrm:
            return self.modrm
        return self.digit is not None

    def __str__(self):
        """Return the base summary followed by the parsed opcode fields."""
        template = "{} rex {} digit {} modrm {} imm {} value {} opreg {}"
        return template.format(
            super().__str__(),
            self.rex,
            self.digit,
            self.modrm,
            self.imm,
            self.value,
            self.opreg,
        )
|
|
|
|
class VEXInstruction(Instruction):
    """Instruction whose opcode string begins with a dotted VEX descriptor.

    The first space-separated token of the opcode string (for example
    ``VEX.128.66.0F.WIG``) is split on ``.`` and scanned for the L and W
    fields. The remaining tokens are concatenated and scanned for opcode
    bytes, the ``/r`` marker and an immediate marker.

    Attributes set by the constructor:
        l: False for ``128`` / ``L0`` / ``LZ`` / ``LIG``, True for ``256`` / ``L1``.
        lig: True only when the descriptor contains ``LIG``.
        w: True for ``W1``, False for ``W0`` / ``WIG``.
        wig: True only when the descriptor contains ``WIG``.
        bytes: list of two-character uppercase hex pairs after the descriptor.
        modrm: True when ``/r`` appears after the descriptor.
        imm: character following the first ``i`` after the descriptor, or None.
    """

    def __init__(self, ins):
        """Parse the VEX descriptor and opcode tail of *ins*.

        Raises:
            RuntimeError: if the descriptor names no recognised L field or
                no recognised W field.
        """
        super().__init__(ins)

        # The source text sometimes has a space after a dot; collapse every
        # ". " to "." so the dotted descriptor stays a single token.
        self._opc = re.sub(r"\. ", ".", self._opc)

        vex, *tail = self._opc.split(" ")
        opc = "".join(tail)

        # NOTE(review): debug trace; kept because nothing else in this file
        # reports the parse result.
        print(vex, opc)

        vex_parts = vex.split(".")

        # Vector length: LIG is recorded separately and treated as L=0.
        self.lig = False
        if "128" in vex_parts or "L0" in vex_parts or "LZ" in vex_parts:
            self.l = False
        elif "256" in vex_parts or "L1" in vex_parts:
            self.l = True
        elif "LIG" in vex_parts:
            self.l = False
            self.lig = True
        else:
            raise RuntimeError("VEX.L is unknown!")

        # W bit: WIG is recorded separately and treated as W=0.
        self.wig = False
        if "W0" in vex_parts:
            self.w = False
        elif "W1" in vex_parts:
            self.w = True
        elif "WIG" in vex_parts:
            self.wig = True
            self.w = False
        else:
            raise RuntimeError("VEX.W is unknown!")

        self.bytes = InstructionCommon.BYTES_REGEX.findall(opc)

        modrm = InstructionCommon.MODRM_REGEX.search(opc)
        imm = InstructionCommon.IMM_REGEX.search(opc)

        self.modrm = modrm is not None
        self.imm = imm.group(1) if imm else None

        # NOTE(review): debug trace of the fully parsed instruction.
        print(self)

    def get_type(self):
        """Return ``InstructionType.VEX``."""
        return InstructionType.VEX

    def has_modrm(self):
        """Return True when the opcode tail contains ``/r``."""
        return self.modrm

    def __str__(self):
        """Return the base summary followed by the L/W fields."""
        return f"{super().__str__()} l {self.l} lig {self.lig} w {self.w} wig {self.wig}"
|
|
|
|
class EVEXInstruction(Instruction):
    """Placeholder for EVEX-encoded instructions; construction always fails."""

    def __init__(self, ins):
        """Refuse construction.

        Raises:
            NotImplementedError: always, before any attribute is set.
        """
        reason = "EVEX is not implemented"
        raise NotImplementedError(reason)
|
|
|
|
def parse_instruction(ins):
    """Build the instruction object matching the opcode text of *ins*.

    Args:
        ins: an ``<ins>`` element with an ``opc`` child.

    Returns:
        An ``EVEXInstruction``, ``VEXInstruction`` or ``StandardInstruction``
        built from *ins*, chosen by which marker the opcode text contains.
    """
    opcode_text = ins.find("opc").text

    # Order matters: "EVEX" contains "VEX", so it has to be tested first.
    prefixed_kinds = (
        ("EVEX", EVEXInstruction),
        ("VEX", VEXInstruction),
    )
    for marker, kind in prefixed_kinds:
        if marker in opcode_text:
            return kind(ins)

    return StandardInstruction(ins)
|
|
|
|
class InstructionGroup:
    """A ``<common>`` element: a brief description plus its instructions."""

    def __init__(self, common):
        """Read the ``brief`` text and parse every ``<ins>`` under *common*.

        Args:
            common: an element with a ``brief`` child; every ``ins``
                element beneath it is parsed with ``parse_instruction``.
        """
        brief_node = common.find("brief")
        self.brief = brief_node.text

        parsed = []
        for node in common.iter("ins"):
            parsed.append(parse_instruction(node))
        self.instructions = parsed
|
|
|
|
def parse_file(path):
    """Parse the XML document at *path* into a list of instruction groups.

    Args:
        path: anything ``ET.parse`` accepts as its source.

    Returns:
        One ``InstructionGroup`` per ``common`` element found anywhere in
        the document, in document order.
    """
    document_root = ET.parse(path).getroot()

    collected = []
    for common in document_root.iter("common"):
        collected.append(InstructionGroup(common))
    return collected
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the hard-coded XML file; the returned groups
    # are discarded.
    default_listing = "xml/raw/x86/Intel/AZ.xml"
    parse_file(default_listing)