Begin implementing Intel opcode syntax parser
This commit is contained in:
parent
24c290e29e
commit
5998950f23
1 changed file with 31 additions and 63 deletions
92
genc.py
92
genc.py
|
|
@ -1,83 +1,51 @@
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
class OpCode:
|
class Instruction:
|
||||||
OPCODE_REGEX = re.compile("[0-9A-F][0-9A-F]")
|
REX_REGEX = re.compile("^REX\\.(.)")
|
||||||
|
BYTES_REGEX = re.compile("([0-9A-F][0-9A-F])")
|
||||||
|
DIGIT_REGEX = re.compile("\\/(\\d)")
|
||||||
|
MODRM_REGEX = re.compile("\\/r")
|
||||||
|
IMM_REGEX = re.compile("i(.)")
|
||||||
|
VALUE_REGEX = re.compile("c(.)")
|
||||||
|
OPREG_REGEX = re.compile("r(.)")
|
||||||
|
|
||||||
def __init__(self, ins, operand_encodings):
|
def __init__(self, ins):
|
||||||
self.x32m = ins.attrib["x32m"]
|
self.x32m = ins.attrib["x32m"]
|
||||||
self.x64m = ins.attrib["x64m"]
|
self.x64m = ins.attrib["x64m"]
|
||||||
self.args = ins.find("args").text
|
|
||||||
|
|
||||||
opc = ins.find("opc")
|
opc = ins.find("opc").text
|
||||||
self.opcode = OpCode.OPCODE_REGEX.findall(opc.text)
|
if "VEX" in opc: return
|
||||||
|
|
||||||
openc = opc.attrib.get("openc")
|
rex = Instruction.REX_REGEX.search(opc)
|
||||||
if openc:
|
bytes = Instruction.BYTES_REGEX.findall(opc)
|
||||||
self.operand_encoding = operand_encodings.get(openc, openc)
|
digit = Instruction.DIGIT_REGEX.search(opc)
|
||||||
else: self.operand_encoding = None
|
modrm = Instruction.MODRM_REGEX.search(opc)
|
||||||
|
imm = Instruction.IMM_REGEX.search(opc)
|
||||||
|
value = Instruction.VALUE_REGEX.search(opc)
|
||||||
|
opreg = Instruction.OPREG_REGEX.search(opc)
|
||||||
|
|
||||||
def __str__(self):
|
print(ins.find("mnem").text)
|
||||||
return f"\topcode {self.opcode} args {self.args} op_enc {self.operand_encoding}"
|
if rex: print("rex\t", rex.group(1))
|
||||||
|
print(bytes)
|
||||||
def __eq__(self, other):
|
if digit: print("digit\t", digit.group(1))
|
||||||
return self.opcode == other.opcode and self.operand_encoding == other.operand_encoding
|
if modrm: print("modrm\t", modrm.group(0))
|
||||||
|
if imm: print("imm\t", imm.group(1))
|
||||||
def __key(self):
|
if value: print("value\t", value.group(1))
|
||||||
return ("".join(self.opcode), "".join(self.operand_encoding or []))
|
if opreg: print("opreg\t", opreg.group(1))
|
||||||
|
|
||||||
def __hash__(self):
|
|
||||||
return hash(self.__key())
|
|
||||||
|
|
||||||
class Instruction:
|
|
||||||
SKIP_16BIT_REALMODE = ["rel16", "imm16", "ptr16:16"]
|
|
||||||
|
|
||||||
def contains_16bit_mode(args):
|
|
||||||
for needle in Instruction.SKIP_16BIT_REALMODE:
|
|
||||||
if needle in args:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
class InstructionGroup:
|
||||||
def __init__(self, common):
|
def __init__(self, common):
|
||||||
self.brief = common.find("brief").text
|
self.brief = common.find("brief").text
|
||||||
|
self.instructions = [Instruction(ins) for ins in common.iter("ins")]
|
||||||
operand_encodings = {}
|
|
||||||
for operand_encoding in common.iter("oprndenc"):
|
|
||||||
name = operand_encoding.attrib["openc"]
|
|
||||||
|
|
||||||
operands = []
|
|
||||||
operands.append(operand_encoding.find("oprnd1").text)
|
|
||||||
operands.append(operand_encoding.find("oprnd2").text)
|
|
||||||
operands.append(operand_encoding.find("oprnd3").text)
|
|
||||||
operands.append(operand_encoding.find("oprnd4").text)
|
|
||||||
|
|
||||||
operand_encodings[name] = operands
|
|
||||||
|
|
||||||
self.opcodes = []
|
|
||||||
for ins in common.iter("ins"):
|
|
||||||
self.opcodes.append(OpCode(ins, operand_encodings))
|
|
||||||
|
|
||||||
# remove 16 bit real mode displacement value opcodes
|
|
||||||
self.opcodes = list(filter(lambda op: not Instruction.contains_16bit_mode(op.args), self.opcodes))
|
|
||||||
|
|
||||||
# de-duplicate opcodes with set
|
|
||||||
_opcodes = self.opcodes
|
|
||||||
self.opcodes = set()
|
|
||||||
for op in _opcodes:
|
|
||||||
self.opcodes.add(op)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_file(path):
|
def parse_file(path):
|
||||||
tree = ET.parse(path)
|
tree = ET.parse(path)
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
|
|
||||||
instructions = []
|
groups = [InstructionGroup(common) for common in root.iter("common")]
|
||||||
for common in root:
|
return groups
|
||||||
instructions.append(Instruction(common))
|
|
||||||
|
|
||||||
for instruction in instructions:
|
|
||||||
print(instruction.brief)
|
|
||||||
for opcode in instruction.opcodes:
|
|
||||||
print(opcode)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue