Parsable-Instructions/genc.py
2024-07-31 23:29:08 +03:00

84 lines
No EOL
2.6 KiB
Python

import re
import xml.etree.ElementTree as ET
class OpCode:
    """One encoding of an instruction, built from an ``<ins>`` XML element.

    Attributes:
        x32m: Value of the element's ``x32m`` attribute.
        x64m: Value of the element's ``x64m`` attribute.
        args: Text of the ``<args>`` child (``None`` when that element is empty).
        opcode: List of the two-character upper-case hex tokens found in the
            text of the ``<opc>`` child.
        operand_encoding: The entry of ``operand_encodings`` named by the
            ``openc`` attribute of ``<opc>``; the raw attribute value when the
            mapping has no such entry; ``None`` when the attribute is absent
            or empty.

    Equality and hashing consider only ``opcode`` and ``operand_encoding``, so
    instances differing only in ``args`` or the mode attributes collapse to a
    single member when placed in a set.
    """

    OPCODE_REGEX = re.compile("[0-9A-F][0-9A-F]")

    def __init__(self, ins, operand_encodings):
        """Read the opcode description from *ins*.

        Args:
            ins: Element with ``x32m``/``x64m`` attributes and ``<args>`` and
                ``<opc>`` children.
            operand_encodings: Mapping from an ``openc`` name to its operand
                description.

        Raises:
            KeyError: If *ins* lacks the ``x32m`` or ``x64m`` attribute.
            AttributeError: If the ``<args>`` or ``<opc>`` child is missing.
        """
        self.x32m = ins.attrib["x32m"]
        self.x64m = ins.attrib["x64m"]
        self.args = ins.find("args").text
        opc = ins.find("opc")
        self.opcode = OpCode.OPCODE_REGEX.findall(opc.text)
        openc = opc.attrib.get("openc")
        if openc:
            # Unknown names fall back to the name itself so they stay visible.
            self.operand_encoding = operand_encodings.get(openc, openc)
        else:
            self.operand_encoding = None

    def __str__(self):
        return f"\topcode {self.opcode} args {self.args} op_enc {self.operand_encoding}"

    def __eq__(self, other):
        # Let Python try the reflected comparison (and finally fall back to
        # identity) instead of failing with AttributeError on foreign types.
        if not isinstance(other, OpCode):
            return NotImplemented
        return self.__key() == other.__key()

    def __key(self):
        """Return a hashable value that is equal exactly when __eq__ holds."""
        encoding = self.operand_encoding
        if isinstance(encoding, list):
            # A tuple stays hashable even when individual operand slots are
            # None, which str.join() would reject with TypeError.
            encoding = tuple(encoding)
        return (tuple(self.opcode), encoding)

    def __hash__(self):
        return hash(self.__key())
class Instruction:
    """An instruction entry together with its de-duplicated opcodes.

    Attributes:
        brief: Text of the entry's ``<brief>`` child.
        opcodes: Set of ``OpCode`` objects built from the ``<ins>`` elements
            below the entry, excluding those whose argument text mentions one
            of ``SKIP_16BIT_REALMODE``.
    """

    # Argument substrings that mark the 16-bit real-mode forms to be skipped.
    SKIP_16BIT_REALMODE = ["rel16", "imm16", "ptr16:16"]

    @staticmethod
    def contains_16bit_mode(args):
        """Return True when *args* contains any ``SKIP_16BIT_REALMODE`` entry.

        Args:
            args: Argument text of an opcode; ``None`` or an empty string
                (an opcode without arguments) yields ``False``.
        """
        if not args:
            return False
        return any(needle in args for needle in Instruction.SKIP_16BIT_REALMODE)

    def __init__(self, common):
        """Collect the brief text and the opcodes found under *common*.

        Args:
            common: Element with a ``<brief>`` child, ``<oprndenc>``
                descendants (each carrying an ``openc`` attribute and
                ``<oprnd1>`` .. ``<oprnd4>`` children) and ``<ins>``
                descendants.
        """
        self.brief = common.find("brief").text

        # Map every operand-encoding name to the text of its four operand slots.
        operand_tags = ("oprnd1", "oprnd2", "oprnd3", "oprnd4")
        operand_encodings = {
            encoding.attrib["openc"]: [encoding.find(tag).text for tag in operand_tags]
            for encoding in common.iter("oprndenc")
        }

        parsed = (OpCode(ins, operand_encodings) for ins in common.iter("ins"))
        # Drop the 16-bit real-mode forms; building a set removes duplicates.
        self.opcodes = {
            op for op in parsed if not Instruction.contains_16bit_mode(op.args)
        }
def parse_file(path):
    """Parse the XML document at *path* and print its instructions.

    Every child of the document root is turned into an ``Instruction``; once
    all of them are built, each instruction's brief text is printed, followed
    by one line per opcode.

    Args:
        path: Location of the XML file, passed to ``ET.parse``.
    """
    document_root = ET.parse(path).getroot()
    parsed = [Instruction(node) for node in document_root]
    for entry in parsed:
        print(entry.brief)
        for encoding in entry.opcodes:
            print(encoding)
# When run as a script, print the instruction listing parsed from the
# hard-coded XML path (relative to the current working directory).
if __name__ == "__main__":
    parse_file("xml/raw/x86/Intel/AZ.xml")