diff --git a/genc.py b/genc.py index 99e454d..82c3363 100644 --- a/genc.py +++ b/genc.py @@ -19,6 +19,15 @@ class OpCode: def __str__(self): return f"\topcode {self.opcode} args {self.args} op_enc {self.operand_encoding}" + + def __eq__(self, other): + return self.opcode == other.opcode and self.operand_encoding == other.operand_encoding + + def __key(self): + return ("".join(self.opcode), "".join(self.operand_encoding or [])) + + def __hash__(self): + return hash(self.__key()) class Instruction: SKIP_16BIT_REALMODE = ["rel16", "imm16", "ptr16:16"] @@ -50,6 +59,12 @@ class Instruction: # remove 16 bit real mode displacement value opcodes self.opcodes = list(filter(lambda op: not Instruction.contains_16bit_mode(op.args), self.opcodes)) + # de-duplicate opcodes with set + _opcodes = self.opcodes + self.opcodes = set() + for op in _opcodes: + self.opcodes.add(op) + def parse_file(path): tree = ET.parse(path) diff --git a/plan.txt b/plan.txt index c76c29b..da3750e 100644 --- a/plan.txt +++ b/plan.txt @@ -1,3 +1,4 @@ Just parse all prefixes and skip them, but some of the prefixes like 0x66 can dictate size of immediate "args" parser field would tell if instruction has immediate and what type it could be, -while "op_enc" dictates presense of ModRM, and ModRM tells if instruction has SIB byte \ No newline at end of file +while "op_enc" dictates presence of ModRM, and ModRM tells if instruction has SIB byte. +Decode VEX prefixes. 0xC5 for the 2-byte VEX prefix and 0xC4 for the 3-byte VEX prefix \ No newline at end of file